Refine bench_record and bench_playback:
- use high-precision wall timer only - warm caches once before measuring - measure independent samples, calculating statistics - add --verbose to control how much data we output Also removed some unloved features from bench_record. BUG=skia: R=jcgregorio@google.com, mtklein@google.com Author: mtklein@chromium.org Review URL: https://codereview.chromium.org/338203002
This commit is contained in:
parent
ad2ab61b4e
commit
90c471e73f
@ -76,3 +76,19 @@ void BenchTimer::truncatedEnd() {
|
|||||||
fTruncatedCpu = fTruncatedSysTimer->endCpu() * fDurationScale;
|
fTruncatedCpu = fTruncatedSysTimer->endCpu() * fDurationScale;
|
||||||
fTruncatedWall = fTruncatedSysTimer->endWall() * fDurationScale;
|
fTruncatedWall = fTruncatedSysTimer->endWall() * fDurationScale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WallTimer::WallTimer() : fWall(-1.0), fSysTimer(new BenchSysTimer) {}
|
||||||
|
|
||||||
|
WallTimer::~WallTimer() {
|
||||||
|
delete fSysTimer;
|
||||||
|
}
|
||||||
|
|
||||||
|
void WallTimer::start(double durationScale) {
|
||||||
|
fDurationScale = durationScale;
|
||||||
|
fSysTimer->startWall();
|
||||||
|
}
|
||||||
|
|
||||||
|
void WallTimer::end() {
|
||||||
|
fWall = fSysTimer->endWall() * fDurationScale;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -39,12 +39,29 @@ public:
|
|||||||
double fGpu;
|
double fGpu;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BenchSysTimer *fSysTimer;
|
BenchSysTimer* fSysTimer;
|
||||||
BenchSysTimer *fTruncatedSysTimer;
|
BenchSysTimer* fTruncatedSysTimer;
|
||||||
#if SK_SUPPORT_GPU
|
#if SK_SUPPORT_GPU
|
||||||
BenchGpuTimer *fGpuTimer;
|
BenchGpuTimer* fGpuTimer;
|
||||||
#endif
|
#endif
|
||||||
double fDurationScale; // for this start/end session
|
double fDurationScale; // for this start/end session
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Same as BenchTimer above, supporting only fWall but with much lower overhead.
|
||||||
|
// (Typically, ~30ns instead of BenchTimer's ~1us.)
|
||||||
|
class WallTimer {
|
||||||
|
public:
|
||||||
|
WallTimer();
|
||||||
|
~WallTimer();
|
||||||
|
|
||||||
|
void start(double durationScale = 1);
|
||||||
|
void end();
|
||||||
|
|
||||||
|
double fWall;
|
||||||
|
|
||||||
|
private:
|
||||||
|
BenchSysTimer* fSysTimer;
|
||||||
|
double fDurationScale;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
32
tools/Stats.h
Normal file
32
tools/Stats.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#ifndef Stats_DEFINED
|
||||||
|
#define Stats_DEFINED
|
||||||
|
|
||||||
|
struct Stats {
|
||||||
|
Stats(const double samples[], int n) {
|
||||||
|
min = samples[0];
|
||||||
|
max = samples[0];
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (samples[i] < min) { min = samples[i]; }
|
||||||
|
if (samples[i] > max) { max = samples[i]; }
|
||||||
|
}
|
||||||
|
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int i = 0 ; i < n; i++) {
|
||||||
|
sum += samples[i];
|
||||||
|
}
|
||||||
|
mean = sum / n;
|
||||||
|
|
||||||
|
double err = 0.0;
|
||||||
|
for (int i = 0 ; i < n; i++) {
|
||||||
|
err += (samples[i] - mean) * (samples[i] - mean);
|
||||||
|
}
|
||||||
|
var = err / (n-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
double min;
|
||||||
|
double max;
|
||||||
|
double mean; // Estimate of population mean.
|
||||||
|
double var; // Estimate of population variance.
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif//Stats_DEFINED
|
@ -5,7 +5,6 @@
|
|||||||
* found in the LICENSE file.
|
* found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "BenchTimer.h"
|
|
||||||
#include "SkCommandLineFlags.h"
|
#include "SkCommandLineFlags.h"
|
||||||
#include "SkForceLinking.h"
|
#include "SkForceLinking.h"
|
||||||
#include "SkGraphics.h"
|
#include "SkGraphics.h"
|
||||||
@ -16,19 +15,27 @@
|
|||||||
#include "SkStream.h"
|
#include "SkStream.h"
|
||||||
#include "SkString.h"
|
#include "SkString.h"
|
||||||
|
|
||||||
|
#include "BenchTimer.h"
|
||||||
|
#include "Stats.h"
|
||||||
|
|
||||||
|
typedef WallTimer Timer;
|
||||||
|
|
||||||
__SK_FORCE_IMAGE_DECODER_LINKING;
|
__SK_FORCE_IMAGE_DECODER_LINKING;
|
||||||
|
|
||||||
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
|
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to playback.");
|
||||||
DEFINE_int32(loops, 10, "Number of times to play back each SKP.");
|
DEFINE_int32(samples, 10, "Gather this many samples of each picture playback.");
|
||||||
DEFINE_bool(skr, false, "Play via SkRecord instead of SkPicture.");
|
DEFINE_bool(skr, false, "Play via SkRecord instead of SkPicture.");
|
||||||
DEFINE_int32(tile, 1000000000, "Simulated tile size.");
|
DEFINE_int32(tile, 1000000000, "Simulated tile size.");
|
||||||
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
|
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
|
||||||
DEFINE_string(timescale, "ms", "Print times in ms, us, or ns");
|
DEFINE_string(timescale, "ms", "Print times in ms, us, or ns");
|
||||||
|
DEFINE_int32(verbose, 0, "0: print min sample; "
|
||||||
|
"1: print min, mean, max and noise indication "
|
||||||
|
"2: print all samples");
|
||||||
|
|
||||||
static double scale_time(double ms) {
|
static double timescale() {
|
||||||
if (FLAGS_timescale.contains("us")) ms *= 1000;
|
if (FLAGS_timescale.contains("us")) return 1000;
|
||||||
if (FLAGS_timescale.contains("ns")) ms *= 1000000;
|
if (FLAGS_timescale.contains("ns")) return 1000000;
|
||||||
return ms;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static SkPicture* rerecord_with_tilegrid(SkPicture& src) {
|
static SkPicture* rerecord_with_tilegrid(SkPicture& src) {
|
||||||
@ -49,6 +56,14 @@ static EXPERIMENTAL::SkPlayback* rerecord_with_skr(SkPicture& src) {
|
|||||||
return recording.releasePlayback();
|
return recording.releasePlayback();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void draw(const EXPERIMENTAL::SkPlayback& skr, const SkPicture& skp, SkCanvas* canvas) {
|
||||||
|
if (FLAGS_skr) {
|
||||||
|
skr.draw(canvas);
|
||||||
|
} else {
|
||||||
|
skp.draw(canvas);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
|
static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
|
||||||
SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src));
|
SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src));
|
||||||
SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src));
|
SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src));
|
||||||
@ -59,19 +74,34 @@ static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
|
|||||||
src.width() * sizeof(SkPMColor)));
|
src.width() * sizeof(SkPMColor)));
|
||||||
canvas->clipRect(SkRect::MakeWH(SkIntToScalar(FLAGS_tile), SkIntToScalar(FLAGS_tile)));
|
canvas->clipRect(SkRect::MakeWH(SkIntToScalar(FLAGS_tile), SkIntToScalar(FLAGS_tile)));
|
||||||
|
|
||||||
BenchTimer timer;
|
// Draw once to warm any caches. The first sample otherwise can be very noisy.
|
||||||
timer.start();
|
draw(*record, *picture, canvas.get());
|
||||||
for (int i = 0; i < FLAGS_loops; i++) {
|
|
||||||
if (FLAGS_skr) {
|
|
||||||
record->draw(canvas.get());
|
|
||||||
} else {
|
|
||||||
picture->draw(canvas.get());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
timer.end();
|
|
||||||
|
|
||||||
const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
|
Timer timer;
|
||||||
printf("%f\t%s\n", scale_time(msPerLoop), name);
|
SkAutoTMalloc<double> samples(FLAGS_samples);
|
||||||
|
for (int i = 0; i < FLAGS_samples; i++) {
|
||||||
|
// We assume timer overhead (typically, ~30ns) is insignificant
|
||||||
|
// compared to draw runtime (at least ~100us, usually several ms).
|
||||||
|
timer.start(timescale());
|
||||||
|
draw(*record, *picture, canvas.get());
|
||||||
|
timer.end();
|
||||||
|
samples[i] = timer.fWall;
|
||||||
|
}
|
||||||
|
|
||||||
|
Stats stats(samples.get(), FLAGS_samples);
|
||||||
|
if (FLAGS_verbose == 0) {
|
||||||
|
printf("%g\t%s\n", stats.min, name);
|
||||||
|
} else if (FLAGS_verbose == 1) {
|
||||||
|
// Get a rough idea of how noisy the measurements were.
|
||||||
|
const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
|
||||||
|
printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
|
||||||
|
} else if (FLAGS_verbose == 2) {
|
||||||
|
printf("%s", name);
|
||||||
|
for (int i = 0; i < FLAGS_samples; i++) {
|
||||||
|
printf("\t%g", samples[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int tool_main(int argc, char** argv);
|
int tool_main(int argc, char** argv);
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
* found in the LICENSE file.
|
* found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "BenchTimer.h"
|
|
||||||
#include "SkCommandLineFlags.h"
|
#include "SkCommandLineFlags.h"
|
||||||
#include "SkForceLinking.h"
|
#include "SkForceLinking.h"
|
||||||
#include "SkGraphics.h"
|
#include "SkGraphics.h"
|
||||||
@ -15,28 +14,32 @@
|
|||||||
#include "SkRecording.h"
|
#include "SkRecording.h"
|
||||||
#include "SkStream.h"
|
#include "SkStream.h"
|
||||||
#include "SkString.h"
|
#include "SkString.h"
|
||||||
|
|
||||||
|
#include "BenchTimer.h"
|
||||||
#include "LazyDecodeBitmap.h"
|
#include "LazyDecodeBitmap.h"
|
||||||
|
#include "Stats.h"
|
||||||
|
|
||||||
|
typedef WallTimer Timer;
|
||||||
|
|
||||||
__SK_FORCE_IMAGE_DECODER_LINKING;
|
__SK_FORCE_IMAGE_DECODER_LINKING;
|
||||||
|
|
||||||
// Just reading all the SKPs takes about 2 seconds for me, which is the same as about 100 loops of
|
|
||||||
// rerecording all the SKPs. So we default to --loops=900, which makes ~90% of our time spent in
|
|
||||||
// recording, and this should take ~20 seconds to run.
|
|
||||||
|
|
||||||
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
|
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
|
||||||
DEFINE_int32(loops, 900, "Number of times to re-record each SKP.");
|
DEFINE_int32(samples, 10, "Number of times to re-record each SKP.");
|
||||||
DEFINE_bool(endRecording, true, "If false, don't time SkPicture::endRecording()");
|
|
||||||
DEFINE_int32(nullSize, 1000, "Pretend dimension of null source picture.");
|
|
||||||
DEFINE_int32(tileGridSize, 512, "Set the tile grid size. Has no effect if bbh is not set to tilegrid.");
|
DEFINE_int32(tileGridSize, 512, "Set the tile grid size. Has no effect if bbh is not set to tilegrid.");
|
||||||
DEFINE_string(bbh, "", "Turn on the bbh and select the type, one of rtree, tilegrid, quadtree");
|
DEFINE_string(bbh, "", "Turn on the bbh and select the type, one of rtree, tilegrid, quadtree");
|
||||||
DEFINE_bool(skr, false, "Record SKR instead of SKP.");
|
DEFINE_bool(skr, false, "Record SKR instead of SKP.");
|
||||||
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
|
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
|
||||||
DEFINE_string(timescale, "us", "Print times in ms, us, or ns");
|
DEFINE_string(timescale, "us", "Print times in ms, us, or ns");
|
||||||
|
DEFINE_double(overheadGoal, 0.0001,
|
||||||
|
"Try to make timer overhead at most this fraction of our sample measurements.");
|
||||||
|
DEFINE_int32(verbose, 0, "0: print min sample; "
|
||||||
|
"1: print min, mean, max and noise indication "
|
||||||
|
"2: print all samples");
|
||||||
|
|
||||||
static double scale_time(double ms) {
|
static double timescale() {
|
||||||
if (FLAGS_timescale.contains("us")) ms *= 1000;
|
if (FLAGS_timescale.contains("us")) return 1000;
|
||||||
if (FLAGS_timescale.contains("ns")) ms *= 1000000;
|
if (FLAGS_timescale.contains("ns")) return 1000000;
|
||||||
return ms;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static SkBBHFactory* parse_FLAGS_bbh() {
|
static SkBBHFactory* parse_FLAGS_bbh() {
|
||||||
@ -61,35 +64,62 @@ static SkBBHFactory* parse_FLAGS_bbh() {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) {
|
static void rerecord(const SkPicture& src, SkBBHFactory* bbhFactory) {
|
||||||
BenchTimer timer;
|
if (FLAGS_skr) {
|
||||||
timer.start();
|
EXPERIMENTAL::SkRecording recording(src.width(), src.height());
|
||||||
const int width = src ? src->width() : FLAGS_nullSize;
|
src.draw(recording.canvas());
|
||||||
const int height = src ? src->height() : FLAGS_nullSize;
|
// Release and delete the SkPlayback so that recording optimizes its SkRecord.
|
||||||
|
SkDELETE(recording.releasePlayback());
|
||||||
for (int i = 0; i < FLAGS_loops; i++) {
|
} else {
|
||||||
if (FLAGS_skr) {
|
SkPictureRecorder recorder;
|
||||||
EXPERIMENTAL::SkRecording recording(width, height);
|
src.draw(recorder.beginRecording(src.width(), src.height(), bbhFactory));
|
||||||
if (NULL != src) {
|
SkAutoTUnref<SkPicture> dst(recorder.endRecording());
|
||||||
src->draw(recording.canvas());
|
|
||||||
}
|
|
||||||
// Release and delete the SkPlayback so that recording optimizes its SkRecord.
|
|
||||||
SkDELETE(recording.releasePlayback());
|
|
||||||
} else {
|
|
||||||
SkPictureRecorder recorder;
|
|
||||||
SkCanvas* canvas = recorder.beginRecording(width, height, bbhFactory);
|
|
||||||
if (NULL != src) {
|
|
||||||
src->draw(canvas);
|
|
||||||
}
|
|
||||||
if (FLAGS_endRecording) {
|
|
||||||
SkAutoTUnref<SkPicture> dst(recorder.endRecording());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
timer.end();
|
}
|
||||||
|
|
||||||
const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
|
static void bench_record(const SkPicture& src,
|
||||||
printf("%f\t%s\n", scale_time(msPerLoop), name);
|
const double timerOverhead,
|
||||||
|
const char* name,
|
||||||
|
SkBBHFactory* bbhFactory) {
|
||||||
|
// Rerecord once to warm up any caches. Otherwise the first sample can be very noisy.
|
||||||
|
rerecord(src, bbhFactory);
|
||||||
|
|
||||||
|
// Rerecord once to see how many times we should loop to make timer overhead insignificant.
|
||||||
|
Timer timer;
|
||||||
|
do {
|
||||||
|
timer.start(timescale());
|
||||||
|
rerecord(src, bbhFactory);
|
||||||
|
timer.end();
|
||||||
|
} while (timer.fWall < timerOverhead); // Loop just in case something bizarre happens.
|
||||||
|
|
||||||
|
// We want (timer overhead / measurement) to be less than FLAGS_overheadGoal.
|
||||||
|
// So in each sample, we'll loop enough times to have made that true for our first measurement.
|
||||||
|
const int loops = (int)ceil(timerOverhead / timer.fWall / FLAGS_overheadGoal);
|
||||||
|
|
||||||
|
SkAutoTMalloc<double> samples(FLAGS_samples);
|
||||||
|
for (int i = 0; i < FLAGS_samples; i++) {
|
||||||
|
timer.start(timescale());
|
||||||
|
for (int j = 0; j < loops; j++) {
|
||||||
|
rerecord(src, bbhFactory);
|
||||||
|
}
|
||||||
|
timer.end();
|
||||||
|
samples[i] = timer.fWall / loops;
|
||||||
|
}
|
||||||
|
|
||||||
|
Stats stats(samples.get(), FLAGS_samples);
|
||||||
|
if (FLAGS_verbose == 0) {
|
||||||
|
printf("%g\t%s\n", stats.min, name);
|
||||||
|
} else if (FLAGS_verbose == 1) {
|
||||||
|
// Get a rough idea of how noisy the measurements were.
|
||||||
|
const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
|
||||||
|
printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
|
||||||
|
} else if (FLAGS_verbose == 2) {
|
||||||
|
printf("%s", name);
|
||||||
|
for (int i = 0; i < FLAGS_samples; i++) {
|
||||||
|
printf("\t%g", samples[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int tool_main(int argc, char** argv);
|
int tool_main(int argc, char** argv);
|
||||||
@ -103,10 +133,17 @@ int tool_main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SkAutoTDelete<SkBBHFactory> bbhFactory(parse_FLAGS_bbh());
|
SkAutoTDelete<SkBBHFactory> bbhFactory(parse_FLAGS_bbh());
|
||||||
bench_record(NULL, "NULL", bbhFactory.get());
|
|
||||||
if (FLAGS_skps.isEmpty()) {
|
// Each run will use this timer overhead estimate to guess how many times it should run.
|
||||||
return 0;
|
static const int kOverheadLoops = 10000000;
|
||||||
|
Timer timer;
|
||||||
|
double overheadEstimate = 0.0;
|
||||||
|
for (int i = 0; i < kOverheadLoops; i++) {
|
||||||
|
timer.start(timescale());
|
||||||
|
timer.end();
|
||||||
|
overheadEstimate += timer.fWall;
|
||||||
}
|
}
|
||||||
|
overheadEstimate /= kOverheadLoops;
|
||||||
|
|
||||||
SkOSFile::Iter it(FLAGS_skps[0], ".skp");
|
SkOSFile::Iter it(FLAGS_skps[0], ".skp");
|
||||||
SkString filename;
|
SkString filename;
|
||||||
@ -131,7 +168,7 @@ int tool_main(int argc, char** argv) {
|
|||||||
failed = true;
|
failed = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
bench_record(src, filename.c_str(), bbhFactory.get());
|
bench_record(*src, overheadEstimate, filename.c_str(), bbhFactory.get());
|
||||||
}
|
}
|
||||||
return failed ? 1 : 0;
|
return failed ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user