2017-03-25 15:29:41 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2017 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
ok, add a bench source
This new source acts like other sources (GMs, SKPs) for benchmarks. It
times multiple samples (controlled by samples=N, default 20), and each
of those samples uses the same strategy as monobench, growing loops
exponentially until it runs for at least 10ms.
When done it prints the fastest and the two slowest samples. In
practice the 100th percentile sample is very different from the
next slowest due to caching, and the fastest is always interesting.
Because these benchmarks run in whatever execution engine ok has
selected, on non-Windows platforms you have some real control over the
interaction between benchmarks. In its default "fork" mode each
benchmark runs independently in its own process, so the 100th
percentiles really stand out. The other modes "thread" and "serial"
work as you'd expect too.
Here's an example where you can see how the different interactions work:
out/ok bench:samples=100 8888 filter:search=text_16_AA fork
[text_16_AA_WT] 2.32µs @0 6.23µs @99 24.3ms @100
[text_16_AA_FF] 2.41µs @0 5.7µs @99 23.3ms @100
[text_16_AA_88] 2.55µs @0 5.6µs @99 24.8ms @100
[text_16_AA_BK] 1.97µs @0 5.44µs @99 23.2ms @100
out/ok bench:samples=100 8888 filter:search=text_16_AA thread
[text_16_AA_FF] 2.45µs @0 23.5µs @99 24.8ms @100
[text_16_AA_WT] 2.52µs @0 17.8µs @99 24.7ms @100
[text_16_AA_88] 2.55µs @0 19.7µs @99 25.1ms @100
[text_16_AA_BK] 1.8µs @0 14.7µs @99 25.1ms @100
out/ok bench:samples=100 8888 filter:search=text_16_AA serial
[text_16_AA_88] 2.35µs @0 3.53µs @99 16.7ms @100
[text_16_AA_FF] 2.09µs @0 2.73µs @99 2.91µs @100
[text_16_AA_BK] 1.75µs @0 2.46µs @99 2.65µs @100
[text_16_AA_WT] 2.1µs @0 3.16µs @99 3.17µs @100
In the first "fork" case all runs are independent and have roughly
the same profile. "thread" looks similar except you can see them
contending at the 99th percentile. In "serial", the first bench
warms up the rest, so their 100th percentiles are all much faster.
Change-Id: I01a9f8c54b540221a9f232b271bb8ef3fda2569c
Reviewed-on: https://skia-review.googlesource.com/33585
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
2017-08-11 14:37:35 +00:00
|
|
|
#include "Benchmark.h"
|
2017-05-03 19:16:58 +00:00
|
|
|
#include "SkData.h"
|
2017-03-25 15:29:41 +00:00
|
|
|
#include "SkOSFile.h"
|
|
|
|
#include "SkPicture.h"
|
ok, add a bench source
This new source acts like other sources (GMs, SKPs) for benchmarks. It
times multiple samples (controlled by samples=N, default 20), and each
of those samples uses the same strategy as monobench, growing loops
exponentially until it runs for at least 10ms.
When done it prints the fastest and the two slowest samples. In
practice the 100th percentile sample is very different from the
next slowest due to caching, and the fastest is always interesting.
Because these benchmarks run in whatever execution engine ok has
selected, on non-Windows platforms you have some real control over the
interaction between benchmarks. In its default "fork" mode each
benchmark runs independently in its own process, so the 100th
percentiles really stand out. The other modes "thread" and "serial"
work as you'd expect too.
Here's an example where you can see how the different interactions work:
out/ok bench:samples=100 8888 filter:search=text_16_AA fork
[text_16_AA_WT] 2.32µs @0 6.23µs @99 24.3ms @100
[text_16_AA_FF] 2.41µs @0 5.7µs @99 23.3ms @100
[text_16_AA_88] 2.55µs @0 5.6µs @99 24.8ms @100
[text_16_AA_BK] 1.97µs @0 5.44µs @99 23.2ms @100
out/ok bench:samples=100 8888 filter:search=text_16_AA thread
[text_16_AA_FF] 2.45µs @0 23.5µs @99 24.8ms @100
[text_16_AA_WT] 2.52µs @0 17.8µs @99 24.7ms @100
[text_16_AA_88] 2.55µs @0 19.7µs @99 25.1ms @100
[text_16_AA_BK] 1.8µs @0 14.7µs @99 25.1ms @100
out/ok bench:samples=100 8888 filter:search=text_16_AA serial
[text_16_AA_88] 2.35µs @0 3.53µs @99 16.7ms @100
[text_16_AA_FF] 2.09µs @0 2.73µs @99 2.91µs @100
[text_16_AA_BK] 1.75µs @0 2.46µs @99 2.65µs @100
[text_16_AA_WT] 2.1µs @0 3.16µs @99 3.17µs @100
In the first "fork" case all runs are independent and have roughly
the same profile. "thread" looks similar except you can see them
contending at the 99th percentile. In "serial", the first bench
warms up the rest, so their 100th percentiles are all much faster.
Change-Id: I01a9f8c54b540221a9f232b271bb8ef3fda2569c
Reviewed-on: https://skia-review.googlesource.com/33585
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
2017-08-11 14:37:35 +00:00
|
|
|
#include "Timer.h"
|
|
|
|
#include "gm.h"
|
|
|
|
#include "ok.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <chrono>
|
|
|
|
#include <limits>
|
2017-08-16 01:55:33 +00:00
|
|
|
#include <stdlib.h>
|
2017-03-25 15:29:41 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
struct GMStream : Stream {
|
|
|
|
const skiagm::GMRegistry* registry = skiagm::GMRegistry::Head();
|
|
|
|
|
|
|
|
static std::unique_ptr<Stream> Create(Options) {
|
|
|
|
GMStream stream;
|
|
|
|
return move_unique(stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct GMSrc : Src {
|
|
|
|
skiagm::GM* (*factory)(void*);
|
|
|
|
std::unique_ptr<skiagm::GM> gm;
|
|
|
|
|
2017-03-25 19:53:14 +00:00
|
|
|
void init() {
|
|
|
|
if (gm) { return; }
|
2017-03-25 15:29:41 +00:00
|
|
|
gm.reset(factory(nullptr));
|
2017-03-25 19:53:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string name() override {
|
|
|
|
this->init();
|
2017-03-25 15:29:41 +00:00
|
|
|
return gm->getName();
|
|
|
|
}
|
|
|
|
|
|
|
|
SkISize size() override {
|
2017-03-25 19:53:14 +00:00
|
|
|
this->init();
|
2017-03-25 15:29:41 +00:00
|
|
|
return gm->getISize();
|
|
|
|
}
|
|
|
|
|
2017-03-29 16:41:13 +00:00
|
|
|
Status draw(SkCanvas* canvas) override {
|
2017-03-25 19:53:14 +00:00
|
|
|
this->init();
|
2017-03-25 15:29:41 +00:00
|
|
|
canvas->clear(0xffffffff);
|
|
|
|
gm->draw(canvas);
|
2017-03-29 16:41:13 +00:00
|
|
|
return Status::OK;
|
2017-03-25 15:29:41 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::unique_ptr<Src> next() override {
|
|
|
|
if (!registry) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
GMSrc src;
|
|
|
|
src.factory = registry->factory();
|
|
|
|
registry = registry->next();
|
|
|
|
return move_unique(src);
|
|
|
|
}
|
|
|
|
};
|
2017-03-29 16:41:13 +00:00
|
|
|
// Ties the name "gm" and its one-line description to GMStream::Create.
// NOTE(review): Register is declared outside this file (presumably ok.h);
// how it publishes the entry is not visible here -- confirm.
static Register gm{"gm", "draw GMs linked into this binary", GMStream::Create};
|
2017-03-25 15:29:41 +00:00
|
|
|
|
|
|
|
struct SKPStream : Stream {
|
|
|
|
std::string dir;
|
|
|
|
std::vector<std::string> skps;
|
|
|
|
|
|
|
|
static std::unique_ptr<Stream> Create(Options options) {
|
|
|
|
SKPStream stream;
|
|
|
|
stream.dir = options("dir", "skps");
|
|
|
|
SkOSFile::Iter it{stream.dir.c_str(), ".skp"};
|
|
|
|
for (SkString path; it.next(&path); ) {
|
|
|
|
stream.skps.push_back(path.c_str());
|
|
|
|
}
|
|
|
|
return move_unique(stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct SKPSrc : Src {
|
|
|
|
std::string dir, path;
|
|
|
|
sk_sp<SkPicture> pic;
|
|
|
|
|
2017-03-25 19:53:14 +00:00
|
|
|
void init() {
|
|
|
|
if (pic) { return; }
|
|
|
|
auto skp = SkData::MakeFromFileName((dir+"/"+path).c_str());
|
|
|
|
pic = SkPicture::MakeFromData(skp.get());
|
|
|
|
}
|
|
|
|
|
2017-03-25 15:29:41 +00:00
|
|
|
std::string name() override {
|
|
|
|
return path;
|
|
|
|
}
|
|
|
|
|
|
|
|
SkISize size() override {
|
2017-03-25 19:53:14 +00:00
|
|
|
this->init();
|
2017-03-25 15:29:41 +00:00
|
|
|
return pic->cullRect().roundOut().size();
|
|
|
|
}
|
|
|
|
|
2017-03-29 16:41:13 +00:00
|
|
|
Status draw(SkCanvas* canvas) override {
|
2017-03-25 19:53:14 +00:00
|
|
|
this->init();
|
2017-03-25 15:29:41 +00:00
|
|
|
canvas->clear(0xffffffff);
|
|
|
|
pic->playback(canvas);
|
2017-03-29 16:41:13 +00:00
|
|
|
return Status::OK;
|
2017-03-25 15:29:41 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::unique_ptr<Src> next() override {
|
|
|
|
if (skps.empty()) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
SKPSrc src;
|
|
|
|
src.dir = dir;
|
|
|
|
src.path = skps.back();
|
|
|
|
skps.pop_back();
|
|
|
|
return move_unique(src);
|
|
|
|
}
|
|
|
|
};
|
2017-03-29 16:41:13 +00:00
|
|
|
// Ties the name "skp" and its one-line description to SKPStream::Create.
// NOTE(review): Register is declared outside this file (presumably ok.h);
// how it publishes the entry is not visible here -- confirm.
static Register skp{"skp", "draw SKPs from dir=skps", SKPStream::Create};
|
ok, add a bench source
This new source acts like other sources (GMs, SKPs) for benchmarks. It
times multiple samples (controlled by samples=N, default 20), and each
of those samples uses the same strategy as monobench, growing loops
exponentially until it runs for at least 10ms.
When done it prints the fastest and the two slowest samples. In
practice the 100th percentile sample is very different from the
next slowest due to caching, and the fastest is always interesting.
Because these benchmarks run in whatever execution engine ok has
selected, on non-Windows platforms you have some real control over the
interaction between benchmarks. In its default "fork" mode each
benchmark runs independently in its own process, so the 100th
percentiles really stand out. The other modes "thread" and "serial"
work as you'd expect too.
Here's an example where you can see how the different interactions work:
out/ok bench:samples=100 8888 filter:search=text_16_AA fork
[text_16_AA_WT] 2.32µs @0 6.23µs @99 24.3ms @100
[text_16_AA_FF] 2.41µs @0 5.7µs @99 23.3ms @100
[text_16_AA_88] 2.55µs @0 5.6µs @99 24.8ms @100
[text_16_AA_BK] 1.97µs @0 5.44µs @99 23.2ms @100
out/ok bench:samples=100 8888 filter:search=text_16_AA thread
[text_16_AA_FF] 2.45µs @0 23.5µs @99 24.8ms @100
[text_16_AA_WT] 2.52µs @0 17.8µs @99 24.7ms @100
[text_16_AA_88] 2.55µs @0 19.7µs @99 25.1ms @100
[text_16_AA_BK] 1.8µs @0 14.7µs @99 25.1ms @100
out/ok bench:samples=100 8888 filter:search=text_16_AA serial
[text_16_AA_88] 2.35µs @0 3.53µs @99 16.7ms @100
[text_16_AA_FF] 2.09µs @0 2.73µs @99 2.91µs @100
[text_16_AA_BK] 1.75µs @0 2.46µs @99 2.65µs @100
[text_16_AA_WT] 2.1µs @0 3.16µs @99 3.17µs @100
In the first "fork" case all runs are independent and have roughly
the same profile. "thread" looks similar except you can see them
contending at the 99th percentile. In "serial", the first bench
warms up the rest, so their 100th percentiles are all much faster.
Change-Id: I01a9f8c54b540221a9f232b271bb8ef3fda2569c
Reviewed-on: https://skia-review.googlesource.com/33585
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
2017-08-11 14:37:35 +00:00
|
|
|
|
|
|
|
struct BenchStream : Stream {
|
|
|
|
const BenchRegistry* registry = BenchRegistry::Head();
|
|
|
|
int samples;
|
|
|
|
|
|
|
|
static std::unique_ptr<Stream> Create(Options options) {
|
|
|
|
BenchStream stream;
|
|
|
|
stream.samples = std::max(1, atoi(options("samples", "20").c_str()));
|
|
|
|
return move_unique(stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct BenchSrc : Src {
|
|
|
|
Benchmark* (*factory)(void*);
|
|
|
|
std::unique_ptr<Benchmark> bench;
|
|
|
|
int samples;
|
|
|
|
|
|
|
|
void init() {
|
|
|
|
if (bench) { return; }
|
|
|
|
bench.reset(factory(nullptr));
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string name() override {
|
|
|
|
this->init();
|
|
|
|
return bench->getName();
|
|
|
|
}
|
|
|
|
|
|
|
|
SkISize size() override {
|
|
|
|
this->init();
|
|
|
|
return { bench->getSize().x(), bench->getSize().y() };
|
|
|
|
}
|
|
|
|
|
|
|
|
Status draw(SkCanvas* canvas) override {
|
|
|
|
this->init();
|
|
|
|
|
|
|
|
using ms = std::chrono::duration<double, std::milli>;
|
|
|
|
std::vector<ms> sample(samples);
|
|
|
|
|
|
|
|
bench->delayedSetup();
|
|
|
|
if (canvas) {
|
|
|
|
bench->perCanvasPreDraw(canvas);
|
|
|
|
}
|
|
|
|
for (int i = 0; i < samples; i++) {
|
|
|
|
using clock = std::chrono::high_resolution_clock;
|
|
|
|
for (int loops = 1; loops < 1000000000; loops *= 2) {
|
|
|
|
bench->preDraw(canvas);
|
|
|
|
auto start = clock::now();
|
|
|
|
bench->draw(loops, canvas);
|
|
|
|
ms elapsed = clock::now() - start;
|
|
|
|
bench->postDraw(canvas);
|
|
|
|
|
|
|
|
if (elapsed.count() < 10) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
sample[i] = elapsed / loops;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (canvas) {
|
|
|
|
bench->perCanvasPostDraw(canvas);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::sort(sample.begin(), sample.end());
|
|
|
|
|
|
|
|
SkString msg = SkStringPrintf("%s\t@0", HumanizeMs(sample[0].count()).c_str());
|
|
|
|
if (samples > 2) {
|
|
|
|
msg.appendf("\t%s\t@%g", HumanizeMs(sample[samples-2].count()).c_str()
|
|
|
|
, 100.0*(samples-1) / samples);
|
|
|
|
}
|
|
|
|
if (samples > 1) {
|
|
|
|
msg.appendf("\t%s\t@100", HumanizeMs(sample[samples-1].count()).c_str());
|
|
|
|
}
|
|
|
|
ok_log(msg.c_str());
|
|
|
|
|
|
|
|
return Status::OK;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::unique_ptr<Src> next() override {
|
|
|
|
if (!registry) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
BenchSrc src;
|
|
|
|
src.factory = registry->factory();
|
|
|
|
src.samples = samples;
|
|
|
|
registry = registry->next();
|
|
|
|
return move_unique(src);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
// Ties the name "bench" and its one-line description to BenchStream::Create.
static Register bench{"bench",
                      "time benchmarks linked into this binary samples=20 times each",
                      BenchStream::Create};
|