2019-05-29 17:57:54 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2019 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "bench/Benchmark.h"
|
|
|
|
#include "src/core/SkOpts.h"
|
|
|
|
#include "src/core/SkVM.h"
|
2019-06-03 22:10:59 +00:00
|
|
|
#include "tools/SkVMBuilders.h"
|
2019-05-29 17:57:54 +00:00
|
|
|
|
|
|
|
namespace {

    // Which srcover implementation a benchmark exercises.
    // The enumerator values double as indices into kMode_name, so keep both in the same order.
    enum Mode {Opts, RP, F32, I32_Naive};

    // Name suffix for each Mode, indexed by the Mode value; used to build benchmark names.
    // constexpr keeps the table immutable; the anonymous namespace already gives it
    // internal linkage, so no `static` is needed.
    constexpr const char* kMode_name[] = { "Opts", "RP", "F32", "I32_Naive" };

    // Fail the build if a Mode is added or removed without updating the name table.
    static_assert(static_cast<int>(sizeof(kMode_name) / sizeof(kMode_name[0])) == I32_Naive + 1,
                  "kMode_name must have exactly one entry per Mode");

}  // namespace
|
|
|
|
|
|
|
|
class SkVMBench : public Benchmark {
|
|
|
|
public:
|
|
|
|
SkVMBench(int pixels, Mode mode)
|
|
|
|
: fPixels(pixels)
|
|
|
|
, fMode(mode)
|
|
|
|
, fName(SkStringPrintf("SkVM_%d_%s", pixels, kMode_name[mode]))
|
|
|
|
{}
|
|
|
|
|
|
|
|
private:
|
|
|
|
const char* onGetName() override { return fName.c_str(); }
|
|
|
|
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
|
|
|
|
|
|
|
void onDelayedSetup() override {
|
|
|
|
this->setUnits(fPixels);
|
|
|
|
fSrc.resize(fPixels, 0x7f123456); // Arbitrary non-opaque non-transparent value.
|
|
|
|
fDst.resize(fPixels, 0xff987654); // Arbitrary value.
|
|
|
|
|
2019-06-20 16:37:10 +00:00
|
|
|
if (fMode == F32 ) { fProgram = SrcoverBuilder_F32 {}.done(); }
|
|
|
|
if (fMode == I32_Naive) { fProgram = SrcoverBuilder_I32_Naive{}.done(); }
|
2019-05-29 17:57:54 +00:00
|
|
|
|
|
|
|
if (fMode == RP) {
|
|
|
|
fSrcCtx = { fSrc.data(), 0 };
|
|
|
|
fDstCtx = { fDst.data(), 0 };
|
|
|
|
fPipeline.append(SkRasterPipeline::load_8888 , &fSrcCtx);
|
|
|
|
fPipeline.append(SkRasterPipeline::load_8888_dst, &fDstCtx);
|
|
|
|
fPipeline.append(SkRasterPipeline::srcover);
|
|
|
|
fPipeline.append(SkRasterPipeline::store_8888, &fDstCtx);
|
|
|
|
}
|
2019-06-03 19:53:15 +00:00
|
|
|
|
|
|
|
// Trigger one run now so we can do a quick correctness check.
|
|
|
|
this->draw(1,nullptr);
|
|
|
|
for (int i = 0; i < fPixels; i++) {
|
2019-06-21 16:21:35 +00:00
|
|
|
SkASSERTF(fDst[i] == 0xff5e6f80, "Want 0xff5e6f80, got %08x", fDst[i]);
|
2019-06-03 19:53:15 +00:00
|
|
|
}
|
2019-05-29 17:57:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void onDraw(int loops, SkCanvas*) override {
|
|
|
|
while (loops --> 0) {
|
|
|
|
if (fMode == Opts) {
|
|
|
|
SkOpts::blit_row_s32a_opaque(fDst.data(), fSrc.data(), fPixels, 0xff);
|
|
|
|
} else if (fMode == RP) {
|
|
|
|
fPipeline.run(0,0,fPixels,1);
|
|
|
|
} else {
|
|
|
|
fProgram.eval(fPixels, fSrc.data(), fDst.data());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int fPixels;
|
|
|
|
Mode fMode;
|
|
|
|
SkString fName;
|
|
|
|
std::vector<uint32_t> fSrc,
|
|
|
|
fDst;
|
|
|
|
skvm::Program fProgram;
|
|
|
|
|
|
|
|
SkRasterPipeline_MemoryCtx fSrcCtx,
|
|
|
|
fDstCtx;
|
|
|
|
SkRasterPipeline_<256> fPipeline;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Register one SkVMBench per (pixel count, Mode) pair, grouped by Mode.

// SkOpts::blit_row_s32a_opaque
DEF_BENCH(return new SkVMBench(   1, Opts);)
DEF_BENCH(return new SkVMBench(   4, Opts);)
DEF_BENCH(return new SkVMBench(  15, Opts);)
DEF_BENCH(return new SkVMBench(  63, Opts);)
DEF_BENCH(return new SkVMBench( 256, Opts);)
DEF_BENCH(return new SkVMBench(1024, Opts);)
DEF_BENCH(return new SkVMBench(4096, Opts);)

// SkRasterPipeline
DEF_BENCH(return new SkVMBench(   1, RP);)
DEF_BENCH(return new SkVMBench(   4, RP);)
DEF_BENCH(return new SkVMBench(  15, RP);)
DEF_BENCH(return new SkVMBench(  63, RP);)
DEF_BENCH(return new SkVMBench( 256, RP);)
DEF_BENCH(return new SkVMBench(1024, RP);)
DEF_BENCH(return new SkVMBench(4096, RP);)

// skvm program from SrcoverBuilder_F32
DEF_BENCH(return new SkVMBench(   1, F32);)
DEF_BENCH(return new SkVMBench(   4, F32);)
DEF_BENCH(return new SkVMBench(  15, F32);)
DEF_BENCH(return new SkVMBench(  63, F32);)
DEF_BENCH(return new SkVMBench( 256, F32);)
DEF_BENCH(return new SkVMBench(1024, F32);)
DEF_BENCH(return new SkVMBench(4096, F32);)

// skvm program from SrcoverBuilder_I32_Naive
DEF_BENCH(return new SkVMBench(   1, I32_Naive);)
DEF_BENCH(return new SkVMBench(   4, I32_Naive);)
DEF_BENCH(return new SkVMBench(  15, I32_Naive);)
DEF_BENCH(return new SkVMBench(  63, I32_Naive);)
DEF_BENCH(return new SkVMBench( 256, I32_Naive);)
DEF_BENCH(return new SkVMBench(1024, I32_Naive);)
DEF_BENCH(return new SkVMBench(4096, I32_Naive);)
|
|
|
|
|
add SkVM_Overhead bench, simple improvements
This new bench lets us measure the overhead of program building,
optimization, and JITting. Surprisingly, at head the optimization in
Builder::done() takes longer than the JIT.
The new bench clocks in around 40µs on my laptop at head,
then 32µs after switching val_to_reg to be an std::vector,
then 27µs after switching deaths to be an std::vector too,
then 22µs after switching fIndex to be an SkTHashMap,
then 20µs after calling program.reserve(fProgram.size()),
then 19µs after switching JIT data maps to SkTHashMap too.
I tried swapping some std::vector for SkTDArray to no benefit, actually
a little detriment. So I think this is roughly all the low-hanging
fruit, with time split now roughly equally between Builder::done(),
JITting in Program::eval(), and the original calls to Builder
themselves.
Also disable perf dumps on Mac. No real value there until I can dump a
dylib, and it's just one more thing I have to remember to disable before
running this sort of benchmark.
Change-Id: I1c6e58ed00ac94ad622c7d740712634f60787102
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/222984
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-06-22 13:35:13 +00:00
|
|
|
class SkVM_Overhead : public Benchmark {
|
|
|
|
public:
|
2019-06-24 19:00:35 +00:00
|
|
|
explicit SkVM_Overhead(bool rp) : fRP(rp) {}
|
add SkVM_Overhead bench, simple improvements
This new bench lets us measure the overhead of program building,
optimization, and JITting. Surprisingly, at head the optimization in
Builder::done() takes longer than the JIT.
The new bench clocks in around 40µs on my laptop at head,
then 32µs after switching val_to_reg to be an std::vector,
then 27µs after switching deaths to be an std::vector too,
then 22µs after switching fIndex to be an SkTHashMap,
then 20µs after calling program.reserve(fProgram.size()),
then 19µs after switching JIT data maps to SkTHashMap too.
I tried swapping some std::vector for SkTDArray to no benefit, actually
a little detriment. So I think this is roughly all the low-hanging
fruit, with time split now roughly equally between Builder::Done(),
JITting in Program::eval(), and the original calls to Builder
themselves.
Also disable perf dumps on Mac. No real value there until I can dump a
dylib, and it's just one more thing I have to remember to disable before
running this sort of benchmark.
Change-Id: I1c6e58ed00ac94ad622c7d740712634f60787102
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/222984
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-06-22 13:35:13 +00:00
|
|
|
|
|
|
|
private:
|
2019-06-24 19:00:35 +00:00
|
|
|
const char* onGetName() override { return fRP ? "SkVM_Overhead_RP" : "SkVM_Overhead_VM"; }
|
add SkVM_Overhead bench, simple improvements
This new bench lets us measure the overhead of program building,
optimization, and JITting. Surprisingly, at head the optimization in
Builder::done() takes longer than the JIT.
The new bench clocks in around 40µs on my laptop at head,
then 32µs after switching val_to_reg to be an std::vector,
then 27µs after switching deaths to be an std::vector too,
then 22µs after switching fIndex to be an SkTHashMap,
then 20µs after calling program.reserve(fProgram.size()),
then 19µs after switching JIT data maps to SkTHashMap too.
I tried swapping some std::vector for SkTDArray to no benefit, actually
a little detriment. So I think this is roughly all the low-hanging
fruit, with time split now roughly equally between Builder::Done(),
JITting in Program::eval(), and the original calls to Builder
themselves.
Also disable perf dumps on Mac. No real value there until I can dump a
dylib, and it's just one more thing I have to remember to disable before
running this sort of benchmark.
Change-Id: I1c6e58ed00ac94ad622c7d740712634f60787102
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/222984
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-06-22 13:35:13 +00:00
|
|
|
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
|
|
|
|
|
|
|
void onDraw(int loops, SkCanvas*) override {
|
2019-06-24 19:00:35 +00:00
|
|
|
float dummy;
|
|
|
|
if (fRP) {
|
|
|
|
while (loops --> 0) {
|
|
|
|
SkRasterPipeline_<256> rp;
|
|
|
|
SkRasterPipeline_MemoryCtx src = { &dummy, 0},
|
|
|
|
dst = { &dummy, 0};
|
|
|
|
rp.append_load (SkColorType::kRGBA_F32_SkColorType, &src);
|
|
|
|
rp.append_load_dst(SkColorType::kRGBA_F32_SkColorType, &dst);
|
|
|
|
rp.append (SkRasterPipeline::srcover);
|
|
|
|
rp.append_store (SkColorType::kRGBA_F32_SkColorType, &dst);
|
|
|
|
|
|
|
|
(void)rp.compile();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
while (loops --> 0) {
|
|
|
|
skvm::Program program = SrcoverBuilder_F32{}.done();
|
|
|
|
program.eval(0, &dummy, &dummy);
|
|
|
|
}
|
add SkVM_Overhead bench, simple improvements
This new bench lets us measure the overhead of program building,
optimization, and JITting. Surprisingly, at head the optimization in
Builder::done() takes longer than the JIT.
The new bench clocks in around 40µs on my laptop at head,
then 32µs after switching val_to_reg to be an std::vector,
then 27µs after switching deaths to be an std::vector too,
then 22µs after switching fIndex to be an SkTHashMap,
then 20µs after calling program.reserve(fProgram.size()),
then 19µs after switching JIT data maps to SkTHashMap too.
I tried swapping some std::vector for SkTDArray to no benefit, actually
a little detriment. So I think this is roughly all the low-hanging
fruit, with time split now roughly equally between Builder::Done(),
JITting in Program::eval(), and the original calls to Builder
themselves.
Also disable perf dumps on Mac. No real value there until I can dump a
dylib, and it's just one more thing I have to remember to disable before
running this sort of benchmark.
Change-Id: I1c6e58ed00ac94ad622c7d740712634f60787102
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/222984
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-06-22 13:35:13 +00:00
|
|
|
}
|
|
|
|
}
|
2019-06-24 19:00:35 +00:00
|
|
|
|
|
|
|
bool fRP;
|
add SkVM_Overhead bench, simple improvements
This new bench lets us measure the overhead of program building,
optimization, and JITting. Surprisingly, at head the optimization in
Builder::done() takes longer than the JIT.
The new bench clocks in around 40µs on my laptop at head,
then 32µs after switching val_to_reg to be an std::vector,
then 27µs after switching deaths to be an std::vector too,
then 22µs after switching fIndex to be an SkTHashMap,
then 20µs after calling program.reserve(fProgram.size()),
then 19µs after switching JIT data maps to SkTHashMap too.
I tried swapping some std::vector for SkTDArray to no benefit, actually
a little detriment. So I think this is roughly all the low-hanging
fruit, with time split now roughly equally between Builder::Done(),
JITting in Program::eval(), and the original calls to Builder
themselves.
Also disable perf dumps on Mac. No real value there until I can dump a
dylib, and it's just one more thing I have to remember to disable before
running this sort of benchmark.
Change-Id: I1c6e58ed00ac94ad622c7d740712634f60787102
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/222984
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-06-22 13:35:13 +00:00
|
|
|
};
|
2019-06-24 19:00:35 +00:00
|
|
|
// Register both overhead variants: SkRasterPipeline first, then skvm.
DEF_BENCH(return new SkVM_Overhead(/*rp=*/true);)
DEF_BENCH(return new SkVM_Overhead(/*rp=*/false);)
|