fix SKVM_ benches

Things were running suspiciously well...

_I32 had a typo (dr was used for the g, b, and a channels too) that cut out 3/4 of its multiplies...

_I32_SWAR was missing a mask operation needed to drop
the junk low byte of the high half after the multiply.

The bench times now make a bit more sense and are in line
with how much work we're actually doing: F32's the slowest,
I32 a little faster, and I32_SWAR fastest:

    curr/maxrss	loops	min	median	mean	max	stddev	samples   	config	bench
      35/36  MB	58	2.03ns	2.04ns	2.04ns	2.04ns	0%	▂▂▂▂▁▁█▁▂▁	nonrendering	SkVM_4096_I32_SWAR
      35/36  MB	42	3.44ns	3.48ns	3.49ns	3.59ns	1%	▂▆▅█▃▃▁▂▂▄	nonrendering	SkVM_4096_I32
      35/36  MB	30	4.9ns	5.21ns	5.11ns	5.33ns	3%	▆▇█▆▆▁▂▁▁▅	nonrendering	SkVM_4096_F32
      35/36  MB	203	0.696ns	0.697ns	0.705ns	0.758ns	3%	█▂▂▁▁▁▁▁▁▂	nonrendering	SkVM_4096_RP
      35/36  MB	942	0.188ns	0.188ns	0.188ns	0.189ns	0%	▂▁▂▁▃█▂▁▁▁	nonrendering	SkVM_4096_Opts

Change-Id: I2850dc3f9df1828f03499eb278b8231f48eaae63
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/217982
Commit-Queue: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
Mike Klein 2019-06-03 14:53:15 -05:00 committed by Skia Commit-Bot
parent ef032cd9bf
commit 03ce675b5f

View File

@ -9,9 +9,6 @@
#include "src/core/SkOpts.h"
#include "src/core/SkVM.h"
// N.B. I have not tested that the math performed by these benchmarks is correct.
// They're really more meant to be representative load. (Wouldn't hurt to be correct though.)
namespace {
enum Mode {Opts, RP, F32, I32, I32_SWAR};
@ -85,9 +82,9 @@ namespace {
skvm::I32 invA = sub(splat(0xff), a);
r = add(r, mul_unorm8(dr, invA));
g = add(g, mul_unorm8(dr, invA));
b = add(b, mul_unorm8(dr, invA));
a = add(a, mul_unorm8(dr, invA));
g = add(g, mul_unorm8(dg, invA));
b = add(b, mul_unorm8(db, invA));
a = add(a, mul_unorm8(da, invA));
store32(dst, bit_or( r ,
bit_or(shl(g, 8),
@ -110,7 +107,10 @@ namespace {
auto mul_unorm8 = [&](skvm::I32 x, skvm::I32 y) {
// As above, assuming x is two SWAR bytes in lanes 0 and 2, and y is a byte.
return shr(add(mul(x, y), splat(0x00ff00ff)), 8);
return bit_and(shr(add(mul(x, y),
splat(0x00ff00ff)),
8),
splat(0x00ff00ff));
};
skvm::I32 rb, ga;
@ -157,6 +157,12 @@ private:
fPipeline.append(SkRasterPipeline::srcover);
fPipeline.append(SkRasterPipeline::store_8888, &fDstCtx);
}
// Trigger one run now so we can do a quick correctness check.
this->draw(1,nullptr);
for (int i = 0; i < fPixels; i++) {
SkASSERT(fDst[i] == 0xff5e6f80);
}
}
void onDraw(int loops, SkCanvas*) override {