fix SKVM_ benches
Things were running suspiciously well...

_I32 had a typo that cut out 3/4 of its multiplies...

_I32_SWAR was missing a mask operation needed to drop the junk low byte of the high half after the multiply.

The bench times now make a bit more sense and are in line with how much work we're actually doing: F32's the slowest, I32 a little faster, and I32_SWAR fastest:

curr/maxrss loops min median mean max stddev samples config bench
35/36 MB 58 2.03ns 2.04ns 2.04ns 2.04ns 0% ▂▂▂▂▁▁█▁▂▁ nonrendering SkVM_4096_I32_SWAR
35/36 MB 42 3.44ns 3.48ns 3.49ns 3.59ns 1% ▂▆▅█▃▃▁▂▂▄ nonrendering SkVM_4096_I32
35/36 MB 30 4.9ns 5.21ns 5.11ns 5.33ns 3% ▆▇█▆▆▁▂▁▁▅ nonrendering SkVM_4096_F32
35/36 MB 203 0.696ns 0.697ns 0.705ns 0.758ns 3% █▂▂▁▁▁▁▁▁▂ nonrendering SkVM_4096_RP
35/36 MB 942 0.188ns 0.188ns 0.188ns 0.189ns 0% ▂▁▂▁▃█▂▁▁▁ nonrendering SkVM_4096_Opts

Change-Id: I2850dc3f9df1828f03499eb278b8231f48eaae63
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/217982
Commit-Queue: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
parent
ef032cd9bf
commit
03ce675b5f
@ -9,9 +9,6 @@
|
||||
#include "src/core/SkOpts.h"
|
||||
#include "src/core/SkVM.h"
|
||||
|
||||
// N.B. I have not tested that the math performed by these benchmarks is correct.
|
||||
// They're really more meant to be representative load. (Wouldn't hurt to be correct though.)
|
||||
|
||||
namespace {
|
||||
|
||||
enum Mode {Opts, RP, F32, I32, I32_SWAR};
|
||||
@ -85,9 +82,9 @@ namespace {
|
||||
|
||||
skvm::I32 invA = sub(splat(0xff), a);
|
||||
r = add(r, mul_unorm8(dr, invA));
|
||||
g = add(g, mul_unorm8(dr, invA));
|
||||
b = add(b, mul_unorm8(dr, invA));
|
||||
a = add(a, mul_unorm8(dr, invA));
|
||||
g = add(g, mul_unorm8(dg, invA));
|
||||
b = add(b, mul_unorm8(db, invA));
|
||||
a = add(a, mul_unorm8(da, invA));
|
||||
|
||||
store32(dst, bit_or( r ,
|
||||
bit_or(shl(g, 8),
|
||||
@ -110,7 +107,10 @@ namespace {
|
||||
|
||||
auto mul_unorm8 = [&](skvm::I32 x, skvm::I32 y) {
|
||||
// As above, assuming x is two SWAR bytes in lanes 0 and 2, and y is a byte.
|
||||
return shr(add(mul(x, y), splat(0x00ff00ff)), 8);
|
||||
return bit_and(shr(add(mul(x, y),
|
||||
splat(0x00ff00ff)),
|
||||
8),
|
||||
splat(0x00ff00ff));
|
||||
};
|
||||
|
||||
skvm::I32 rb, ga;
|
||||
@ -157,6 +157,12 @@ private:
|
||||
fPipeline.append(SkRasterPipeline::srcover);
|
||||
fPipeline.append(SkRasterPipeline::store_8888, &fDstCtx);
|
||||
}
|
||||
|
||||
// Trigger one run now so we can do a quick correctness check.
|
||||
this->draw(1,nullptr);
|
||||
for (int i = 0; i < fPixels; i++) {
|
||||
SkASSERT(fDst[i] == 0xff5e6f80);
|
||||
}
|
||||
}
|
||||
|
||||
void onDraw(int loops, SkCanvas*) override {
|
||||
|
Loading…
Reference in New Issue
Block a user