2015-03-25 20:43:34 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2015 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
2015-02-23 18:04:34 +00:00
|
|
|
#include "Benchmark.h"
|
|
|
|
#include "SkPMFloat.h"
|
2015-03-03 16:03:27 +00:00
|
|
|
|
|
|
|
// Used to prevent the compiler from optimizing away the whole loop.
|
|
|
|
// Benchmarks in this file XOR their accumulated result into this global once
// per onDraw() call; being volatile, that final store cannot be elided.
volatile uint32_t blackhole = 0;
|
|
|
|
|
|
|
|
// Not a great random number generator, but it's very fast.
|
|
|
|
// The code we're measuring is quite fast, so low overhead is essential.
|
|
|
|
// Advances the generator state in *seed by one linear-congruential step,
//     state' = state * 1664525 + 1013904223   (arithmetic wraps modulo 2^32),
// stores the new state back through the pointer, and returns it.
static uint32_t lcg_rand(uint32_t* seed) {
    const uint32_t next = *seed * 1664525u + 1013904223u;
    *seed = next;
    return next;
}
|
2015-02-23 18:04:34 +00:00
|
|
|
|
2015-03-05 19:31:59 +00:00
|
|
|
// I'm having better luck getting these to constant-propagate away as template parameters.
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
struct PMFloatRoundtripBench : public Benchmark {
|
|
|
|
PMFloatRoundtripBench() {}
|
2015-02-23 18:04:34 +00:00
|
|
|
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
const char* onGetName() override { return "SkPMFloat_roundtrip"; }
|
2015-03-26 01:17:31 +00:00
|
|
|
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
2015-02-23 18:04:34 +00:00
|
|
|
|
2015-03-26 01:17:31 +00:00
|
|
|
void onDraw(const int loops, SkCanvas* canvas) override {
|
2015-03-03 16:03:27 +00:00
|
|
|
// Unlike blackhole, junk can and probably will be a register.
|
|
|
|
uint32_t junk = 0;
|
|
|
|
uint32_t seed = 0;
|
2015-02-23 18:04:34 +00:00
|
|
|
for (int i = 0; i < loops; i++) {
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
SkPMColor color;
|
2015-03-03 16:03:27 +00:00
|
|
|
#ifdef SK_DEBUG
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
// Our SkASSERTs will remind us that it's technically required that we premultiply.
|
|
|
|
color = SkPreMultiplyColor(lcg_rand(&seed));
|
2015-03-03 16:03:27 +00:00
|
|
|
#else
|
|
|
|
// But it's a lot faster not to, and this code won't really mind the non-PM colors.
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
color = lcg_rand(&seed);
|
2015-03-03 16:03:27 +00:00
|
|
|
#endif
|
2015-03-05 19:31:59 +00:00
|
|
|
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
auto f = SkPMFloat::FromPMColor(color);
|
|
|
|
SkPMColor back = f.round();
|
|
|
|
junk ^= back;
|
2015-02-23 18:04:34 +00:00
|
|
|
}
|
2015-03-03 16:03:27 +00:00
|
|
|
blackhole ^= junk;
|
2015-02-23 18:04:34 +00:00
|
|
|
}
|
|
|
|
};
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
// Hands DEF_BENCH a factory statement that creates a PMFloatRoundtripBench.
// NOTE(review): DEF_BENCH is defined outside this file (presumably via
// Benchmark.h) and presumably registers the benchmark with the runner - confirm.
DEF_BENCH(return new PMFloatRoundtripBench;)
|
2015-03-26 01:13:02 +00:00
|
|
|
|
|
|
|
struct PMFloatGradientBench : public Benchmark {
|
|
|
|
const char* onGetName() override { return "PMFloat_gradient"; }
|
|
|
|
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
|
|
|
|
|
|
|
SkPMColor fDevice[100];
|
|
|
|
void onDraw(const int loops, SkCanvas*) override {
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0),
|
|
|
|
c1 = SkPMFloat::FromARGB(1, 0, 0, 1),
|
2015-03-26 01:13:02 +00:00
|
|
|
dc = c1 - c0,
|
|
|
|
fx(0.1f),
|
|
|
|
dx(0.002f),
|
|
|
|
dcdx(dc*dx),
|
|
|
|
dcdx4(dcdx+dcdx+dcdx+dcdx);
|
|
|
|
|
|
|
|
for (int n = 0; n < loops; n++) {
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
Sk4f a = c0 + dc*fx,
|
2015-03-26 01:13:02 +00:00
|
|
|
b = a + dcdx,
|
|
|
|
c = b + dcdx,
|
|
|
|
d = c + dcdx;
|
|
|
|
for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
|
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eeking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
2015-06-25 15:56:28 +00:00
|
|
|
fDevice[i+0] = SkPMFloat(a).round();
|
|
|
|
fDevice[i+1] = SkPMFloat(b).round();
|
|
|
|
fDevice[i+2] = SkPMFloat(c).round();
|
|
|
|
fDevice[i+3] = SkPMFloat(d).round();
|
2015-06-10 15:57:28 +00:00
|
|
|
a = a + dcdx4;
|
|
|
|
b = b + dcdx4;
|
|
|
|
c = c + dcdx4;
|
|
|
|
d = d + dcdx4;
|
2015-03-26 01:13:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Hands DEF_BENCH a factory statement that creates a PMFloatGradientBench.
// NOTE(review): DEF_BENCH is defined outside this file (presumably via
// Benchmark.h) and presumably registers the benchmark with the runner - confirm.
DEF_BENCH(return new PMFloatGradientBench;)
|