Clean up remaining users of SkPMFloat
This switches over SkXfermodes_opts.h and SkColorMatrixFilter to use Sk4f, and converts the SkPMFloat benches to Sk4f benches. No pixels should change here, and no code beyond the Sk4f_ benches should change speed. The benches are faster than the old versions. BUG=skia:4117 Review URL: https://codereview.chromium.org/1324743002
This commit is contained in:
parent
99138876a6
commit
dde03ff89f
@ -6,7 +6,8 @@
|
||||
*/
|
||||
|
||||
#include "Benchmark.h"
|
||||
#include "SkPMFloat.h"
|
||||
#include "SkColor.h"
|
||||
#include "SkNx.h"
|
||||
|
||||
// Used to prevent the compiler from optimizing away the whole loop.
|
||||
volatile uint32_t blackhole = 0;
|
||||
@ -19,11 +20,10 @@ static uint32_t lcg_rand(uint32_t* seed) {
|
||||
return *seed;
|
||||
}
|
||||
|
||||
// I'm having better luck getting these to constant-propagate away as template parameters.
|
||||
struct PMFloatRoundtripBench : public Benchmark {
|
||||
PMFloatRoundtripBench() {}
|
||||
struct Sk4fBytesRoundtripBench : public Benchmark {
|
||||
Sk4fBytesRoundtripBench() {}
|
||||
|
||||
const char* onGetName() override { return "SkPMFloat_roundtrip"; }
|
||||
const char* onGetName() override { return "Sk4f_roundtrip"; }
|
||||
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
||||
|
||||
void onDraw(const int loops, SkCanvas* canvas) override {
|
||||
@ -31,32 +31,25 @@ struct PMFloatRoundtripBench : public Benchmark {
|
||||
uint32_t junk = 0;
|
||||
uint32_t seed = 0;
|
||||
for (int i = 0; i < loops; i++) {
|
||||
SkPMColor color;
|
||||
#ifdef SK_DEBUG
|
||||
// Our SkASSERTs will remind us that it's technically required that we premultiply.
|
||||
color = SkPreMultiplyColor(lcg_rand(&seed));
|
||||
#else
|
||||
// But it's a lot faster not to, and this code won't really mind the non-PM colors.
|
||||
color = lcg_rand(&seed);
|
||||
#endif
|
||||
|
||||
auto f = SkPMFloat::FromPMColor(color);
|
||||
SkPMColor back = f.round();
|
||||
uint32_t color = lcg_rand(&seed),
|
||||
back;
|
||||
auto f = Sk4f::FromBytes((const uint8_t*)&color);
|
||||
f.toBytes((uint8_t*)&back);
|
||||
junk ^= back;
|
||||
}
|
||||
blackhole ^= junk;
|
||||
}
|
||||
};
|
||||
DEF_BENCH(return new PMFloatRoundtripBench;)
|
||||
DEF_BENCH(return new Sk4fBytesRoundtripBench;)
|
||||
|
||||
struct PMFloatGradientBench : public Benchmark {
|
||||
const char* onGetName() override { return "PMFloat_gradient"; }
|
||||
struct Sk4fGradientBench : public Benchmark {
|
||||
const char* onGetName() override { return "Sk4f_gradient"; }
|
||||
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
||||
|
||||
SkPMColor fDevice[100];
|
||||
void onDraw(const int loops, SkCanvas*) override {
|
||||
Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0),
|
||||
c1 = SkPMFloat::FromARGB(1, 0, 0, 1),
|
||||
Sk4f c0(0,0,255,255),
|
||||
c1(255,0,0,255),
|
||||
dc = c1 - c0,
|
||||
fx(0.1f),
|
||||
dx(0.002f),
|
||||
@ -64,15 +57,15 @@ struct PMFloatGradientBench : public Benchmark {
|
||||
dcdx4(dcdx+dcdx+dcdx+dcdx);
|
||||
|
||||
for (int n = 0; n < loops; n++) {
|
||||
Sk4f a = c0 + dc*fx,
|
||||
Sk4f a = c0 + dc*fx + Sk4f(0.5f), // add an extra 0.5f to get rounding for free.
|
||||
b = a + dcdx,
|
||||
c = b + dcdx,
|
||||
d = c + dcdx;
|
||||
for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
|
||||
fDevice[i+0] = SkPMFloat(a).round();
|
||||
fDevice[i+1] = SkPMFloat(b).round();
|
||||
fDevice[i+2] = SkPMFloat(c).round();
|
||||
fDevice[i+3] = SkPMFloat(d).round();
|
||||
a.toBytes((uint8_t*)(fDevice+i+0));
|
||||
b.toBytes((uint8_t*)(fDevice+i+1));
|
||||
c.toBytes((uint8_t*)(fDevice+i+2));
|
||||
d.toBytes((uint8_t*)(fDevice+i+3));
|
||||
a = a + dcdx4;
|
||||
b = b + dcdx4;
|
||||
c = c + dcdx4;
|
||||
@ -81,5 +74,4 @@ struct PMFloatGradientBench : public Benchmark {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
DEF_BENCH(return new PMFloatGradientBench;)
|
||||
DEF_BENCH(return new Sk4fGradientBench;)
|
@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright 2015 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkPM_DEFINED
|
||||
#define SkPM_DEFINED
|
||||
|
||||
#include "SkTypes.h"
|
||||
#include "SkColor.h"
|
||||
#include "SkColorPriv.h"
|
||||
#include "SkNx.h"
|
||||
|
||||
// This file may be included multiple times by .cpp files with different flags, leading
|
||||
// to different definitions. Usually that doesn't matter because it's all inlined, but
|
||||
// in Debug modes the compilers may not inline everything. So wrap everything in an
|
||||
// anonymous namespace to give each includer their own silo of this code (or the linker
|
||||
// will probably pick one randomly for us, which is rarely correct).
|
||||
namespace {
|
||||
|
||||
// A pre-multiplied color storing each component in the same order as SkPMColor,
|
||||
// but as a float in the range [0, 1].
|
||||
class SkPMFloat : public Sk4f {
|
||||
public:
|
||||
static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
|
||||
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
|
||||
static SkPMFloat FromOpaqueColor(SkColor c); // Requires c's alpha == 0xFF.
|
||||
|
||||
Sk4f alphas() const { return Sk4f(this->a()); }
|
||||
|
||||
// Uninitialized.
|
||||
SkPMFloat() {}
|
||||
explicit SkPMFloat(SkPMColor c) { *this = Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255); }
|
||||
SkPMFloat(float a, float r, float g, float b)
|
||||
#ifdef SK_PMCOLOR_IS_RGBA
|
||||
: INHERITED(r,g,b,a) {}
|
||||
#else
|
||||
: INHERITED(b,g,r,a) {}
|
||||
#endif
|
||||
|
||||
SkPMFloat(const Sk4f& fs) : INHERITED(fs) {}
|
||||
|
||||
float a() const { return this->kth<SK_A32_SHIFT / 8>(); }
|
||||
float r() const { return this->kth<SK_R32_SHIFT / 8>(); }
|
||||
float g() const { return this->kth<SK_G32_SHIFT / 8>(); }
|
||||
float b() const { return this->kth<SK_B32_SHIFT / 8>(); }
|
||||
|
||||
SkPMColor round() const {
|
||||
SkPMColor c;
|
||||
(*this * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
|
||||
return c;
|
||||
}
|
||||
|
||||
bool isValid() const {
|
||||
return this->a() >= 0 && this->a() <= 1
|
||||
&& this->r() >= 0 && this->r() <= this->a()
|
||||
&& this->g() >= 0 && this->g() <= this->a()
|
||||
&& this->b() >= 0 && this->b() <= this->a();
|
||||
}
|
||||
|
||||
private:
|
||||
typedef Sk4f INHERITED;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif//SkPM_DEFINED
|
@ -12,7 +12,6 @@
|
||||
#include "SkLazyPtr.h"
|
||||
#include "SkMathPriv.h"
|
||||
#include "SkOpts.h"
|
||||
#include "SkPMFloat.h"
|
||||
#include "SkReadBuffer.h"
|
||||
#include "SkString.h"
|
||||
#include "SkWriteBuffer.h"
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include "SkColorMatrixFilter.h"
|
||||
#include "SkColorMatrix.h"
|
||||
#include "SkColorPriv.h"
|
||||
#include "SkPMFloat.h"
|
||||
#include "SkNx.h"
|
||||
#include "SkReadBuffer.h"
|
||||
#include "SkWriteBuffer.h"
|
||||
#include "SkUnPreMultiply.h"
|
||||
@ -239,25 +239,27 @@ uint32_t SkColorMatrixFilter::getFlags() const {
|
||||
return this->INHERITED::getFlags() | fFlags;
|
||||
}
|
||||
|
||||
static Sk4f scale_rgb(float scale) {
|
||||
static_assert(SK_A32_SHIFT == 24, "Alpha is lane 3");
|
||||
return Sk4f(scale, scale, scale, 1);
|
||||
}
|
||||
|
||||
static Sk4f premul(const Sk4f& x) {
|
||||
float scale = SkPMFloat(x).a();
|
||||
Sk4f pm = x * SkPMFloat(1, scale, scale, scale);
|
||||
|
||||
#ifdef SK_DEBUG
|
||||
SkPMFloat pmf(pm);
|
||||
SkASSERT(pmf.isValid());
|
||||
#endif
|
||||
|
||||
return pm;
|
||||
return x * scale_rgb(x.kth<SK_A32_SHIFT/8>());
|
||||
}
|
||||
|
||||
static Sk4f unpremul(const SkPMFloat& pm) {
|
||||
float scale = 1 / pm.a(); // candidate for fast/approx invert?
|
||||
return pm * SkPMFloat(1, scale, scale, scale);
|
||||
static Sk4f unpremul(const Sk4f& x) {
|
||||
return x * scale_rgb(1 / x.kth<SK_A32_SHIFT/8>()); // TODO: fast/approx invert?
|
||||
}
|
||||
|
||||
static Sk4f clamp_0_1(const Sk4f& value) {
|
||||
return Sk4f::Max(Sk4f::Min(value, Sk4f(1)), Sk4f(0));
|
||||
static Sk4f clamp_0_1(const Sk4f& x) {
|
||||
return Sk4f::Max(Sk4f::Min(x, Sk4f(1)), Sk4f(0));
|
||||
}
|
||||
|
||||
static SkPMColor round(const Sk4f& x) {
|
||||
SkPMColor c;
|
||||
(x * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
|
||||
return c;
|
||||
}
|
||||
|
||||
void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
|
||||
@ -285,7 +287,7 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
|
||||
const Sk4f c4 = Sk4f::Load(fTranspose + 16)*Sk4f(1.0f/255);
|
||||
|
||||
// todo: we could cache this in the constructor...
|
||||
SkPMColor matrix_translate_pmcolor = SkPMFloat(premul(clamp_0_1(c4))).round();
|
||||
SkPMColor matrix_translate_pmcolor = round(premul(clamp_0_1(c4)));
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
const SkPMColor src_c = src[i];
|
||||
@ -294,22 +296,22 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
|
||||
continue;
|
||||
}
|
||||
|
||||
SkPMFloat srcf(src_c);
|
||||
Sk4f srcf = Sk4f::FromBytes((const uint8_t*)&src_c) * Sk4f(1.0f/255);
|
||||
|
||||
if (0xFF != SkGetPackedA32(src_c)) {
|
||||
srcf = unpremul(srcf);
|
||||
}
|
||||
|
||||
Sk4f r4 = Sk4f(srcf.r());
|
||||
Sk4f g4 = Sk4f(srcf.g());
|
||||
Sk4f b4 = Sk4f(srcf.b());
|
||||
Sk4f a4 = Sk4f(srcf.a());
|
||||
Sk4f r4 = Sk4f(srcf.kth<SK_R32_SHIFT/8>());
|
||||
Sk4f g4 = Sk4f(srcf.kth<SK_G32_SHIFT/8>());
|
||||
Sk4f b4 = Sk4f(srcf.kth<SK_B32_SHIFT/8>());
|
||||
Sk4f a4 = Sk4f(srcf.kth<SK_A32_SHIFT/8>());
|
||||
|
||||
// apply matrix
|
||||
Sk4f dst4 = c0 * r4 + c1 * g4 + c2 * b4 + c3 * a4 + c4;
|
||||
|
||||
// clamp, re-premul, and write
|
||||
dst[i] = SkPMFloat(premul(clamp_0_1(dst4))).round();
|
||||
dst[i] = round(premul(clamp_0_1(dst4)));
|
||||
}
|
||||
} else {
|
||||
const State& state = fState;
|
||||
|
@ -9,7 +9,7 @@
|
||||
#define Sk4pxXfermode_DEFINED
|
||||
|
||||
#include "Sk4px.h"
|
||||
#include "SkPMFloat.h"
|
||||
#include "SkNx.h"
|
||||
#include "SkXfermode_proccoeff.h"
|
||||
|
||||
namespace {
|
||||
@ -110,11 +110,19 @@ XFERMODE(Lighten) {
|
||||
#undef XFERMODE
|
||||
|
||||
// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
|
||||
#define XFERMODE(Name) static SkPMFloat SK_VECTORCALL Name(SkPMFloat d, SkPMFloat s)
|
||||
#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s)
|
||||
|
||||
static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
|
||||
static_assert(SK_A32_SHIFT == 24, "");
|
||||
return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0);
|
||||
}
|
||||
static inline Sk4f alphas(const Sk4f& f) {
|
||||
return Sk4f(f.kth<SK_A32_SHIFT/8>());
|
||||
}
|
||||
|
||||
XFERMODE(ColorDodge) {
|
||||
auto sa = s.alphas(),
|
||||
da = d.alphas(),
|
||||
auto sa = alphas(s),
|
||||
da = alphas(d),
|
||||
isa = Sk4f(1)-sa,
|
||||
ida = Sk4f(1)-da;
|
||||
|
||||
@ -126,11 +134,11 @@ XFERMODE(ColorDodge) {
|
||||
auto colors = (d == Sk4f(0)).thenElse(dstover,
|
||||
(s == sa).thenElse(srcover,
|
||||
otherwise));
|
||||
return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
|
||||
return a_rgb(srcover, colors);
|
||||
}
|
||||
XFERMODE(ColorBurn) {
|
||||
auto sa = s.alphas(),
|
||||
da = d.alphas(),
|
||||
auto sa = alphas(s),
|
||||
da = alphas(d),
|
||||
isa = Sk4f(1)-sa,
|
||||
ida = Sk4f(1)-da;
|
||||
|
||||
@ -142,11 +150,11 @@ XFERMODE(ColorBurn) {
|
||||
auto colors = (d == da).thenElse(dstover,
|
||||
(s == Sk4f(0)).thenElse(srcover,
|
||||
otherwise));
|
||||
return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
|
||||
return a_rgb(srcover, colors);
|
||||
}
|
||||
XFERMODE(SoftLight) {
|
||||
auto sa = s.alphas(),
|
||||
da = d.alphas(),
|
||||
auto sa = alphas(s),
|
||||
da = alphas(d),
|
||||
isa = Sk4f(1)-sa,
|
||||
ida = Sk4f(1)-da;
|
||||
|
||||
@ -167,7 +175,7 @@ XFERMODE(SoftLight) {
|
||||
auto alpha = s + d*isa;
|
||||
auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
|
||||
|
||||
return alpha * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
|
||||
return a_rgb(alpha, colors);
|
||||
}
|
||||
#undef XFERMODE
|
||||
|
||||
@ -232,10 +240,10 @@ private:
|
||||
typedef SkProcCoeffXfermode INHERITED;
|
||||
};
|
||||
|
||||
class SkPMFloatXfermode : public SkProcCoeffXfermode {
|
||||
class Sk4fXfermode : public SkProcCoeffXfermode {
|
||||
public:
|
||||
typedef SkPMFloat (SK_VECTORCALL *ProcF)(SkPMFloat, SkPMFloat);
|
||||
SkPMFloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
|
||||
typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f);
|
||||
Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
|
||||
: INHERITED(rec, mode)
|
||||
, fProcF(procf) {}
|
||||
|
||||
@ -256,18 +264,26 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static Sk4f Load(SkPMColor c) {
|
||||
return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255);
|
||||
}
|
||||
static SkPMColor Round(const Sk4f& f) {
|
||||
SkPMColor c;
|
||||
(f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
|
||||
return c;
|
||||
}
|
||||
inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
|
||||
return fProcF(SkPMFloat(dst), SkPMFloat(src)).round();
|
||||
return Round(fProcF(Load(dst), Load(src)));
|
||||
}
|
||||
|
||||
inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
|
||||
SkPMFloat s(src),
|
||||
d(dst),
|
||||
b(fProcF(d,s));
|
||||
Sk4f s(Load(src)),
|
||||
d(Load(dst)),
|
||||
b(fProcF(d,s));
|
||||
// We do aa in full float precision before going back down to bytes, because we can!
|
||||
SkPMFloat a = Sk4f(aa) * Sk4f(1.0f/255);
|
||||
Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
|
||||
b = b*a + d*(Sk4f(1)-a);
|
||||
return b.round();
|
||||
return Round(b);
|
||||
}
|
||||
|
||||
ProcF fProcF;
|
||||
@ -280,9 +296,8 @@ namespace SK_OPTS_NS {
|
||||
|
||||
static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
|
||||
switch (mode) {
|
||||
#define CASE(Mode) \
|
||||
case SkXfermode::k##Mode##_Mode: \
|
||||
return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
|
||||
#define CASE(Mode) \
|
||||
case SkXfermode::k##Mode##_Mode: return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
|
||||
CASE(Clear);
|
||||
CASE(Src);
|
||||
CASE(Dst);
|
||||
@ -307,9 +322,8 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
|
||||
CASE(Lighten);
|
||||
#undef CASE
|
||||
|
||||
#define CASE(Mode) \
|
||||
case SkXfermode::k##Mode##_Mode: \
|
||||
return new SkPMFloatXfermode(rec, mode, &Mode)
|
||||
#define CASE(Mode) \
|
||||
case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode)
|
||||
CASE(ColorDodge);
|
||||
CASE(ColorBurn);
|
||||
CASE(SoftLight);
|
||||
|
@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright 2015 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkPMFloat.h"
|
||||
#include "Test.h"
|
||||
|
||||
DEF_TEST(SkPMFloat, r) {
|
||||
// Test SkPMColor <-> SkPMFloat
|
||||
SkPMColor c = SkPreMultiplyColor(0xFFCC9933);
|
||||
SkPMFloat pmf(c);
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, 255*pmf.a()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(204.0f, 255*pmf.r()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(153.0f, 255*pmf.g()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual( 51.0f, 255*pmf.b()));
|
||||
REPORTER_ASSERT(r, c == pmf.round());
|
||||
|
||||
// Test rounding.
|
||||
pmf = SkPMFloat(254.5f/255, 203.5f/255, 153.1f/255, 50.8f/255);
|
||||
REPORTER_ASSERT(r, c == pmf.round());
|
||||
|
||||
SkPMFloat clamped(SkPMFloat(510.0f/255, 153.0f/255, 1.0f/255, -0.2f/255).round());
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, 255*clamped.a()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(153.0f, 255*clamped.r()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual( 1.0f, 255*clamped.g()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.0f, 255*clamped.b()));
|
||||
|
||||
// Test SkPMFloat <-> Sk4f conversion.
|
||||
Sk4f fs = clamped;
|
||||
SkPMFloat scaled = fs * Sk4f(0.25f);
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(63.75f, 255*scaled.a()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(38.25f, 255*scaled.r()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.25f, 255*scaled.g()));
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.00f, 255*scaled.b()));
|
||||
}
|
Loading…
Reference in New Issue
Block a user