Clean up remaining users of SkPMFloat

This switches over SkXfermodes_opts.h and SkColorMatrixFilter to use Sk4f,
and converts the SkPMFloat benches to Sk4f benches.

No pixels should change here, and no code beyond the Sk4f_ benches should change speed.
The benches are faster than the old versions.

BUG=skia:4117

Review URL: https://codereview.chromium.org/1324743002
This commit is contained in:
mtklein 2015-08-31 15:26:08 -07:00 committed by Commit bot
parent 99138876a6
commit dde03ff89f
6 changed files with 84 additions and 184 deletions

View File

@ -6,7 +6,8 @@
*/
#include "Benchmark.h"
#include "SkPMFloat.h"
#include "SkColor.h"
#include "SkNx.h"
// Used to prevent the compiler from optimizing away the whole loop.
volatile uint32_t blackhole = 0;
@ -19,11 +20,10 @@ static uint32_t lcg_rand(uint32_t* seed) {
return *seed;
}
// I'm having better luck getting these to constant-propagate away as template parameters.
struct PMFloatRoundtripBench : public Benchmark {
PMFloatRoundtripBench() {}
struct Sk4fBytesRoundtripBench : public Benchmark {
Sk4fBytesRoundtripBench() {}
const char* onGetName() override { return "SkPMFloat_roundtrip"; }
const char* onGetName() override { return "Sk4f_roundtrip"; }
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
void onDraw(const int loops, SkCanvas* canvas) override {
@ -31,32 +31,25 @@ struct PMFloatRoundtripBench : public Benchmark {
uint32_t junk = 0;
uint32_t seed = 0;
for (int i = 0; i < loops; i++) {
SkPMColor color;
#ifdef SK_DEBUG
// Our SkASSERTs will remind us that it's technically required that we premultiply.
color = SkPreMultiplyColor(lcg_rand(&seed));
#else
// But it's a lot faster not to, and this code won't really mind the non-PM colors.
color = lcg_rand(&seed);
#endif
auto f = SkPMFloat::FromPMColor(color);
SkPMColor back = f.round();
uint32_t color = lcg_rand(&seed),
back;
auto f = Sk4f::FromBytes((const uint8_t*)&color);
f.toBytes((uint8_t*)&back);
junk ^= back;
}
blackhole ^= junk;
}
};
DEF_BENCH(return new PMFloatRoundtripBench;)
DEF_BENCH(return new Sk4fBytesRoundtripBench;)
struct PMFloatGradientBench : public Benchmark {
const char* onGetName() override { return "PMFloat_gradient"; }
struct Sk4fGradientBench : public Benchmark {
const char* onGetName() override { return "Sk4f_gradient"; }
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
SkPMColor fDevice[100];
void onDraw(const int loops, SkCanvas*) override {
Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0),
c1 = SkPMFloat::FromARGB(1, 0, 0, 1),
Sk4f c0(0,0,255,255),
c1(255,0,0,255),
dc = c1 - c0,
fx(0.1f),
dx(0.002f),
@ -64,15 +57,15 @@ struct PMFloatGradientBench : public Benchmark {
dcdx4(dcdx+dcdx+dcdx+dcdx);
for (int n = 0; n < loops; n++) {
Sk4f a = c0 + dc*fx,
Sk4f a = c0 + dc*fx + Sk4f(0.5f), // add an extra 0.5f to get rounding for free.
b = a + dcdx,
c = b + dcdx,
d = c + dcdx;
for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
fDevice[i+0] = SkPMFloat(a).round();
fDevice[i+1] = SkPMFloat(b).round();
fDevice[i+2] = SkPMFloat(c).round();
fDevice[i+3] = SkPMFloat(d).round();
a.toBytes((uint8_t*)(fDevice+i+0));
b.toBytes((uint8_t*)(fDevice+i+1));
c.toBytes((uint8_t*)(fDevice+i+2));
d.toBytes((uint8_t*)(fDevice+i+3));
a = a + dcdx4;
b = b + dcdx4;
c = c + dcdx4;
@ -81,5 +74,4 @@ struct PMFloatGradientBench : public Benchmark {
}
}
};
DEF_BENCH(return new PMFloatGradientBench;)
DEF_BENCH(return new Sk4fGradientBench;)

View File

@ -1,69 +0,0 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkPM_DEFINED
#define SkPM_DEFINED
#include "SkTypes.h"
#include "SkColor.h"
#include "SkColorPriv.h"
#include "SkNx.h"
// This file may be included multiple times by .cpp files with different flags, leading
// to different definitions. Usually that doesn't matter because it's all inlined, but
// in Debug modes the compilers may not inline everything. So wrap everything in an
// anonymous namespace to give each includer their own silo of this code (or the linker
// will probably pick one randomly for us, which is rarely correct).
namespace {
// A pre-multiplied color storing each component in the same order as SkPMColor,
// but as a float in the range [0, 1].
//
// Derives publicly from Sk4f, so an SkPMFloat can be used wherever an Sk4f is expected;
// the converting constructor from Sk4f below goes the other way.
class SkPMFloat : public Sk4f {
public:
// Named constructors forwarding to the matching constructors below.
static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
// Declared only; no definition is visible in this header.
static SkPMFloat FromOpaqueColor(SkColor c); // Requires c's alpha == 0xFF.
// Returns a vector with the alpha component broadcast to all four lanes.
Sk4f alphas() const { return Sk4f(this->a()); }
// Uninitialized.
SkPMFloat() {}
// Unpacks the four bytes of c into floats and scales them by 1/255.
explicit SkPMFloat(SkPMColor c) { *this = Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255); }
// Builds a color from individual components. The lane order handed to Sk4f depends on
// the build's SkPMColor layout: r,g,b,a when SK_PMCOLOR_IS_RGBA is defined, else b,g,r,a.
SkPMFloat(float a, float r, float g, float b)
#ifdef SK_PMCOLOR_IS_RGBA
: INHERITED(r,g,b,a) {}
#else
: INHERITED(b,g,r,a) {}
#endif
// Implicit conversion from a plain Sk4f; the lanes are copied as-is.
SkPMFloat(const Sk4f& fs) : INHERITED(fs) {}
// Component accessors. Each picks the lane whose index is that component's
// SkPMColor bit shift divided by 8.
float a() const { return this->kth<SK_A32_SHIFT / 8>(); }
float r() const { return this->kth<SK_R32_SHIFT / 8>(); }
float g() const { return this->kth<SK_G32_SHIFT / 8>(); }
float b() const { return this->kth<SK_B32_SHIFT / 8>(); }
// Converts back to a packed SkPMColor: scale by 255, add 0.5, then narrow with toBytes().
// NOTE(review): the +0.5 presumably turns a truncating float->byte conversion into
// round-to-nearest; toBytes() itself is not visible here.
SkPMColor round() const {
SkPMColor c;
(*this * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
return c;
}
// True when alpha lies in [0, 1] and each of r, g, b lies in [0, alpha],
// i.e. the components are consistent with a premultiplied color.
bool isValid() const {
return this->a() >= 0 && this->a() <= 1
&& this->r() >= 0 && this->r() <= this->a()
&& this->g() >= 0 && this->g() <= this->a()
&& this->b() >= 0 && this->b() <= this->a();
}
private:
typedef Sk4f INHERITED;
};
} // namespace
#endif//SkPM_DEFINED

View File

@ -12,7 +12,6 @@
#include "SkLazyPtr.h"
#include "SkMathPriv.h"
#include "SkOpts.h"
#include "SkPMFloat.h"
#include "SkReadBuffer.h"
#include "SkString.h"
#include "SkWriteBuffer.h"

View File

@ -8,7 +8,7 @@
#include "SkColorMatrixFilter.h"
#include "SkColorMatrix.h"
#include "SkColorPriv.h"
#include "SkPMFloat.h"
#include "SkNx.h"
#include "SkReadBuffer.h"
#include "SkWriteBuffer.h"
#include "SkUnPreMultiply.h"
@ -239,25 +239,27 @@ uint32_t SkColorMatrixFilter::getFlags() const {
return this->INHERITED::getFlags() | fFlags;
}
// Builds the lane-wise multiplier (scale, scale, scale, 1): the first three lanes carry
// `scale`, while the last lane is 1 so that multiplying by the result leaves it unchanged.
static Sk4f scale_rgb(float scale) {
    // The unit factor is placed in lane 3, so alpha has to live in the top byte.
    static_assert(SK_A32_SHIFT == 24, "Alpha is lane 3");
    Sk4f factors(scale, scale, scale, 1);
    return factors;
}
static Sk4f premul(const Sk4f& x) {
float scale = SkPMFloat(x).a();
Sk4f pm = x * SkPMFloat(1, scale, scale, scale);
#ifdef SK_DEBUG
SkPMFloat pmf(pm);
SkASSERT(pmf.isValid());
#endif
return pm;
return x * scale_rgb(x.kth<SK_A32_SHIFT/8>());
}
static Sk4f unpremul(const SkPMFloat& pm) {
float scale = 1 / pm.a(); // candidate for fast/approx invert?
return pm * SkPMFloat(1, scale, scale, scale);
static Sk4f unpremul(const Sk4f& x) {
return x * scale_rgb(1 / x.kth<SK_A32_SHIFT/8>()); // TODO: fast/approx invert?
}
static Sk4f clamp_0_1(const Sk4f& value) {
return Sk4f::Max(Sk4f::Min(value, Sk4f(1)), Sk4f(0));
static Sk4f clamp_0_1(const Sk4f& x) {
return Sk4f::Max(Sk4f::Min(x, Sk4f(1)), Sk4f(0));
}
// Packs a float vector into an SkPMColor: every lane is scaled by 255, biased by 0.5,
// and then narrowed into the four bytes of the result via toBytes().
static SkPMColor round(const Sk4f& x) {
    Sk4f biased = x * Sk4f(255) + Sk4f(0.5f);
    SkPMColor packed;
    biased.toBytes((uint8_t*)&packed);
    return packed;
}
void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
@ -285,7 +287,7 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
const Sk4f c4 = Sk4f::Load(fTranspose + 16)*Sk4f(1.0f/255);
// todo: we could cache this in the constructor...
SkPMColor matrix_translate_pmcolor = SkPMFloat(premul(clamp_0_1(c4))).round();
SkPMColor matrix_translate_pmcolor = round(premul(clamp_0_1(c4)));
for (int i = 0; i < count; i++) {
const SkPMColor src_c = src[i];
@ -294,22 +296,22 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
continue;
}
SkPMFloat srcf(src_c);
Sk4f srcf = Sk4f::FromBytes((const uint8_t*)&src_c) * Sk4f(1.0f/255);
if (0xFF != SkGetPackedA32(src_c)) {
srcf = unpremul(srcf);
}
Sk4f r4 = Sk4f(srcf.r());
Sk4f g4 = Sk4f(srcf.g());
Sk4f b4 = Sk4f(srcf.b());
Sk4f a4 = Sk4f(srcf.a());
Sk4f r4 = Sk4f(srcf.kth<SK_R32_SHIFT/8>());
Sk4f g4 = Sk4f(srcf.kth<SK_G32_SHIFT/8>());
Sk4f b4 = Sk4f(srcf.kth<SK_B32_SHIFT/8>());
Sk4f a4 = Sk4f(srcf.kth<SK_A32_SHIFT/8>());
// apply matrix
Sk4f dst4 = c0 * r4 + c1 * g4 + c2 * b4 + c3 * a4 + c4;
// clamp, re-premul, and write
dst[i] = SkPMFloat(premul(clamp_0_1(dst4))).round();
dst[i] = round(premul(clamp_0_1(dst4)));
}
} else {
const State& state = fState;

View File

@ -9,7 +9,7 @@
#define Sk4pxXfermode_DEFINED
#include "Sk4px.h"
#include "SkPMFloat.h"
#include "SkNx.h"
#include "SkXfermode_proccoeff.h"
namespace {
@ -110,11 +110,19 @@ XFERMODE(Lighten) {
#undef XFERMODE
// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
#define XFERMODE(Name) static SkPMFloat SK_VECTORCALL Name(SkPMFloat d, SkPMFloat s)
#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s)
// Merges two vectors lane-wise: lane 3 (alpha) is taken from `a`, lanes 0-2 from `rgb`.
// Implemented with 0/1 multiplier masks rather than a blend.
static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
    // The masks below hard-code alpha as lane 3.
    static_assert(SK_A32_SHIFT == 24, "");
    Sk4f alphaPart = a * Sk4f(0,0,0,1);
    Sk4f colorPart = rgb * Sk4f(1,1,1,0);
    return alphaPart + colorPart;
}
// Broadcasts the alpha lane of `f` (lane index SK_A32_SHIFT/8) across all four lanes.
static inline Sk4f alphas(const Sk4f& f) {
    float alpha = f.kth<SK_A32_SHIFT/8>();
    return Sk4f(alpha);
}
XFERMODE(ColorDodge) {
auto sa = s.alphas(),
da = d.alphas(),
auto sa = alphas(s),
da = alphas(d),
isa = Sk4f(1)-sa,
ida = Sk4f(1)-da;
@ -126,11 +134,11 @@ XFERMODE(ColorDodge) {
auto colors = (d == Sk4f(0)).thenElse(dstover,
(s == sa).thenElse(srcover,
otherwise));
return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
return a_rgb(srcover, colors);
}
XFERMODE(ColorBurn) {
auto sa = s.alphas(),
da = d.alphas(),
auto sa = alphas(s),
da = alphas(d),
isa = Sk4f(1)-sa,
ida = Sk4f(1)-da;
@ -142,11 +150,11 @@ XFERMODE(ColorBurn) {
auto colors = (d == da).thenElse(dstover,
(s == Sk4f(0)).thenElse(srcover,
otherwise));
return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
return a_rgb(srcover, colors);
}
XFERMODE(SoftLight) {
auto sa = s.alphas(),
da = d.alphas(),
auto sa = alphas(s),
da = alphas(d),
isa = Sk4f(1)-sa,
ida = Sk4f(1)-da;
@ -167,7 +175,7 @@ XFERMODE(SoftLight) {
auto alpha = s + d*isa;
auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
return alpha * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
return a_rgb(alpha, colors);
}
#undef XFERMODE
@ -232,10 +240,10 @@ private:
typedef SkProcCoeffXfermode INHERITED;
};
class SkPMFloatXfermode : public SkProcCoeffXfermode {
class Sk4fXfermode : public SkProcCoeffXfermode {
public:
typedef SkPMFloat (SK_VECTORCALL *ProcF)(SkPMFloat, SkPMFloat);
SkPMFloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f);
Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
: INHERITED(rec, mode)
, fProcF(procf) {}
@ -256,18 +264,26 @@ public:
}
private:
// Unpacks the four bytes of an SkPMColor into float lanes and scales them by 1/255.
static Sk4f Load(SkPMColor c) {
    Sk4f lanes = Sk4f::FromBytes((uint8_t*)&c);
    return lanes * Sk4f(1.0f/255);
}
// Inverse of the 1/255 scaling: multiplies each lane by 255, adds 0.5, and narrows the
// four lanes into the bytes of an SkPMColor via toBytes().
static SkPMColor Round(const Sk4f& f) {
    Sk4f biased = f * Sk4f(255) + Sk4f(0.5f);
    SkPMColor packed;
    biased.toBytes((uint8_t*)&packed);
    return packed;
}
inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
return fProcF(SkPMFloat(dst), SkPMFloat(src)).round();
return Round(fProcF(Load(dst), Load(src)));
}
inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
SkPMFloat s(src),
d(dst),
b(fProcF(d,s));
Sk4f s(Load(src)),
d(Load(dst)),
b(fProcF(d,s));
// We do aa in full float precision before going back down to bytes, because we can!
SkPMFloat a = Sk4f(aa) * Sk4f(1.0f/255);
Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
b = b*a + d*(Sk4f(1)-a);
return b.round();
return Round(b);
}
ProcF fProcF;
@ -280,9 +296,8 @@ namespace SK_OPTS_NS {
static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
switch (mode) {
#define CASE(Mode) \
case SkXfermode::k##Mode##_Mode: \
return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
#define CASE(Mode) \
case SkXfermode::k##Mode##_Mode: return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
CASE(Clear);
CASE(Src);
CASE(Dst);
@ -307,9 +322,8 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
CASE(Lighten);
#undef CASE
#define CASE(Mode) \
case SkXfermode::k##Mode##_Mode: \
return new SkPMFloatXfermode(rec, mode, &Mode)
#define CASE(Mode) \
case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode)
CASE(ColorDodge);
CASE(ColorBurn);
CASE(SoftLight);

View File

@ -1,38 +0,0 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkPMFloat.h"
#include "Test.h"
// Unit test for SkPMFloat: checks the byte <-> float round trip, the rounding and clamping
// behavior of round(), and that SkPMFloat interoperates with plain Sk4f arithmetic.
DEF_TEST(SkPMFloat, r) {
// Test SkPMColor <-> SkPMFloat
// 0xFFCC9933 is opaque with R=0xCC (204), G=0x99 (153), B=0x33 (51); the assertions below
// expect exactly those values back once each component is rescaled by 255.
SkPMColor c = SkPreMultiplyColor(0xFFCC9933);
SkPMFloat pmf(c);
REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, 255*pmf.a()));
REPORTER_ASSERT(r, SkScalarNearlyEqual(204.0f, 255*pmf.r()));
REPORTER_ASSERT(r, SkScalarNearlyEqual(153.0f, 255*pmf.g()));
REPORTER_ASSERT(r, SkScalarNearlyEqual( 51.0f, 255*pmf.b()));
REPORTER_ASSERT(r, c == pmf.round());
// Test rounding.
// Each component is within 0.5 of the matching byte of c, so round() must reproduce c.
pmf = SkPMFloat(254.5f/255, 203.5f/255, 153.1f/255, 50.8f/255);
REPORTER_ASSERT(r, c == pmf.round());
// Out-of-range inputs: alpha of 510/255 is expected to come back as 255 and blue of
// -0.2/255 as 0, i.e. round() is expected to clamp to the byte range.
SkPMFloat clamped(SkPMFloat(510.0f/255, 153.0f/255, 1.0f/255, -0.2f/255).round());
REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, 255*clamped.a()));
REPORTER_ASSERT(r, SkScalarNearlyEqual(153.0f, 255*clamped.r()));
REPORTER_ASSERT(r, SkScalarNearlyEqual( 1.0f, 255*clamped.g()));
REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.0f, 255*clamped.b()));
// Test SkPMFloat <-> Sk4f conversion.
// Converts to Sk4f, scales every lane by 0.25, and converts back; the expected values are
// the clamped components divided by four.
Sk4f fs = clamped;
SkPMFloat scaled = fs * Sk4f(0.25f);
REPORTER_ASSERT(r, SkScalarNearlyEqual(63.75f, 255*scaled.a()));
REPORTER_ASSERT(r, SkScalarNearlyEqual(38.25f, 255*scaled.r()));
REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.25f, 255*scaled.g()));
REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.00f, 255*scaled.b()));
}