Clean up remaining users of SkPMFloat

This switches over SkXfermodes_opts.h and SkColorMatrixFilter to use Sk4f, and converts the SkPMFloat benches to Sk4f benches. No pixels should change here, and no code beyond the Sk4f_ benches should change speed. The benches are faster than the old versions. BUG=skia:4117 Review URL: https://codereview.chromium.org/1324743002
2015-08-31 15:26:08 -07:00 · 2015-08-31 15:26:08 -07:00 · dde03ff89f
commit dde03ff89f
parent 99138876a6
6 changed files with 84 additions and 184 deletions
--- a/bench/PMFloatBench.cpp
+++ b/bench/PMFloatBench.cpp
@ -6,7 +6,8 @@
 */

 #include "Benchmark.h"
-#include "SkPMFloat.h"
+#include "SkColor.h"
+#include "SkNx.h"

 // Used to prevent the compiler from optimizing away the whole loop.
 volatile uint32_t blackhole = 0;
@ -19,11 +20,10 @@ static uint32_t lcg_rand(uint32_t* seed) {
    return *seed;
 }

-// I'm having better luck getting these to constant-propagate away as template parameters.
-struct PMFloatRoundtripBench : public Benchmark {
-    PMFloatRoundtripBench() {}
+struct Sk4fBytesRoundtripBench : public Benchmark {
+    Sk4fBytesRoundtripBench() {}

-    const char* onGetName() override { return "SkPMFloat_roundtrip"; }
+    const char* onGetName() override { return "Sk4f_roundtrip"; }
    bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }

    void onDraw(const int loops, SkCanvas* canvas) override {
@ -31,32 +31,25 @@ struct PMFloatRoundtripBench : public Benchmark {
        uint32_t junk = 0;
        uint32_t seed = 0;
        for (int i = 0; i < loops; i++) {
-            SkPMColor color;
-        #ifdef SK_DEBUG
-            // Our SkASSERTs will remind us that it's technically required that we premultiply.
-            color = SkPreMultiplyColor(lcg_rand(&seed));
-        #else
-            // But it's a lot faster not to, and this code won't really mind the non-PM colors.
-            color = lcg_rand(&seed);
-        #endif
-
-            auto f = SkPMFloat::FromPMColor(color);
-            SkPMColor back = f.round();
+            uint32_t color = lcg_rand(&seed),
+                     back;
+            auto f = Sk4f::FromBytes((const uint8_t*)&color);
+            f.toBytes((uint8_t*)&back);
            junk ^= back;
        }
        blackhole ^= junk;
    }
 };
-DEF_BENCH(return new PMFloatRoundtripBench;)
+DEF_BENCH(return new Sk4fBytesRoundtripBench;)

-struct PMFloatGradientBench : public Benchmark {
-    const char* onGetName() override { return "PMFloat_gradient"; }
+struct Sk4fGradientBench : public Benchmark {
+    const char* onGetName() override { return "Sk4f_gradient"; }
    bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }

    SkPMColor fDevice[100];
    void onDraw(const int loops, SkCanvas*) override {
-        Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0),
-             c1 = SkPMFloat::FromARGB(1, 0, 0, 1),
+        Sk4f c0(0,0,255,255),
+             c1(255,0,0,255),
             dc = c1 - c0,
             fx(0.1f),
             dx(0.002f),
@ -64,15 +57,15 @@ struct PMFloatGradientBench : public Benchmark {
             dcdx4(dcdx+dcdx+dcdx+dcdx);

        for (int n = 0; n < loops; n++) {
-            Sk4f a = c0 + dc*fx,
+            Sk4f a = c0 + dc*fx + Sk4f(0.5f),  // add an extra 0.5f to get rounding for free.
                 b = a + dcdx,
                 c = b + dcdx,
                 d = c + dcdx;
            for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
-                fDevice[i+0] = SkPMFloat(a).round();
-                fDevice[i+1] = SkPMFloat(b).round();
-                fDevice[i+2] = SkPMFloat(c).round();
-                fDevice[i+3] = SkPMFloat(d).round();
+                a.toBytes((uint8_t*)(fDevice+i+0));
+                b.toBytes((uint8_t*)(fDevice+i+1));
+                c.toBytes((uint8_t*)(fDevice+i+2));
+                d.toBytes((uint8_t*)(fDevice+i+3));
                a = a + dcdx4;
                b = b + dcdx4;
                c = c + dcdx4;
@ -81,5 +74,4 @@ struct PMFloatGradientBench : public Benchmark {
        }
    }
 };
-
-DEF_BENCH(return new PMFloatGradientBench;)
+DEF_BENCH(return new Sk4fGradientBench;)
--- a/src/core/SkPMFloat.h
+++ b/src/core/SkPMFloat.h
@ -1,69 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkPM_DEFINED
-#define SkPM_DEFINED
-
-#include "SkTypes.h"
-#include "SkColor.h"
-#include "SkColorPriv.h"
-#include "SkNx.h"
-
-// This file may be included multiple times by .cpp files with different flags, leading
-// to different definitions.  Usually that doesn't matter because it's all inlined, but
-// in Debug modes the compilers may not inline everything.  So wrap everything in an
-// anonymous namespace to give each includer their own silo of this code (or the linker
-// will probably pick one randomly for us, which is rarely correct).
-namespace {
-
-// A pre-multiplied color storing each component in the same order as SkPMColor,
-// but as a float in the range [0, 1].
-class SkPMFloat : public Sk4f {
-public:
-    static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
-    static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
-    static SkPMFloat FromOpaqueColor(SkColor c);  // Requires c's alpha == 0xFF.
-
-    Sk4f alphas() const { return Sk4f(this->a()); }
-
-    // Uninitialized.
-    SkPMFloat() {}
-    explicit SkPMFloat(SkPMColor c) { *this = Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255); }
-    SkPMFloat(float a, float r, float g, float b)
-    #ifdef SK_PMCOLOR_IS_RGBA
-        : INHERITED(r,g,b,a) {}
-    #else
-        : INHERITED(b,g,r,a) {}
-    #endif
-
-    SkPMFloat(const Sk4f& fs) : INHERITED(fs) {}
-
-    float a() const { return this->kth<SK_A32_SHIFT / 8>(); }
-    float r() const { return this->kth<SK_R32_SHIFT / 8>(); }
-    float g() const { return this->kth<SK_G32_SHIFT / 8>(); }
-    float b() const { return this->kth<SK_B32_SHIFT / 8>(); }
-
-    SkPMColor round() const {
-        SkPMColor c;
-        (*this * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
-        return c;
-    }
-
-    bool isValid() const {
-        return this->a() >= 0 && this->a() <= 1
-            && this->r() >= 0 && this->r() <= this->a()
-            && this->g() >= 0 && this->g() <= this->a()
-            && this->b() >= 0 && this->b() <= this->a();
-    }
-
-private:
-    typedef Sk4f INHERITED;
-};
-
-}  // namespace
-
-#endif//SkPM_DEFINED
--- a/src/core/SkXfermode.cpp
+++ b/src/core/SkXfermode.cpp
@ -12,7 +12,6 @@
 #include "SkLazyPtr.h"
 #include "SkMathPriv.h"
 #include "SkOpts.h"
-#include "SkPMFloat.h"
 #include "SkReadBuffer.h"
 #include "SkString.h"
 #include "SkWriteBuffer.h"
--- a/src/effects/SkColorMatrixFilter.cpp
+++ b/src/effects/SkColorMatrixFilter.cpp
@ -8,7 +8,7 @@
 #include "SkColorMatrixFilter.h"
 #include "SkColorMatrix.h"
 #include "SkColorPriv.h"
-#include "SkPMFloat.h"
+#include "SkNx.h"
 #include "SkReadBuffer.h"
 #include "SkWriteBuffer.h"
 #include "SkUnPreMultiply.h"
@ -239,25 +239,27 @@ uint32_t SkColorMatrixFilter::getFlags() const {
    return this->INHERITED::getFlags() | fFlags;
 }

+static Sk4f scale_rgb(float scale) {
+    static_assert(SK_A32_SHIFT == 24, "Alpha is lane 3");
+    return Sk4f(scale, scale, scale, 1);
+}
+
 static Sk4f premul(const Sk4f& x) {
-    float scale = SkPMFloat(x).a();
-    Sk4f pm = x * SkPMFloat(1, scale, scale, scale);
-
-#ifdef SK_DEBUG
-    SkPMFloat pmf(pm);
-    SkASSERT(pmf.isValid());
-#endif
-
-    return pm;
+    return x * scale_rgb(x.kth<SK_A32_SHIFT/8>());
 }

-static Sk4f unpremul(const SkPMFloat& pm) {
-    float scale = 1 / pm.a(); // candidate for fast/approx invert?
-    return pm * SkPMFloat(1, scale, scale, scale);
+static Sk4f unpremul(const Sk4f& x) {
+    return x * scale_rgb(1 / x.kth<SK_A32_SHIFT/8>());  // TODO: fast/approx invert?
 }

-static Sk4f clamp_0_1(const Sk4f& value) {
-    return Sk4f::Max(Sk4f::Min(value, Sk4f(1)), Sk4f(0));
+static Sk4f clamp_0_1(const Sk4f& x) {
+    return Sk4f::Max(Sk4f::Min(x, Sk4f(1)), Sk4f(0));
+}
+
+static SkPMColor round(const Sk4f& x) {
+    SkPMColor c;
+    (x * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
+    return c;
 }

 void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
@ -285,7 +287,7 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
        const Sk4f c4 = Sk4f::Load(fTranspose + 16)*Sk4f(1.0f/255);

        // todo: we could cache this in the constructor...
-        SkPMColor matrix_translate_pmcolor = SkPMFloat(premul(clamp_0_1(c4))).round();
+        SkPMColor matrix_translate_pmcolor = round(premul(clamp_0_1(c4)));

        for (int i = 0; i < count; i++) {
            const SkPMColor src_c = src[i];
@ -294,22 +296,22 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
                continue;
            }

-            SkPMFloat srcf(src_c);
+            Sk4f srcf = Sk4f::FromBytes((const uint8_t*)&src_c) * Sk4f(1.0f/255);

            if (0xFF != SkGetPackedA32(src_c)) {
                srcf = unpremul(srcf);
            }

-            Sk4f r4 = Sk4f(srcf.r());
-            Sk4f g4 = Sk4f(srcf.g());
-            Sk4f b4 = Sk4f(srcf.b());
-            Sk4f a4 = Sk4f(srcf.a());
+            Sk4f r4 = Sk4f(srcf.kth<SK_R32_SHIFT/8>());
+            Sk4f g4 = Sk4f(srcf.kth<SK_G32_SHIFT/8>());
+            Sk4f b4 = Sk4f(srcf.kth<SK_B32_SHIFT/8>());
+            Sk4f a4 = Sk4f(srcf.kth<SK_A32_SHIFT/8>());

            // apply matrix
            Sk4f dst4 = c0 * r4 + c1 * g4 + c2 * b4 + c3 * a4 + c4;

            // clamp, re-premul, and write
-            dst[i] = SkPMFloat(premul(clamp_0_1(dst4))).round();
+            dst[i] = round(premul(clamp_0_1(dst4)));
        }
    } else {
        const State& state = fState;
--- a/src/opts/SkXfermode_opts.h
+++ b/src/opts/SkXfermode_opts.h
@ -9,7 +9,7 @@
 #define Sk4pxXfermode_DEFINED

 #include "Sk4px.h"
-#include "SkPMFloat.h"
+#include "SkNx.h"
 #include "SkXfermode_proccoeff.h"

 namespace {
@ -110,11 +110,19 @@ XFERMODE(Lighten) {
 #undef XFERMODE

 // Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
-#define XFERMODE(Name) static SkPMFloat SK_VECTORCALL Name(SkPMFloat d, SkPMFloat s)
+#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s)
+
+static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
+    static_assert(SK_A32_SHIFT == 24, "");
+    return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0);
+}
+static inline Sk4f alphas(const Sk4f& f) {
+    return Sk4f(f.kth<SK_A32_SHIFT/8>());
+}

 XFERMODE(ColorDodge) {
-    auto sa = s.alphas(),
-         da = d.alphas(),
+    auto sa = alphas(s),
+         da = alphas(d),
         isa = Sk4f(1)-sa,
         ida = Sk4f(1)-da;

@ -126,11 +134,11 @@ XFERMODE(ColorDodge) {
    auto colors = (d == Sk4f(0)).thenElse(dstover,
                  (s ==      sa).thenElse(srcover,
                                          otherwise));
-    return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
+    return a_rgb(srcover, colors);
 }
 XFERMODE(ColorBurn) {
-    auto sa = s.alphas(),
-         da = d.alphas(),
+    auto sa = alphas(s),
+         da = alphas(d),
         isa = Sk4f(1)-sa,
         ida = Sk4f(1)-da;

@ -142,11 +150,11 @@ XFERMODE(ColorBurn) {
    auto colors = (d ==      da).thenElse(dstover,
                  (s == Sk4f(0)).thenElse(srcover,
                                          otherwise));
-    return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
+    return a_rgb(srcover, colors);
 }
 XFERMODE(SoftLight) {
-    auto sa = s.alphas(),
-         da = d.alphas(),
+    auto sa = alphas(s),
+         da = alphas(d),
         isa = Sk4f(1)-sa,
         ida = Sk4f(1)-da;

@ -167,7 +175,7 @@ XFERMODE(SoftLight) {
    auto alpha  = s + d*isa;
    auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc);           // Case 1 or 2/3?

-    return alpha * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
+    return a_rgb(alpha, colors);
 }
 #undef XFERMODE

@ -232,10 +240,10 @@ private:
    typedef SkProcCoeffXfermode INHERITED;
 };

-class SkPMFloatXfermode : public SkProcCoeffXfermode {
+class Sk4fXfermode : public SkProcCoeffXfermode {
 public:
-    typedef SkPMFloat (SK_VECTORCALL *ProcF)(SkPMFloat, SkPMFloat);
-    SkPMFloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
+    typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f);
+    Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
        : INHERITED(rec, mode)
        , fProcF(procf) {}

@ -256,18 +264,26 @@ public:
    }

 private:
+    static Sk4f Load(SkPMColor c) {
+        return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255);
+    }
+    static SkPMColor Round(const Sk4f& f) {
+        SkPMColor c;
+        (f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
+        return c;
+    }
    inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
-        return fProcF(SkPMFloat(dst), SkPMFloat(src)).round();
+        return Round(fProcF(Load(dst), Load(src)));
    }

    inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
-        SkPMFloat s(src),
-                  d(dst),
-                  b(fProcF(d,s));
+        Sk4f s(Load(src)),
+             d(Load(dst)),
+             b(fProcF(d,s));
        // We do aa in full float precision before going back down to bytes, because we can!
-        SkPMFloat a = Sk4f(aa) * Sk4f(1.0f/255);
+        Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
        b = b*a + d*(Sk4f(1)-a);
-        return b.round();
+        return Round(b);
    }

    ProcF fProcF;
@ -280,9 +296,8 @@ namespace SK_OPTS_NS {

 static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
    switch (mode) {
-#define CASE(Mode)                   \
-    case SkXfermode::k##Mode##_Mode: \
-        return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
+#define CASE(Mode) \
+    case SkXfermode::k##Mode##_Mode: return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
        CASE(Clear);
        CASE(Src);
        CASE(Dst);
@ -307,9 +322,8 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
        CASE(Lighten);
    #undef CASE

-#define CASE(Mode)                   \
-    case SkXfermode::k##Mode##_Mode: \
-        return new SkPMFloatXfermode(rec, mode, &Mode)
+#define CASE(Mode) \
+    case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode)
        CASE(ColorDodge);
        CASE(ColorBurn);
        CASE(SoftLight);
--- a/tests/PMFloatTest.cpp
+++ b/tests/PMFloatTest.cpp
@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkPMFloat.h"
-#include "Test.h"
-
-DEF_TEST(SkPMFloat, r) {
-    // Test SkPMColor <-> SkPMFloat
-    SkPMColor c = SkPreMultiplyColor(0xFFCC9933);
-    SkPMFloat pmf(c);
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, 255*pmf.a()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(204.0f, 255*pmf.r()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(153.0f, 255*pmf.g()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual( 51.0f, 255*pmf.b()));
-    REPORTER_ASSERT(r, c == pmf.round());
-
-    // Test rounding.
-    pmf = SkPMFloat(254.5f/255, 203.5f/255, 153.1f/255, 50.8f/255);
-    REPORTER_ASSERT(r, c == pmf.round());
-
-    SkPMFloat clamped(SkPMFloat(510.0f/255, 153.0f/255, 1.0f/255, -0.2f/255).round());
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, 255*clamped.a()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(153.0f, 255*clamped.r()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(  1.0f, 255*clamped.g()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(  0.0f, 255*clamped.b()));
-
-    // Test SkPMFloat <-> Sk4f conversion.
-    Sk4f fs = clamped;
-    SkPMFloat scaled = fs * Sk4f(0.25f);
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(63.75f, 255*scaled.a()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual(38.25f, 255*scaled.r()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.25f, 255*scaled.g()));
-    REPORTER_ASSERT(r, SkScalarNearlyEqual( 0.00f, 255*scaled.b()));
-}