Add Matrix colorfilter pipeline stages.

This breaks the color filter down into a couple of logical steps:
  - go to unpremul
  - apply the 4x5 matrix
  - clamp to [0,1]
  - go to premul
Because we already have handy premul clamp stages, we swap the order of clamp and premul. This is lossless.

While adding our stages to the pipeline, we analyze the matrix to see if we can skip any steps:
  - we can skip unpremul if the shader is opaque (alphas are all 1 ~~~> we're already unpremul);
  - we can skip the premul back if the color filter always produces opaque (here, are the inputs opaque and do we keep them that way, but we could also check for an explicit 0 0 0 0 1 alpha row);
  - we can skip the clamp_0 if the matrix can never produce a value less than 0;
  - we can skip the clamp_1 if the matrix can never produce a value greater than 1.

The only thing that should seem missing is per-pixel alpha checks. We don't do those here, but instead make up for it by operating on 4-8 pixels at a time.

We don't split the 4x5 matrix into a 4x4 and 1x4 translate. We could, but when we have FMA (new x86, all ARMv8) we might as well work the translate for free into the FMAs.

This makes gm/fadefilter.cpp draw differently in sRGB and F16 modes, bringing them in line with the GPU sRGB and GPU f16 configs. It's unclear to me what was wrong with the old CPU implementation.

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4346
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Change-Id: I14082ded8fb8d63354167d9e6b3f8058f840253e
Reviewed-on: https://skia-review.googlesource.com/4346
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
2016-11-03 10:20:35 -04:00 · 2016-11-03 10:20:35 -04:00 · eea7c16d59
commit eea7c16d59
parent 70ac8a9d09
11 changed files with 75 additions and 18 deletions
--- a/include/core/SkColorFilter.h
+++ b/include/core/SkColorFilter.h
@ -72,7 +72,7 @@ public:

    virtual void filterSpan4f(const SkPM4f src[], int count, SkPM4f result[]) const;

-    bool appendStages(SkRasterPipeline*) const;
+    bool appendStages(SkRasterPipeline*, bool shaderIsOpaque) const;

    enum Flags {
        /** If set the filter methods will not change the alpha channel of the colors.
@ -163,7 +163,7 @@ public:
 protected:
    SkColorFilter() {}

-    virtual bool onAppendStages(SkRasterPipeline*) const;
+    virtual bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const;

 private:
    /*
--- a/include/effects/SkLumaColorFilter.h
+++ b/include/effects/SkLumaColorFilter.h
@ -42,7 +42,7 @@ protected:

 private:
    SkLumaColorFilter();
-    bool onAppendStages(SkRasterPipeline*) const override;
+    bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const override;

    typedef SkColorFilter INHERITED;
 };
--- a/src/core/SkColorFilter.cpp
+++ b/src/core/SkColorFilter.cpp
@ -37,11 +37,11 @@ sk_sp<GrFragmentProcessor> SkColorFilter::asFragmentProcessor(GrContext*, SkColo
 }
 #endif

-bool SkColorFilter::appendStages(SkRasterPipeline* pipeline) const {
-    return this->onAppendStages(pipeline);
+bool SkColorFilter::appendStages(SkRasterPipeline* pipeline, bool shaderIsOpaque) const {
+    return this->onAppendStages(pipeline, shaderIsOpaque);
 }

-bool SkColorFilter::onAppendStages(SkRasterPipeline*) const {
+bool SkColorFilter::onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const {
    return false;
 }

--- a/src/core/SkColorMatrixFilterRowMajor255.cpp
+++ b/src/core/SkColorMatrixFilterRowMajor255.cpp
@ -9,6 +9,7 @@
 #include "SkColorPriv.h"
 #include "SkNx.h"
 #include "SkPM4fPriv.h"
+#include "SkRasterPipeline.h"
 #include "SkReadBuffer.h"
 #include "SkRefCnt.h"
 #include "SkString.h"
@ -230,6 +231,30 @@ static void set_concat(SkScalar result[20], const SkScalar outer[20], const SkSc
 //  End duplication
 //////

+bool SkColorMatrixFilterRowMajor255::onAppendStages(SkRasterPipeline* p,
+                                                    bool shaderIsOpaque) const {
+    bool willStayOpaque = shaderIsOpaque && (fFlags & kAlphaUnchanged_Flag);
+    bool needsClamp0 = false,
+         needsClamp1 = false;
+    for (int i = 0; i < 4; i++) {
+        SkScalar min = fTranspose[i+16],
+                 max = fTranspose[i+16];
+        (fTranspose[i+ 0] < 0 ? min : max) += fTranspose[i+ 0];
+        (fTranspose[i+ 4] < 0 ? min : max) += fTranspose[i+ 4];
+        (fTranspose[i+ 8] < 0 ? min : max) += fTranspose[i+ 8];
+        (fTranspose[i+12] < 0 ? min : max) += fTranspose[i+12];
+        needsClamp0 = needsClamp0 || min < 0;
+        needsClamp1 = needsClamp1 || max > 1;
+    }
+
+    if (!shaderIsOpaque) { p->append(SkRasterPipeline::unpremul); }
+    if (           true) { p->append(SkRasterPipeline::matrix_4x5, fTranspose); }
+    if (!willStayOpaque) { p->append(SkRasterPipeline::premul); }
+    if (    needsClamp0) { p->append(SkRasterPipeline::clamp_0); }
+    if (    needsClamp1) { p->append(SkRasterPipeline::clamp_a); }
+    return true;
+}
+
 sk_sp<SkColorFilter>
 SkColorMatrixFilterRowMajor255::makeComposed(sk_sp<SkColorFilter> innerFilter) const {
    SkScalar innerMatrix[20];
--- a/src/core/SkColorMatrixFilterRowMajor255.h
+++ b/src/core/SkColorMatrixFilterRowMajor255.h
@ -36,6 +36,8 @@ protected:
    void flatten(SkWriteBuffer&) const override;

 private:
+    bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const override;
+
    SkScalar        fMatrix[20];
    float           fTranspose[20]; // for Sk4s
    uint32_t        fFlags;
--- a/src/core/SkModeColorFilter.cpp
+++ b/src/core/SkModeColorFilter.cpp
@ -85,7 +85,7 @@ sk_sp<SkFlattenable> SkModeColorFilter::CreateProc(SkReadBuffer& buffer) {
    return SkColorFilter::MakeModeFilter(color, mode);
 }

-bool SkModeColorFilter::onAppendStages(SkRasterPipeline* p) const {
+bool SkModeColorFilter::onAppendStages(SkRasterPipeline* p, bool shaderIsOpaque) const {
    // TODO: For some modes we can cut a stage by loading the fPM4f into dr,dg,db,da
    // and applying the opposite xfermode, e.g. dst-in instead of src-in.
    p->append(SkRasterPipeline::swap_src_dst);
@ -94,7 +94,7 @@ bool SkModeColorFilter::onAppendStages(SkRasterPipeline* p) const {
    if (!SkBlendMode_AppendStages(mode, p)) {
        return false;
    }
-    if (SkBlendMode_CanOverflow(mode)) { p->append(SkRasterPipeline::clamp_1); }
+    if (SkBlendMode_CanOverflow(mode)) { p->append(SkRasterPipeline::clamp_a); }
    return true;
 }

--- a/src/core/SkModeColorFilter.h
+++ b/src/core/SkModeColorFilter.h
@ -45,7 +45,7 @@ protected:

    void flatten(SkWriteBuffer&) const override;

-    bool onAppendStages(SkRasterPipeline*) const override;
+    bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const override;

 private:
    SkColor             fColor;
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@ -55,7 +55,8 @@
 // the Stage*.  This mostly matters on 64-bit Windows where every register is precious.

 #define SK_RASTER_PIPELINE_STAGES(M)                             \
-    M(swap_src_dst) M(constant_color) M(clamp_1)                 \
+    M(swap_src_dst) M(clamp_0) M(clamp_a) M(unpremul) M(premul)  \
+    M(constant_color)                                            \
    M(load_s_565)  M(load_d_565)  M(store_565)                   \
    M(load_s_srgb) M(load_d_srgb) M(store_srgb)                  \
    M(load_s_f16)  M(load_d_f16)  M(store_f16)                   \
@ -67,7 +68,7 @@
    M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_)  \
    M(colorburn) M(colordodge) M(darken) M(difference)           \
    M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) \
-    M(luminance_to_alpha)
+    M(luminance_to_alpha) M(matrix_4x5)

 class SkRasterPipeline {
 public:
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@ -99,12 +99,18 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
        return nullptr;  // TODO
    }

+    uint32_t paintColor = paint.getColor();
+    bool shaderIsOpaque = (paintColor >> 24) == 0xff;
+
    SkRasterPipeline shader, colorFilter;
-    if (paint.getColorFilter() && !paint.getColorFilter()->appendStages(&colorFilter)) {
+    if (paint.getShader()) {
+        shaderIsOpaque = paint.getShader()->isOpaque();
+    }
+    if ( paint.getColorFilter() &&
+        !paint.getColorFilter()->appendStages(&colorFilter, shaderIsOpaque)) {
        return nullptr;
    }

-    uint32_t paintColor = paint.getColor();

    SkColor4f color;
    if (dst.info().colorSpace()) {
@ -166,7 +172,7 @@ void SkRasterPipelineBlitter::append_blend(SkRasterPipeline* p) const {
 }

 void SkRasterPipelineBlitter::maybe_clamp(SkRasterPipeline* p) const {
-    if (SkBlendMode_CanOverflow(fBlend)) { p->append(SkRasterPipeline::clamp_1); }
+    if (SkBlendMode_CanOverflow(fBlend)) { p->append(SkRasterPipeline::clamp_a); }
 }

 void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
--- a/src/effects/SkLumaColorFilter.cpp
+++ b/src/effects/SkLumaColorFilter.cpp
@ -38,7 +38,7 @@ void SkLumaColorFilter::filterSpan(const SkPMColor src[], int count,
    }
 }

-bool SkLumaColorFilter::onAppendStages(SkRasterPipeline* p) const {
+bool SkLumaColorFilter::onAppendStages(SkRasterPipeline* p, bool shaderIsOpaque) const {
    p->append(SkRasterPipeline::luminance_to_alpha);
    return true;
 }
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@ -193,22 +193,32 @@ SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {

 STAGE(just_return, false) { }

-/*  We don't seem to have a need for this yet.
 STAGE(clamp_0, true) {
    a = SkNf::Max(a, 0.0f);
    r = SkNf::Max(r, 0.0f);
    g = SkNf::Max(g, 0.0f);
    b = SkNf::Max(b, 0.0f);
 }
-*/

-STAGE(clamp_1, true) {
+STAGE(clamp_a, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, a);
    g = SkNf::Min(g, a);
    b = SkNf::Min(b, a);
 }

+STAGE(unpremul, true) {
+    r *= a.invert();
+    g *= a.invert();
+    b *= a.invert();
+}
+
+STAGE(premul, true) {
+    r *= a;
+    g *= a;
+    b *= a;
+}
+
 STAGE(swap_src_dst, true) {
    SkTSwap(r,dr);
    SkTSwap(g,dg);
@ -450,6 +460,19 @@ STAGE(luminance_to_alpha, true) {
    r = g = b = 0;
 }

+STAGE(matrix_4x5, true) {
+    auto m = (const float*)ctx;
+
+    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
+    auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
+         G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
+         B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
+         A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
+    r = R;
+    g = G;
+    b = B;
+    a = A;
+}

 template <typename Fn>
 SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {