SkRasterPipeline::compile().

I'm not yet caching these in the blitter, and speed is essentially unchanged in the bench where I am now building and compiling the pipeline only once. This may not be able to stay a simple std::function after I figure out caching, but for now it's a nice fit.

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3911

Change-Id: I9545af589f73baf9f17cb4e6ace9a814c2478fe9
Reviewed-on: https://skia-review.googlesource.com/3911
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
2016-10-25 13:31:21 -04:00 · 2016-10-25 13:31:21 -04:00 · e9f74b89c0
commit e9f74b89c0
parent aebfb45104
10 changed files with 65 additions and 60 deletions
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@ -32,16 +32,18 @@ public:
    }

    void onDraw(int loops, SkCanvas*) override {
+        SkRasterPipeline p;
+        p.append(SkRasterPipeline::load_s_srgb, src);
+        p.append(SkRasterPipeline::   scale_u8, mask);
+        p.append(kF16 ? SkRasterPipeline::load_d_f16
+                      : SkRasterPipeline::load_d_srgb, dst);
+        p.append(SkRasterPipeline::    srcover);
+        p.append(kF16 ? SkRasterPipeline::store_f16
+                      : SkRasterPipeline::store_srgb, dst);
+        auto compiled = p.compile();
+
        while (loops --> 0) {
-            SkRasterPipeline p;
-            p.append(SkRasterPipeline::load_s_srgb, src);
-            p.append(SkRasterPipeline::   scale_u8, mask);
-            p.append(kF16 ? SkRasterPipeline::load_d_f16
-                          : SkRasterPipeline::load_d_srgb, dst);
-            p.append(SkRasterPipeline::    srcover);
-            p.append(kF16 ? SkRasterPipeline::store_f16
-                          : SkRasterPipeline::store_srgb, dst);
-            p.run(N);
+            compiled(0, N);
        }
    }
 };
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@ -89,7 +89,7 @@ namespace SkOpts {

    DEFINE_DEFAULT(hash_fn);

-    DEFINE_DEFAULT(run_pipeline);
+    DEFINE_DEFAULT(compile_pipeline);
 #undef DEFINE_DEFAULT

    // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@ -12,6 +12,7 @@
 #include "SkTextureCompressor.h"
 #include "SkTypes.h"
 #include "SkXfermode.h"
+#include <functional>

 struct ProcCoeff;

@ -73,7 +74,8 @@ namespace SkOpts {
        return hash_fn(data, bytes, seed);
    }

-    extern void (*run_pipeline)(size_t, size_t, const SkRasterPipeline::Stage*, int);
+    extern
+    std::function<void(size_t, size_t)> (*compile_pipeline)(const SkRasterPipeline::Stage*, int);
 }

 #endif//SkOpts_DEFINED
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@ -22,6 +22,6 @@ void SkRasterPipeline::extend(const SkRasterPipeline& src) {
    }
 }

-void SkRasterPipeline::run(size_t x, size_t n) const {
-    SkOpts::run_pipeline(x,n, fStages, fNum);
+std::function<void(size_t, size_t)> SkRasterPipeline::compile() const {
+    return SkOpts::compile_pipeline(fStages, fNum);
 }
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@ -11,6 +11,7 @@
 #include "SkNx.h"
 #include "SkTArray.h"
 #include "SkTypes.h"
+#include <functional>

 /**
 * SkRasterPipeline provides a cheap way to chain together a pixel processing pipeline.
@ -85,11 +86,8 @@ public:
    // Append all stages to this pipeline.
    void extend(const SkRasterPipeline&);

-    // Run the pipeline constructed with append(), walking x through [x,x+n),
-    // generally in 4-pixel steps, with perhaps one jagged tail step.
-    void run(size_t x, size_t n) const;
-    void run(size_t n) const { this->run(0, n); }
-
+    // Returns a callable that, when invoked as fn(x, n), runs the pipeline walking x through [x,x+n).
+    std::function<void(size_t x, size_t n)> compile() const;

    struct Stage {
        StockStage stage;
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@ -149,6 +149,8 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const
    }
 }

+// TODO: Figure out how to cache some of the compiled pipelines.
+
 void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
    auto dst = fDst.writable_addr(0,y);

@ -159,7 +161,7 @@ void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
    p.extend(fXfermode);
    this->append_store(&p, dst);

-    p.run(x, w);
+    p.compile()(x,w);
 }

 void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
@ -176,7 +178,7 @@ void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const

    for (int16_t run = *runs; run > 0; run = *runs) {
        coverage = *aa * (1/255.0f);
-        p.run(x, run);
+        p.compile()(x, run);

        x    += run;
        runs += run;
@ -210,6 +212,6 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
        }
        this->append_store(&p, dst);

-        p.run(x, clip.width());
+        p.compile()(x, clip.width());
    }
 }
--- a/src/opts/SkOpts_hsw.cpp
+++ b/src/opts/SkOpts_hsw.cpp
@ -12,7 +12,7 @@

 namespace SkOpts {
    void Init_hsw() {
-        run_pipeline = hsw::run_pipeline;
+        compile_pipeline = hsw::compile_pipeline;
    }
 }

--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@ -20,6 +20,6 @@ namespace SkOpts {
        box_blur_yx          = sse41::box_blur_yx;
        srcover_srgb_srgb    = sse41::srcover_srgb_srgb;
        blit_row_s32a_opaque = sse41::blit_row_s32a_opaque;
-        run_pipeline         = sse41::run_pipeline;
+        compile_pipeline     = sse41::compile_pipeline;
    }
 }
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@ -458,46 +458,47 @@ SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {

 namespace SK_OPTS_NS {

-    SI void run_pipeline(size_t x, size_t n,
-                         const SkRasterPipeline::Stage* stages, int nstages) {
-        SkASSERT(nstages <= SkRasterPipeline::kMaxStages);
-        if (nstages == 0) {
-            return;
-        }
+    SI std::function<void(size_t, size_t)> compile_pipeline(const SkRasterPipeline::Stage* stages,
+                                                            int nstages) {
+        struct Compiled {
+            Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
+                if (nstages == 0) {
+                    return;
+                }

-        SkNf v;  // Fastest to start uninitialized.
-
-        if (n >= N) {
-            BodyStage body[SkRasterPipeline::kMaxStages];
-
-            Body start = enum_to_Fn<Body>(stages[0].stage);
-            for (int i = 0; i < nstages-1; i++) {
-                body[i].next = enum_to_Fn<Body>(stages[i+1].stage);
-                body[i].ctx  = stages[i].ctx;
+                fBodyStart = enum_to_Fn<Body>(stages[0].stage);
+                fTailStart = enum_to_Fn<Tail>(stages[0].stage);
+                for (int i = 0; i < nstages-1; i++) {
+                    fBody[i].next = enum_to_Fn<Body>(stages[i+1].stage);
+                    fTail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
+                    fBody[i].ctx = fTail[i].ctx = stages[i].ctx;
+                }
+                fBody[nstages-1].next = just_return;
+                fTail[nstages-1].next = just_return;
+                fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
            }
-            body[nstages-1].next = just_return;
-            body[nstages-1].ctx  = stages[nstages-1].ctx;

-            do {
-                start(body, x, v,v,v,v, v,v,v,v);
-                x += N;
-                n -= N;
-            } while (n >= N);
-        }
+            void operator()(size_t x, size_t n) {
+                SkNf v;  // Fastest to start uninitialized.

-        if (n > 0) {
-            TailStage tail[SkRasterPipeline::kMaxStages];
-
-            Tail start = enum_to_Fn<Tail>(stages[0].stage);
-            for (int i = 0; i < nstages-1; i++) {
-                tail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
-                tail[i].ctx  = stages[i].ctx;
+                while (n >= N) {
+                    fBodyStart(fBody, x, v,v,v,v, v,v,v,v);
+                    x += N;
+                    n -= N;
+                }
+                if (n) {
+                    fTailStart(fTail, x,n, v,v,v,v, v,v,v,v);
+                }
            }
-            tail[nstages-1].next = just_return;
-            tail[nstages-1].ctx  = stages[nstages-1].ctx;

-            start(tail, x,n, v,v,v,v, v,v,v,v);
-        }
+            Body fBodyStart = just_return;
+            Tail fTailStart = just_return;
+
+            BodyStage fBody[SkRasterPipeline::kMaxStages];
+            TailStage fTail[SkRasterPipeline::kMaxStages];
+
+        } fn { stages, nstages };
+        return fn;
    }

 }  // namespace SK_OPTS_NS
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@ -21,7 +21,7 @@ DEF_TEST(SkRasterPipeline, r) {
    p.append(SkRasterPipeline::load_d_f16, &red);
    p.append(SkRasterPipeline::srcover);
    p.append(SkRasterPipeline::store_f16, &result);
-    p.run(1);
+    p.compile()(0, 1);

    // We should see half-intensity magenta.
    REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
@ -33,7 +33,7 @@ DEF_TEST(SkRasterPipeline, r) {
 DEF_TEST(SkRasterPipeline_empty, r) {
    // No asserts... just a test that this is safe to run.
    SkRasterPipeline p;
-    p.run(20);
+    p.compile()(0,20);
 }

 DEF_TEST(SkRasterPipeline_nonsense, r) {
@ -41,5 +41,5 @@ DEF_TEST(SkRasterPipeline_nonsense, r) {
    // srcover() calls st->next(); this makes sure we've always got something there to call.
    SkRasterPipeline p;
    p.append(SkRasterPipeline::srcover);
-    p.run(20);
+    p.compile()(0, 20);
 }