SkSplicer: test and fix loop logic

The new test would fail without the the change in SkSplicer.cpp to call fSpliced(x,x+body) instead of fSpliced(x,body). The rest of the changes are cosmetic, mostly renaming n to limit. Change-Id: Iae28802d0adb91e962ed3ee60fa5a4334bd140f9 Reviewed-on: https://skia-review.googlesource.com/6837 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
2017-01-10 13:42:51 -05:00 · 2017-01-10 13:42:51 -05:00 · 7ba89a15fc
commit 7ba89a15fc
parent 4641d7d055
3 changed files with 49 additions and 16 deletions
--- a/src/splicer/SkSplicer.cpp
+++ b/src/splicer/SkSplicer.cpp
@ -153,7 +153,11 @@ namespace {
            // Put the address of kConstants in rcx/x3, Stage argument 4 "k".
            set_k(&buf, &kConstants);

-            // We'll loop back to here as long as x<n after x += kStride.
+            // Our loop is the equivalent of this C++ code:
+            //    do {
+            //        ... run spliced stages...
+            //        x += kStride;
+            //    } while(x < limit);
            iaca_start(&buf);
            auto loop_start = buf.bytesWritten();  // Think of this like a label, loop_start:

@ -216,13 +220,11 @@ namespace {

        // Here's where we call fSpliced if we created it, fBackup if not.
        void operator()(size_t x, size_t y, size_t n) const {
-            // TODO: The looping logic is probably not correct for n < kStride tails or x != 0.
-
            size_t body = n/kStride*kStride;   // Largest multiple of kStride (4 or 8) <= n.
            if (fSpliced && body) {            // Can we run fSpliced for at least one kStride?
                // TODO: At some point we will want to pass in y...
-                using Fn = void(size_t x, size_t n);
-                ((Fn*)fSpliced)(x,body);
+                using Fn = void(size_t x, size_t limit);
+                ((Fn*)fSpliced)(x, x+body);

                // Fall through to fBackup for any n<kStride last pixels.
                x += body;
--- a/src/splicer/SkSplicer_stages.cpp
+++ b/src/splicer/SkSplicer_stages.cpp
@ -62,14 +62,14 @@ AI static U32 expand(U8  v) { return __builtin_convertvector(     v, U32); }

 // Stages all fit a common interface that allows SkSplicer to splice them together.
 using K = const SkSplicer_constants;
-using Stage = void(size_t x, size_t n, void* ctx, K* constants, F,F,F,F, F,F,F,F);
+using Stage = void(size_t x, size_t limit, void* ctx, K* k, F,F,F,F, F,F,F,F);

 // Stage's arguments act as the working set of registers within the final spliced function.
 // Here's a little primer on the x86-64/aarch64 ABIs:
-//   x:         rdi/x0          x and n work to drive the loop, like for (; x < n; x += 4 or 8)
-//   n:         rsi/x1
-//   ctx:       rdx/x2          Look for movabsq_rdx in SkSplicer.cpp to see how this works.
-//   constants: rcx/x3          Look for movabsq_rcx in SkSplicer.cpp to see how this works.
+//   x:         rdi/x0          x and limit work to drive the loop, see loop_start in SkSplicer.cpp.
+//   limit:     rsi/x1
+//   ctx:       rdx/x2          Look for set_ctx in SkSplicer.cpp to see how this works.
+//   k:         rcx/x3          Look for set_k in SkSplicer.cpp to see how this works.
 //   vectors:   ymm0-ymm7/v0-v7


@ -85,14 +85,14 @@ C void done(size_t, size_t, void*, K*, F,F,F,F, F,F,F,F);
 // This should feel familiar to anyone who's read SkRasterPipeline_opts.h.
 // It's just a convenience to make a valid, spliceable Stage, nothing magic.
 #define STAGE(name)                                                              \
-    AI static void name##_k(size_t x, size_t n, void* ctx, K* k,                 \
+    AI static void name##_k(size_t x, size_t limit, void* ctx, K* k,             \
                            F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
-    C void name(size_t x, size_t n, void* ctx, K* k,                             \
+    C void name(size_t x, size_t limit, void* ctx, K* k,                         \
                F r, F g, F b, F a, F dr, F dg, F db, F da) {                    \
-        name##_k(x,n,ctx,k, r,g,b,a, dr,dg,db,da);                               \
-        done    (x,n,ctx,k, r,g,b,a, dr,dg,db,da);                               \
+        name##_k(x,limit,ctx,k, r,g,b,a, dr,dg,db,da);                           \
+        done    (x,limit,ctx,k, r,g,b,a, dr,dg,db,da);                           \
    }                                                                            \
-    AI static void name##_k(size_t x, size_t n, void* ctx, K* k,                 \
+    AI static void name##_k(size_t x, size_t limit, void* ctx, K* k,             \
                            F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)

 // We can now define Stages!
@ -128,7 +128,7 @@ STAGE(srcover) {
    b = fma(db, A, b);
    a = fma(db, A, a);
 }
-STAGE(dstover) { srcover_k(x,n,ctx,k, dr,dg,db,da, r,g,b,a); }
+STAGE(dstover) { srcover_k(x,limit,ctx,k, dr,dg,db,da, r,g,b,a); }

 STAGE(clamp_0) {
    r = max(r, 0);
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@ -59,3 +59,34 @@ DEF_TEST(SkRasterPipeline_nonsense, r) {
    p.append(SkRasterPipeline::srcover);
    p.run(0,0, 20);
 }
+
+DEF_TEST(SkRasterPipeline_JIT, r) {
+    // This tests a couple odd corners that a JIT backend can stumble over.
+
+    uint32_t buf[72] = {
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+         1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
+        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    };
+
+    const uint32_t* src = buf +  0;
+    uint32_t*       dst = buf + 36;
+
+    // Copy buf[x] to buf[x+36] for x in [15,35).
+    SkRasterPipeline p;
+    p.append(SkRasterPipeline:: load_8888, &src);
+    p.append(SkRasterPipeline::store_8888, &dst);
+    auto fn = p.compile();
+    fn(15, 0, 20);
+
+    for (int i = 0; i < 36; i++) {
+        if (i < 15 || i == 35) {
+            REPORTER_ASSERT(r, dst[i] == 0);
+        } else {
+            REPORTER_ASSERT(r, dst[i] == (uint32_t)(i - 11));
+        }
+    }
+}