SkSplicer: test and fix loop logic

The new test would fail without the the change in SkSplicer.cpp to call fSpliced(x,x+body) instead of fSpliced(x,body).  The rest of the changes are cosmetic, mostly renaming n to limit.

Change-Id: Iae28802d0adb91e962ed3ee60fa5a4334bd140f9
Reviewed-on: https://skia-review.googlesource.com/6837
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Mike Klein 2017-01-10 13:42:51 -05:00 committed by Skia Commit-Bot
parent 4641d7d055
commit 7ba89a15fc
3 changed files with 49 additions and 16 deletions

View File

@ -153,7 +153,11 @@ namespace {
// Put the address of kConstants in rcx/x3, Stage argument 4 "k".
set_k(&buf, &kConstants);
// We'll loop back to here as long as x<n after x += kStride.
// Our loop is the equivalent of this C++ code:
// do {
// ... run spliced stages...
// x += kStride;
// } while(x < limit);
iaca_start(&buf);
auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start:
@ -216,13 +220,11 @@ namespace {
// Here's where we call fSpliced if we created it, fBackup if not.
void operator()(size_t x, size_t y, size_t n) const {
// TODO: The looping logic is probably not correct for n < kStride tails or x != 0.
size_t body = n/kStride*kStride; // Largest multiple of kStride (4 or 8) <= n.
if (fSpliced && body) { // Can we run fSpliced for at least one kStride?
// TODO: At some point we will want to pass in y...
using Fn = void(size_t x, size_t n);
((Fn*)fSpliced)(x,body);
using Fn = void(size_t x, size_t limit);
((Fn*)fSpliced)(x, x+body);
// Fall through to fBackup for any n<kStride last pixels.
x += body;

View File

@ -62,14 +62,14 @@ AI static U32 expand(U8 v) { return __builtin_convertvector( v, U32); }
// Stages all fit a common interface that allows SkSplicer to splice them together.
using K = const SkSplicer_constants;
using Stage = void(size_t x, size_t n, void* ctx, K* constants, F,F,F,F, F,F,F,F);
using Stage = void(size_t x, size_t limit, void* ctx, K* k, F,F,F,F, F,F,F,F);
// Stage's arguments act as the working set of registers within the final spliced function.
// Here's a little primer on the x86-64/aarch64 ABIs:
// x: rdi/x0 x and n work to drive the loop, like for (; x < n; x += 4 or 8)
// n: rsi/x1
// ctx: rdx/x2 Look for movabsq_rdx in SkSplicer.cpp to see how this works.
// constants: rcx/x3 Look for movabsq_rcx in SkSplicer.cpp to see how this works.
// x: rdi/x0 x and limit work to drive the loop, see loop_start in SkSplicer.cpp.
// limit: rsi/x1
// ctx: rdx/x2 Look for set_ctx in SkSplicer.cpp to see how this works.
// k: rcx/x3 Look for set_k in SkSplicer.cpp to see how this works.
// vectors: ymm0-ymm7/v0-v7
@ -85,14 +85,14 @@ C void done(size_t, size_t, void*, K*, F,F,F,F, F,F,F,F);
// This should feel familiar to anyone who's read SkRasterPipeline_opts.h.
// It's just a convenience to make a valid, spliceable Stage, nothing magic.
#define STAGE(name) \
AI static void name##_k(size_t x, size_t n, void* ctx, K* k, \
AI static void name##_k(size_t x, size_t limit, void* ctx, K* k, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
C void name(size_t x, size_t n, void* ctx, K* k, \
C void name(size_t x, size_t limit, void* ctx, K* k, \
F r, F g, F b, F a, F dr, F dg, F db, F da) { \
name##_k(x,n,ctx,k, r,g,b,a, dr,dg,db,da); \
done (x,n,ctx,k, r,g,b,a, dr,dg,db,da); \
name##_k(x,limit,ctx,k, r,g,b,a, dr,dg,db,da); \
done (x,limit,ctx,k, r,g,b,a, dr,dg,db,da); \
} \
AI static void name##_k(size_t x, size_t n, void* ctx, K* k, \
AI static void name##_k(size_t x, size_t limit, void* ctx, K* k, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
// We can now define Stages!
@ -128,7 +128,7 @@ STAGE(srcover) {
b = fma(db, A, b);
a = fma(db, A, a);
}
STAGE(dstover) { srcover_k(x,n,ctx,k, dr,dg,db,da, r,g,b,a); }
STAGE(dstover) { srcover_k(x,limit,ctx,k, dr,dg,db,da, r,g,b,a); }
STAGE(clamp_0) {
r = max(r, 0);

View File

@ -59,3 +59,34 @@ DEF_TEST(SkRasterPipeline_nonsense, r) {
p.append(SkRasterPipeline::srcover);
p.run(0,0, 20);
}
DEF_TEST(SkRasterPipeline_JIT, r) {
// This tests a couple odd corners that a JIT backend can stumble over.
uint32_t buf[72] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const uint32_t* src = buf + 0;
uint32_t* dst = buf + 36;
// Copy buf[x] to buf[x+36] for x in [15,35).
SkRasterPipeline p;
p.append(SkRasterPipeline:: load_8888, &src);
p.append(SkRasterPipeline::store_8888, &dst);
auto fn = p.compile();
fn(15, 0, 20);
for (int i = 0; i < 36; i++) {
if (i < 15 || i == 35) {
REPORTER_ASSERT(r, dst[i] == 0);
} else {
REPORTER_ASSERT(r, dst[i] == (uint32_t)(i - 11));
}
}
}