start on raster pipeline 2d mode

- Add run_2d(x,y,w,h) and start_pipeline_2d().
  - Add and test a 2d-compatible store_8888_2d stage.

Change-Id: Ib9c225d1b8cb40471ae4333df1d06eec4d506f8a
Reviewed-on: https://skia-review.googlesource.com/24401
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Florin Malita <fmalita@chromium.org>
This commit is contained in:
Mike Klein 2017-07-18 11:30:25 -04:00 committed by Skia Commit-Bot
parent 135e446b9f
commit 3b92b6907a
8 changed files with 9581 additions and 7621 deletions

View File

@ -17,6 +17,7 @@
#include <vector>
struct SkJumper_constants;
struct SkJumper_Engine;
struct SkPM4f;
/**
@ -91,8 +92,8 @@ struct SkPM4f;
M(xy_to_2pt_conical_linear) \
M(mask_2pt_conical_degenerates) M(apply_vector_mask) \
M(byte_tables) M(byte_tables_rgb) \
M(rgb_to_hsl) \
M(hsl_to_rgb)
M(rgb_to_hsl) M(hsl_to_rgb) \
M(store_8888_2d)
class SkRasterPipeline {
public:
@ -120,6 +121,9 @@ public:
// Runs the pipeline walking x through [x,x+n).
void run(size_t x, size_t y, size_t n) const;
// Runs the pipeline in 2d from (x,y) inclusive to (x+w,y+h) exclusive.
void run_2d(size_t x, size_t y, size_t w, size_t h) const;
// Allocates a thunk which amortizes run() setup cost in alloc.
std::function<void(size_t, size_t, size_t)> compile() const;
@ -140,15 +144,13 @@ public:
bool empty() const { return fStages == nullptr; }
private:
using StartPipelineFn = void(size_t,size_t,size_t,void**,const SkJumper_constants*);
struct StageList {
StageList* prev;
StockStage stage;
void* ctx;
};
StartPipelineFn* build_pipeline(void**) const;
const SkJumper_Engine& build_pipeline(void**) const;
void unchecked_append(StockStage, void*);
SkArenaAlloc* fAlloc;

View File

@ -58,9 +58,10 @@ static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#endif
// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest for us.
using StageFn = void(void);
using StartPipelineFn = void(size_t,size_t,size_t,void**,K*);
// We'll only ever call start_pipeline() or start_pipeline_2d(), which then chain into the rest.
using StageFn = void(void);
using StartPipelineFn = void(size_t,size_t,size_t, void**,K*);
using StartPipeline2dFn = void(size_t,size_t,size_t,size_t, void**,K*);
// Some platforms expect C "name" maps to asm "_name", others to "name".
#if defined(__APPLE__)
@ -106,14 +107,16 @@ extern "C" {
// We'll just run portable code.
#elif defined(__aarch64__)
StartPipelineFn ASM(start_pipeline,aarch64);
StartPipelineFn ASM(start_pipeline ,aarch64);
StartPipeline2dFn ASM(start_pipeline_2d,aarch64);
StageFn ASM(just_return,aarch64);
#define M(st) StageFn ASM(st,aarch64);
SK_RASTER_PIPELINE_STAGES(M)
#undef M
#elif defined(__arm__)
StartPipelineFn ASM(start_pipeline,vfp4);
StartPipelineFn ASM(start_pipeline ,vfp4);
StartPipeline2dFn ASM(start_pipeline_2d,vfp4);
StageFn ASM(just_return,vfp4);
#define M(st) StageFn ASM(st,vfp4);
SK_RASTER_PIPELINE_STAGES(M)
@ -127,6 +130,13 @@ extern "C" {
ASM(start_pipeline,hsw_lowp ),
ASM(start_pipeline,ssse3_lowp);
StartPipeline2dFn ASM(start_pipeline_2d,hsw ),
ASM(start_pipeline_2d,avx ),
ASM(start_pipeline_2d,sse41 ),
ASM(start_pipeline_2d,sse2 ),
ASM(start_pipeline_2d,hsw_lowp ),
ASM(start_pipeline_2d,ssse3_lowp);
StageFn ASM(just_return,hsw),
ASM(just_return,avx),
ASM(just_return,sse41),
@ -156,7 +166,8 @@ extern "C" {
#elif (defined(__i386__) || defined(_M_IX86)) && \
!(defined(_MSC_VER) && defined(SK_SUPPORT_LEGACY_WIN32_JUMPER))
StartPipelineFn ASM(start_pipeline,sse2);
StartPipelineFn ASM(start_pipeline ,sse2);
StartPipeline2dFn ASM(start_pipeline_2d,sse2);
StageFn ASM(just_return,sse2);
#define M(st) StageFn ASM(st,sse2);
SK_RASTER_PIPELINE_STAGES(M)
@ -165,7 +176,8 @@ extern "C" {
#endif
// Portable, single-pixel stages.
StartPipelineFn sk_start_pipeline;
StartPipelineFn sk_start_pipeline;
StartPipeline2dFn sk_start_pipeline_2d;
StageFn sk_just_return;
#define M(st) StageFn sk_##st;
SK_RASTER_PIPELINE_STAGES(M)
@ -192,9 +204,10 @@ extern "C" {
// Engines comprise everything we need to run SkRasterPipelines.
struct SkJumper_Engine {
StageFn* stages[kNumStages];
StartPipelineFn* start_pipeline;
StageFn* just_return;
StageFn* stages[kNumStages];
StartPipelineFn* start_pipeline;
StartPipeline2dFn* start_pipeline_2d;
StageFn* just_return;
};
// We'll default to this portable engine, but try to choose a better one at runtime.
@ -203,6 +216,7 @@ static const SkJumper_Engine kPortable = {
{ SK_RASTER_PIPELINE_STAGES(M) },
#undef M
sk_start_pipeline,
sk_start_pipeline_2d,
sk_just_return,
};
static SkJumper_Engine gEngine = kPortable;
@ -216,7 +230,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, aarch64),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
@ -225,7 +241,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, vfp4),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
}
@ -235,7 +253,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, hsw),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
}
@ -243,7 +263,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, avx),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
}
@ -251,7 +273,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, sse41),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
}
@ -259,7 +283,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, sse2),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
}
@ -270,7 +296,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(stage) ASM(stage, sse2),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
M(start_pipeline)
M(start_pipeline_2d)
M(just_return)
#undef M
};
}
@ -286,6 +314,7 @@ static SkJumper_Engine choose_engine() {
#undef M
nullptr,
nullptr,
nullptr,
};
static SkJumper_Engine gLowp = kNone;
static SkOnce gChooseLowpOnce;
@ -296,8 +325,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(st) hsw_lowp<SkRasterPipeline::st>(),
{ SK_RASTER_PIPELINE_STAGES(M) },
ASM(start_pipeline,hsw_lowp),
ASM(just_return,hsw_lowp)
ASM(start_pipeline ,hsw_lowp),
ASM(start_pipeline_2d,hsw_lowp),
ASM(just_return ,hsw_lowp)
#undef M
};
}
@ -305,8 +335,9 @@ static SkJumper_Engine choose_engine() {
return {
#define M(st) ssse3_lowp<SkRasterPipeline::st>(),
{ SK_RASTER_PIPELINE_STAGES(M) },
ASM(start_pipeline,ssse3_lowp),
ASM(just_return,ssse3_lowp)
ASM(start_pipeline ,ssse3_lowp),
ASM(start_pipeline_2d,ssse3_lowp),
ASM(just_return ,ssse3_lowp)
#undef M
};
}
@ -315,7 +346,7 @@ static SkJumper_Engine choose_engine() {
}
#endif
StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const {
const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
#ifndef SK_DISABLE_SSSE3_RUNTIME_CHECK_FOR_LOWP_STAGES
gChooseLowpOnce([]{ gLowp = choose_lowp(); });
@ -338,7 +369,7 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const {
}
}
if (ip != reset_point) {
return gLowp.start_pipeline;
return gLowp;
}
#endif
@ -353,7 +384,7 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const {
}
*--ip = (void*)gEngine.stages[st->stage];
}
return gEngine.start_pipeline;
return gEngine;
}
void SkRasterPipeline::run(size_t x, size_t y, size_t n) const {
@ -364,8 +395,8 @@ void SkRasterPipeline::run(size_t x, size_t y, size_t n) const {
// Best to not use fAlloc here... we can't bound how often run() will be called.
SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
auto start_pipeline = this->build_pipeline(program.get() + fSlotsNeeded);
start_pipeline(x,y,x+n, program.get(), &kConstants);
const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
engine.start_pipeline(x,y,x+n, program.get(), &kConstants);
}
std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
@ -374,9 +405,22 @@ std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
}
void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
auto start_pipeline = this->build_pipeline(program + fSlotsNeeded);
const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);
auto start_pipeline = engine.start_pipeline;
return [=](size_t x, size_t y, size_t n) {
start_pipeline(x,y,x+n, program, &kConstants);
};
}
// Runs the pipeline over the 2d region from (x,y) inclusive to (x+w,y+h) exclusive,
// dispatching through the engine's start_pipeline_2d entry point.
void SkRasterPipeline::run_2d(size_t x, size_t y, size_t w, size_t h) const {
if (this->empty()) {
return;  // Nothing appended; nothing to do.
}
// Like in run(), it's best to not use fAlloc here... we can't bound how often we'll be called.
SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
// build_pipeline() fills `program` back-to-front and picks the best engine for these stages.
const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
engine.start_pipeline_2d(x,y,x+w,y+h, program.get(), &kConstants);
}

View File

@ -117,4 +117,9 @@ struct SkJumper_2PtConicalCtx {
fDR;
};
// Context for 2d-aware stages (e.g. store_8888_2d): a base pointer plus a row stride.
// The stride is applied after the pointer is cast to the stage's pixel type, so it is
// measured in pixels (elements), not bytes.
struct SkJumper_PtrStride {
void* ptr;      // Base address of the destination buffer (pixel at x=0, y=0).
size_t stride;  // Pixels per row.
};
#endif//SkJumper_DEFINED

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -68,6 +68,22 @@ extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t limit, void** pr
}
}
#if defined(JUMPER) && defined(__AVX__)
// We really want to make sure all paths go through this function's (implicit) vzeroupper.
// If they don't, we'll experience severe slowdowns when we first use SSE instructions again.
__attribute__((disable_tail_calls))
#endif
#if defined(JUMPER)
__attribute__((flatten)) // Force-inline the call to start_pipeline().
#endif
MAYBE_MSABI
// 2d entry point: runs the 1d pipeline once per row for y in [y, ylimit),
// each row covering x in [x, xlimit).
extern "C" void WRAP(start_pipeline_2d)(size_t x, size_t y, size_t xlimit, size_t ylimit,
void** program, K* k) {
for (; y < ylimit; y++) {
WRAP(start_pipeline)(x,y,xlimit, program, k);
}
}
#define STAGE(name) \
SI void name##_k(K* k, LazyCtx ctx, size_t x, size_t y, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
@ -910,6 +926,17 @@ STAGE(store_8888) {
store(ptr, px, tail);
}
// 2d-aware store: like store_8888, but the destination address is computed from both
// x and y using a SkJumper_PtrStride context instead of a plain pointer.
STAGE(store_8888_2d) {
auto c = (const SkJumper_PtrStride*)ctx;
// stride is in pixels: cast to uint32_t* first, then offset by whole rows plus x.
auto ptr = (uint32_t*)c->ptr + y*c->stride + x;
// Pack [0,1] float channels into 8-bit RGBA, little-endian byte order R,G,B,A.
U32 px = round(r, 255.0f)
| round(g, 255.0f) << 8
| round(b, 255.0f) << 16
| round(a, 255.0f) << 24;
store(ptr, px, tail);
}
STAGE(load_bgra) {
auto ptr = *(const uint32_t**)ctx + x;
from_8888(load<U32>(ptr, tail), &b,&g,&r,&a);

View File

@ -81,6 +81,21 @@ extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t limit, void** pr
start(k,program,x,y,tail, v,v,v,v, v,v,v,v);
}
}
#if defined(__AVX__)
// We really want to make sure all paths go through this function's (implicit) vzeroupper.
// If they don't, we'll experience severe slowdowns when we first use SSE instructions again.
__attribute__((disable_tail_calls))
#endif
__attribute__((flatten)) // Force-inline the call to start_pipeline().
MAYBE_MSABI
// 2d entry point: runs the 1d pipeline once per row for y in [y, ylimit),
// each row covering x in [x, xlimit).
extern "C" void WRAP(start_pipeline_2d)(size_t x, size_t y, size_t xlimit, size_t ylimit,
void** program, K* k) {
for (; y < ylimit; y++) {
WRAP(start_pipeline)(x,y,xlimit, program, k);
}
}
extern "C" void WRAP(just_return)(K*, void**, size_t,size_t,size_t, F,F,F,F, F,F,F,F) {}
#define STAGE(name) \

View File

@ -8,6 +8,7 @@
#include "Test.h"
#include "SkHalf.h"
#include "SkRasterPipeline.h"
#include "../src/jumper/SkJumper.h"
DEF_TEST(SkRasterPipeline, r) {
// Build and run a simple pipeline to exercise SkRasterPipeline,
@ -230,3 +231,33 @@ DEF_TEST(SkRasterPipeline_lowp, r) {
}
}
}
// Exercises run_2d() + store_8888_2d over a 2x2 buffer: after the matrix below,
// each pixel's red channel encodes its x coordinate and green encodes its y.
DEF_TEST(SkRasterPipeline_2d, r) {
uint32_t rgba[2*2] = {0,0,0,0};
SkSTArenaAlloc<256> alloc;
SkRasterPipeline p(&alloc);
// Splat out the (2d) dst coordinates: (0.5,0.5), (1.5,0.5), (0.5,1.5), (1.5,1.5).
p.append(SkRasterPipeline::seed_shader);
// Scale down to [0,1] range to write out as bytes.
// (x,y) -> ((x-0.5)*0.5, (y-0.5)*0.5), i.e. 0.0 or 0.5 per axis -> bytes 0 or 128.
p.append_matrix(&alloc, SkMatrix::Concat(SkMatrix::MakeScale(0.5f),
SkMatrix::MakeTrans(-0.5f, -0.5f)));
// Write out to rgba, with row stride = 2 pixels.
SkJumper_PtrStride ctx = { rgba, 2 };
p.append(SkRasterPipeline::store_8888_2d, &ctx);
p.run_2d(0,0, 2,2);
// Red channel (bits 0-7) should track x: 0,128 on each row.
REPORTER_ASSERT(r, ((rgba[0] >> 0) & 0xff) == 0);
REPORTER_ASSERT(r, ((rgba[1] >> 0) & 0xff) == 128);
REPORTER_ASSERT(r, ((rgba[2] >> 0) & 0xff) == 0);
REPORTER_ASSERT(r, ((rgba[3] >> 0) & 0xff) == 128);
// Green channel (bits 8-15) should track y: 0 on the top row, 128 on the bottom.
REPORTER_ASSERT(r, ((rgba[0] >> 8) & 0xff) == 0);
REPORTER_ASSERT(r, ((rgba[1] >> 8) & 0xff) == 0);
REPORTER_ASSERT(r, ((rgba[2] >> 8) & 0xff) == 128);
REPORTER_ASSERT(r, ((rgba[3] >> 8) & 0xff) == 128);
}