Rearrange SkRasterPipeline scanline tail handling.
We used to step at a 4-pixel stride as long as possible, then run up to 3 times, one pixel at a time. Now replace those 1-at-a-time runs with a single tail stamp if there are 1-3 remaining pixels.

This style is simply more efficient: e.g. we'll blend and lerp once for 3 pixels instead of 3 times. This should make short blits significantly more efficient. It's also more future-oriented: AVX+ on Intel and SVE on ARM support masked loads and stores, so we can do the entire tail in one direct step.

This also makes it possible to rearrange the code a bit to encapsulate each stage better. I think this code generally reads more clearly than the old code, but YMMV. I've arranged things so you write one function, but it's compiled into two specializations, one for tail=0 (Body) and one for tail>0 (Tail). It's pretty tidy.

For now I've just burned a register to pass around tail. It's 2 bits now, maybe soon 3 with AVX, and capped at 4 for even the craziest new toys, so there are plenty of places we can pack it if we want to get clever.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2717

Change-Id: I45852a3e5d4c5b5e9315302c46601aee0d32265f
Reviewed-on: https://skia-review.googlesource.com/2717
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Parent: b37eb0e968 · Commit: c8dd6bc3e7
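For orientation, here is a minimal standalone sketch (not Skia source; the stage signature is boiled down to just x and tail) of the loop shape this change moves to:

// Minimal sketch of the new loop: 4-pixel body strides with tail=0,
// then at most one "tail stamp" covering the last 1-3 pixels at once.
#include <cstddef>
#include <cstdio>

template <typename Stage>
static void run(size_t x, size_t n, Stage stage) {
    while (n >= 4) {
        stage(x, /*tail=*/0);   // full 4-pixel slab
        x += 4;
        n -= 4;
    }
    if (n > 0) {
        stage(x, /*tail=*/n);   // one jagged step instead of n 1-pixel steps
    }
}

int main() {
    // 11 pixels: body at x=0 and x=4, then a single tail stamp of 3 at x=8.
    run(0, 11, [](size_t x, size_t tail) {
        std::printf("x=%zu tail=%zu\n", x, tail);
    });
}

With the old scheme the last 3 pixels would have cost three full trips down the pipeline; now they cost one.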
@@ -21,11 +21,28 @@ static uint8_t mask[N];
 // - load srgb dst
 // - src = srcover(dst, src)
 // - store src back as srgb
 // Every stage except for srcover interacts with memory, and so will need _tail variants.
 
 SK_RASTER_STAGE(load_s_srgb) {
     auto ptr = (const uint32_t*)ctx + x;
 
+    if (tail) {
+        float rs[] = {0,0,0,0},
+              gs[] = {0,0,0,0},
+              bs[] = {0,0,0,0},
+              as[] = {0,0,0,0};
+        for (size_t i = 0; i < (tail&3); i++) {
+            rs[i] = sk_linear_from_srgb[(ptr[i] >>  0) & 0xff];
+            gs[i] = sk_linear_from_srgb[(ptr[i] >>  8) & 0xff];
+            bs[i] = sk_linear_from_srgb[(ptr[i] >> 16) & 0xff];
+            as[i] = (ptr[i] >> 24) * (1/255.0f);
+        }
+        r = Sk4f::Load(rs);
+        g = Sk4f::Load(gs);
+        b = Sk4f::Load(bs);
+        a = Sk4f::Load(as);
+        return;
+    }
+
     r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
               sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
               sk_linear_from_srgb[(ptr[2] >> 0) & 0xff],
@@ -40,22 +57,30 @@ SK_RASTER_STAGE(load_s_srgb) {
               sk_linear_from_srgb[(ptr[1] >> 16) & 0xff],
               sk_linear_from_srgb[(ptr[2] >> 16) & 0xff],
               sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
 
     a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
 }
 
-SK_RASTER_STAGE(load_s_srgb_tail) {
-    auto ptr = (const uint32_t*)ctx + x;
-
-    r = Sk4f{ sk_linear_from_srgb[(*ptr >>  0) & 0xff], 0,0,0 };
-    g = Sk4f{ sk_linear_from_srgb[(*ptr >>  8) & 0xff], 0,0,0 };
-    b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
-    a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 };
-}
-
 SK_RASTER_STAGE(load_d_srgb) {
     auto ptr = (const uint32_t*)ctx + x;
 
+    if (tail) {
+        float rs[] = {0,0,0,0},
+              gs[] = {0,0,0,0},
+              bs[] = {0,0,0,0},
+              as[] = {0,0,0,0};
+        for (size_t i = 0; i < (tail&3); i++) {
+            rs[i] = sk_linear_from_srgb[(ptr[i] >>  0) & 0xff];
+            gs[i] = sk_linear_from_srgb[(ptr[i] >>  8) & 0xff];
+            bs[i] = sk_linear_from_srgb[(ptr[i] >> 16) & 0xff];
+            as[i] = (ptr[i] >> 24) * (1/255.0f);
+        }
+        dr = Sk4f::Load(rs);
+        dg = Sk4f::Load(gs);
+        db = Sk4f::Load(bs);
+        da = Sk4f::Load(as);
+        return;
+    }
+
     dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
                sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
                sk_linear_from_srgb[(ptr[2] >> 0) & 0xff],
@@ -74,29 +99,24 @@ SK_RASTER_STAGE(load_d_srgb) {
     da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
 }
 
-SK_RASTER_STAGE(load_d_srgb_tail) {
-    auto ptr = (const uint32_t*)ctx + x;
-
-    dr = Sk4f{ sk_linear_from_srgb[(*ptr >>  0) & 0xff], 0,0,0 };
-    dg = Sk4f{ sk_linear_from_srgb[(*ptr >>  8) & 0xff], 0,0,0 };
-    db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
-    da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 };
-}
-
 SK_RASTER_STAGE(scale_u8) {
     auto ptr = (const uint8_t*)ctx + x;
 
-    auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
-    r *= c;
-    g *= c;
-    b *= c;
-    a *= c;
-}
-
-SK_RASTER_STAGE(scale_u8_tail) {
-    auto ptr = (const uint8_t*)ctx + x;
+    Sk4b cov;
+    if (tail) {
+        uint8_t cs[] = {0,0,0,0};
+        switch (tail&3) {
+            case 3: cs[2] = ptr[2];
+            case 2: cs[1] = ptr[1];
+            case 1: cs[0] = ptr[0];
+        }
+        cov = Sk4b::Load(cs);
+    } else {
+        cov = Sk4b::Load(ptr);
+    }
 
-    auto c = *ptr * (1/255.0f);
+    auto c = SkNx_cast<float>(cov) * (1/255.0f);
     r *= c;
     g *= c;
     b *= c;
@@ -114,19 +134,24 @@ SK_RASTER_STAGE(srcover) {
 SK_RASTER_STAGE(store_srgb) {
     auto ptr = (uint32_t*)ctx + x;
 
+    uint32_t* dst = nullptr;
+    uint32_t stack[4];
+    if (tail) {
+        dst = ptr;
+        ptr = stack;
+    }
+
     ( sk_linear_to_srgb(r)
     | sk_linear_to_srgb(g) <<  8
     | sk_linear_to_srgb(b) << 16
     | Sk4f_round(255.0f*a) << 24).store(ptr);
+    switch (tail&3) {
+        case 3: dst[2] = ptr[2];
+        case 2: dst[1] = ptr[1];
+        case 1: dst[0] = ptr[0];
+    }
 }
 
-SK_RASTER_STAGE(store_srgb_tail) {
-    auto ptr = (uint32_t*)ctx + x;
-
-    Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0});
-    rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)};
-
-    SkNx_cast<uint8_t>(rgba).store(ptr);
-}
-
 class SkRasterPipelineBench : public Benchmark {
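The new store_srgb above uses a handy tail pattern: when tail is non-zero, redirect the full 4-pixel stamp to a stack buffer, then copy out only the live pixels. A standalone sketch of just that pattern (plain uint32_t copies standing in for the sRGB packing; not Skia source):

// Stamp 4 pixels; if tail is 1-3, stamp to the stack and copy the live ones.
#include <cstddef>
#include <cstdint>
#include <cstring>

static void store_4px(const uint32_t src[4], uint32_t* dst, size_t tail) {
    uint32_t stack[4];
    uint32_t* ptr = dst;
    if (tail) {
        ptr = stack;                 // the full-width store goes somewhere safe
    }
    std::memcpy(ptr, src, 4 * sizeof(uint32_t));   // always a 4-wide stamp
    switch (tail & 3) {              // intentional fall-through, as in the diff
        case 3: dst[2] = ptr[2];
        case 2: dst[1] = ptr[1];
        case 1: dst[0] = ptr[0];
    }
}

int main() {
    uint32_t px[] = {1,2,3,4}, out[4] = {0,0,0,0};
    store_4px(px, out, 3);   // writes only out[0..2]; out[3] stays 0
}

When tail == 0 the switch does nothing and the stamp already landed directly in dst, so the fast path pays nothing extra.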
@@ -147,35 +172,31 @@ public:
         Sk4f r,g,b,a, dr,dg,db,da;
         size_t x = 0, n = N;
         while (n >= 4) {
-            load_s_srgb(src    , x, r,g,b,a, dr,dg,db,da);
-            scale_u8   (mask   , x, r,g,b,a, dr,dg,da,da);
-            load_d_srgb(dst    , x, r,g,b,a, dr,dg,da,da);
-            srcover    (nullptr, x, r,g,b,a, dr,dg,da,da);
-            store_srgb (dst    , x, r,g,b,a, dr,dg,da,da);
+            load_s_srgb(src    , x,0, r,g,b,a, dr,dg,db,da);
+            scale_u8   (mask   , x,0, r,g,b,a, dr,dg,da,da);
+            load_d_srgb(dst    , x,0, r,g,b,a, dr,dg,da,da);
+            srcover    (nullptr, x,0, r,g,b,a, dr,dg,da,da);
+            store_srgb (dst    , x,0, r,g,b,a, dr,dg,da,da);
 
             x += 4;
             n -= 4;
         }
-        while (n > 0) {
-            load_s_srgb_tail(src    , x, r,g,b,a, dr,dg,db,da);
-            scale_u8_tail   (mask   , x, r,g,b,a, dr,dg,da,da);
-            load_d_srgb_tail(dst    , x, r,g,b,a, dr,dg,da,da);
-            srcover         (nullptr, x, r,g,b,a, dr,dg,da,da);
-            store_srgb_tail (dst    , x, r,g,b,a, dr,dg,da,da);
-
-            x += 1;
-            n -= 1;
+        if (n > 0) {
+            load_s_srgb(src    , x,n, r,g,b,a, dr,dg,db,da);
+            scale_u8   (mask   , x,n, r,g,b,a, dr,dg,da,da);
+            load_d_srgb(dst    , x,n, r,g,b,a, dr,dg,da,da);
+            srcover    (nullptr, x,n, r,g,b,a, dr,dg,da,da);
+            store_srgb (dst    , x,n, r,g,b,a, dr,dg,da,da);
         }
     }
 
     void runPipeline() {
         SkRasterPipeline p;
-        p.append<load_s_srgb, load_s_srgb_tail>( src);
-        p.append<   scale_u8,    scale_u8_tail>(mask);
-        p.append<load_d_srgb, load_d_srgb_tail>( dst);
-        p.append<srcover>();
-        p.last<  store_srgb,   store_srgb_tail>( dst);
-
+        p.append<load_s_srgb>( src);
+        p.append<   scale_u8>(mask);
+        p.append<load_d_srgb>( dst);
+        p.append<    srcover>();
+        p.last < store_srgb>(dst);
         p.run(N);
     }
 
@@ -9,29 +9,30 @@
 
 SkRasterPipeline::SkRasterPipeline() {}
 
-void SkRasterPipeline::append(SkRasterPipeline::Fn body_fn, const void* body_ctx,
-                              SkRasterPipeline::Fn tail_fn, const void* tail_ctx) {
+void SkRasterPipeline::append(SkRasterPipeline::Fn body,
+                              SkRasterPipeline::Fn tail,
+                              const void* ctx) {
     // Each stage holds its own context and the next function to call.
     // So the pipeline itself has to hold onto the first function that starts the pipeline.
-    (fBody.empty() ? fBodyStart : fBody.back().fNext) = body_fn;
-    (fTail.empty() ? fTailStart : fTail.back().fNext) = tail_fn;
+    (fBody.empty() ? fBodyStart : fBody.back().fNext) = body;
+    (fTail.empty() ? fTailStart : fTail.back().fNext) = tail;
 
     // Each last stage starts with its next function set to JustReturn as a safety net.
     // It'll be overwritten by the next call to append().
-    fBody.push_back({ &JustReturn, const_cast<void*>(body_ctx) });
-    fTail.push_back({ &JustReturn, const_cast<void*>(tail_ctx) });
+    fBody.push_back({ &JustReturn, const_cast<void*>(ctx) });
+    fTail.push_back({ &JustReturn, const_cast<void*>(ctx) });
 }
 
 void SkRasterPipeline::extend(const SkRasterPipeline& src) {
     SkASSERT(src.fBody.count() == src.fTail.count());
 
-    Fn body_fn = src.fBodyStart,
-       tail_fn = src.fTailStart;
+    Fn body = src.fBodyStart,
+       tail = src.fTailStart;
     for (int i = 0; i < src.fBody.count(); i++) {
-        this->append(body_fn, src.fBody[i].fCtx,
-                     tail_fn, src.fTail[i].fCtx);
-        body_fn = src.fBody[i].fNext;
-        tail_fn = src.fTail[i].fNext;
+        SkASSERT(src.fBody[i].fCtx == src.fTail[i].fCtx);
+        this->append(body, tail, src.fBody[i].fCtx);
+        body = src.fBody[i].fNext;
+        tail = src.fTail[i].fNext;
     }
 }
 
@@ -40,16 +41,14 @@ void SkRasterPipeline::run(size_t x, size_t n) {
     Sk4f v;
 
     while (n >= 4) {
-        fBodyStart(fBody.begin(), x, v,v,v,v, v,v,v,v);
+        fBodyStart(fBody.begin(), x,0, v,v,v,v, v,v,v,v);
        x += 4;
         n -= 4;
     }
-    while (n > 0) {
-        fTailStart(fTail.begin(), x, v,v,v,v, v,v,v,v);
-        x += 1;
-        n -= 1;
+    if (n > 0) {
+        fTailStart(fTail.begin(), x,n, v,v,v,v, v,v,v,v);
     }
 }
 
-void SK_VECTORCALL SkRasterPipeline::JustReturn(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
-                                                                Sk4f,Sk4f,Sk4f,Sk4f) {}
+void SK_VECTORCALL SkRasterPipeline::JustReturn(Stage*, size_t, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
+                                                                        Sk4f,Sk4f,Sk4f,Sk4f) {}
 
@@ -26,13 +26,16 @@
  * are designed to maximize the amount of data we can pass along the pipeline cheaply.
  * On many machines all arguments stay in registers the entire time.
  *
- * The meaning of the arguments to Fn are sometimes fixed...
+ * The meaning of the arguments to Fn are sometimes fixed:
  *    - The Stage* always represents the current stage, mainly providing access to ctx().
- *    - The size_t is always the destination x coordinate. If you need y, put it in your context.
+ *    - The first size_t is always the destination x coordinate.
+ *      (If you need y, put it in your context.)
+ *    - The second size_t is always tail: 0 when working on a full 4-pixel slab,
+ *      or 1..3 when using only the bottom 1..3 lanes of each register.
  *    - By the time the shader's done, the first four vectors should hold source red,
  *      green, blue, and alpha, up to 4 pixels' worth each.
  *
- * ...and sometimes flexible:
+ * Sometimes arguments are flexible:
  *    - In the shader, the first four vectors can be used for anything, e.g. sample coordinates.
  *    - The last four vectors are scratch registers that can be used to communicate between
  *      stages; transfer modes use these to hold the original destination pixel components.
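This tail convention lines up with the masked loads the commit message points at. As a hedged illustration (not part of this CL), a 4-lane tail load could become one AVX vmaskmovps instead of a lane-by-lane gather; _mm_maskload_ps reads only the lanes whose mask sign bit is set and returns 0 in the rest, which also means no reading past the end of the scanline:

#include <immintrin.h>   // AVX intrinsics: compile with -mavx
#include <cstddef>

static __m128 load_tail_masked(const float* src, size_t tail) {
    size_t n = tail ? tail : 4;      // tail==0 means a full 4-pixel slab
    __m128i mask = _mm_setr_epi32(n > 0 ? -1 : 0,
                                  n > 1 ? -1 : 0,
                                  n > 2 ? -1 : 0,
                                  n > 3 ? -1 : 0);
    return _mm_maskload_ps(src, mask);   // dead lanes come back as +0.0f
}

int main() {
    float src[4] = {1,2,3,4}, dst[4];
    _mm_storeu_ps(dst, load_tail_masked(src, 3));   // dst = {1,2,3,0}
}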
@@ -43,7 +46,7 @@
  *    1) call st->next() with its mutated arguments, chaining to the next stage of the pipeline; or
  *    2) return, indicating the pipeline is complete for these pixels.
  *
- * Some obvious stages that typically return are those that write a color to a destination pointer,
+ * Some stages that typically return are those that write a color to a destination pointer,
  * but any stage can short-circuit the rest of the pipeline by returning instead of calling next().
  *
  * Most simple pipeline stages can use the SK_RASTER_STAGE macro to define a static EasyFn,
@@ -52,26 +55,29 @@
  *   - instead of manually calling a next() function, just modify registers in place.
  *
  * To add an EasyFn stage to the pipeline, call append<fn>() instead of append(&fn).
- * For the last stage of a pipeline, it's a slight performance benefit to call last<fn>().
+ * It's a slight performance benefit to call last<fn>() for the last stage of a pipeline.
  */
 
+// TODO: There may be a better place to stuff tail, e.g. in the bottom alignment bits of
+// the Stage*.  This mostly matters on 64-bit Windows where every register is precious.
+
 class SkRasterPipeline {
 public:
     struct Stage;
-    using Fn = void(SK_VECTORCALL *)(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
-                                                     Sk4f,Sk4f,Sk4f,Sk4f);
-    using EasyFn = void(void*, size_t, Sk4f&, Sk4f&, Sk4f&, Sk4f&,
-                                       Sk4f&, Sk4f&, Sk4f&, Sk4f&);
+    using Fn = void(SK_VECTORCALL *)(Stage*, size_t, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
+                                                             Sk4f,Sk4f,Sk4f,Sk4f);
+    using EasyFn = void(void*, size_t, size_t, Sk4f&, Sk4f&, Sk4f&, Sk4f&,
+                                               Sk4f&, Sk4f&, Sk4f&, Sk4f&);
 
     struct Stage {
         template <typename T>
         T ctx() { return static_cast<T>(fCtx); }
 
-        void SK_VECTORCALL next(size_t x, Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                                          Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
+        void SK_VECTORCALL next(size_t x, size_t tail, Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
+                                                       Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
            // Stages are logically a pipeline, and physically are contiguous in an array.
            // To get to the next stage, we just increment our pointer to the next array element.
-           fNext(this+1, x, v0,v1,v2,v3, v4,v5,v6,v7);
+           fNext(this+1, x,tail, v0,v1,v2,v3, v4,v5,v6,v7);
         }
 
         // It makes next() a good bit cheaper if we hold the next function to call here,
@@ -84,50 +90,26 @@ public:
     SkRasterPipeline();
 
     // Run the pipeline constructed with append(), walking x through [x,x+n),
-    // generally in 4 pixel steps, but sometimes 1 pixel at a time.
+    // generally in 4-pixel steps, with perhaps one jagged tail step.
     void run(size_t x, size_t n);
     void run(size_t n) { this->run(0, n); }
 
-    // Use this append() if your stage is sensitive to the number of pixels you're working with:
-    //   - body will always be called for a full 4 pixels
-    //   - tail will always be called for a single pixel
-    // Typically this is only an essential distintion for stages that read or write memory.
-    void append(Fn body, const void* body_ctx,
-                Fn tail, const void* tail_ctx);
-
-    // Most stages don't actually care if they're working on 4 or 1 pixel.
-    void append(Fn fn, const void* ctx = nullptr) {
-        this->append(fn, ctx, fn, ctx);
-    }
-
-    // Most 4 pixel or 1 pixel variants share the same context pointer.
-    void append(Fn body, Fn tail, const void* ctx = nullptr) {
-        this->append(body, ctx, tail, ctx);
-    }
-
-    // Versions of append that can be used with static EasyFns (see SK_RASTER_STAGE).
-    template <EasyFn body, EasyFn tail>
-    void append(const void* body_ctx, const void* tail_ctx) {
-        this->append(Easy<body>, body_ctx,
-                     Easy<tail>, tail_ctx);
-    }
-    template <EasyFn body, EasyFn tail>
-    void last(const void* body_ctx, const void* tail_ctx) {
-        this->append(Last<body>, body_ctx,
-                     Last<tail>, tail_ctx);
-    }
-
-    template <EasyFn fn>
-    void append(const void* ctx = nullptr) { this->append<fn, fn>(ctx, ctx); }
-    template <EasyFn fn>
-    void last(const void* ctx = nullptr) { this->last<fn, fn>(ctx, ctx); }
-
-    template <EasyFn body, EasyFn tail>
-    void append(const void* ctx = nullptr) { this->append<body, tail>(ctx, ctx); }
-    template <EasyFn body, EasyFn tail>
-    void last(const void* ctx = nullptr) { this->last<body, tail>(ctx, ctx); }
+    // body() will only be called with tail=0, indicating it always works on a full 4 pixels.
+    // tail() will only be called with tail=1..3 to handle the jagged end of n%4 pixels.
+    void append(Fn body, Fn tail, const void* ctx = nullptr);
+    void append(Fn fn, const void* ctx = nullptr) { this->append(fn, fn, ctx); }
+
+    // Version of append that can be used with static EasyFn (see SK_RASTER_STAGE).
+    template <EasyFn fn>
+    void append(const void* ctx = nullptr) {
+        this->append(Body<fn,true>, Tail<fn,true>, ctx);
+    }
+
+    // If this is the last stage of the pipeline, last() is a bit faster than append().
+    template <EasyFn fn>
+    void last(const void* ctx = nullptr) {
+        this->append(Body<fn,false>, Tail<fn,false>, ctx);
+    }
 
     // Append all stages to this pipeline.
     void extend(const SkRasterPipeline&);
@@ -138,22 +120,31 @@ private:
     // This no-op default makes fBodyStart and fTailStart unconditionally safe to call,
     // and is always the last stage's fNext as a sort of safety net to make sure even a
     // buggy pipeline can't walk off its own end.
-    static void SK_VECTORCALL JustReturn(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
-                                                         Sk4f,Sk4f,Sk4f,Sk4f);
+    static void SK_VECTORCALL JustReturn(Stage*, size_t, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
+                                                                 Sk4f,Sk4f,Sk4f,Sk4f);
 
-    template <EasyFn kernel>
-    static void SK_VECTORCALL Easy(SkRasterPipeline::Stage* st, size_t x,
+    template <EasyFn kernel, bool kCallNext>
+    static void SK_VECTORCALL Body(SkRasterPipeline::Stage* st, size_t x, size_t tail,
                                    Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                    Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-        kernel(st->ctx<void*>(), x, r,g,b,a, dr,dg,db,da);
-        st->next(x, r,g,b,a, dr,dg,db,da);
+        // Passing 0 lets the optimizer completely drop any "if (tail) {...}" code in kernel.
+        kernel(st->ctx<void*>(), x,0, r,g,b,a, dr,dg,db,da);
+        if (kCallNext) {
+            st->next(x,tail, r,g,b,a, dr,dg,db,da);  // It's faster to pass tail here than 0.
+        }
     }
 
-    template <EasyFn kernel>
-    static void SK_VECTORCALL Last(SkRasterPipeline::Stage* st, size_t x,
+    template <EasyFn kernel, bool kCallNext>
+    static void SK_VECTORCALL Tail(SkRasterPipeline::Stage* st, size_t x, size_t tail,
                                    Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                    Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-        kernel(st->ctx<void*>(), x, r,g,b,a, dr,dg,db,da);
+    #if defined(__clang__)
+        __builtin_assume(tail > 0);  // This flourish lets Clang compile away any tail==0 code.
+    #endif
+        kernel(st->ctx<void*>(), x,tail, r,g,b,a, dr,dg,db,da);
+        if (kCallNext) {
+            st->next(x,tail, r,g,b,a, dr,dg,db,da);
+        }
     }
 
     Stages fBody,
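A standalone sketch of why Body and Tail compile into genuinely different code from the one kernel you write (hypothetical names, plain function pointers instead of EasyFn; the kCallNext flag above is how last() skips the chained call):

#include <cstddef>

using Kernel = void (*)(size_t x, size_t tail);

template <Kernel kernel>
static void body(size_t x, size_t /*tail*/) {
    kernel(x, 0);             // literal 0: any "if (tail)" in kernel folds away
}

template <Kernel kernel>
static void tail(size_t x, size_t t) {
#if defined(__clang__)
    __builtin_assume(t > 0);  // lets Clang delete the kernel's tail==0 paths
#endif
    kernel(x, t);
}

static void my_stage(size_t x, size_t tail) {
    if (tail) { /* partial-slab path */ } else { /* full-slab path */ }
    (void)x;
}

int main() {
    body<my_stage>(0, 0);   // instantiates the full-slab specialization
    tail<my_stage>(8, 3);   // instantiates the partial-slab specialization
}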
@@ -164,9 +155,9 @@ private:
 
 // These are always static, and we _really_ want them to inline.
 // If you find yourself wanting a non-inline stage, write a SkRasterPipeline::Fn directly.
-#define SK_RASTER_STAGE(name)                                                  \
-    static SK_ALWAYS_INLINE void name(void* ctx, size_t x,                     \
-                                      Sk4f&  r, Sk4f&  g, Sk4f&  b, Sk4f&  a,  \
+#define SK_RASTER_STAGE(name)                                                  \
+    static SK_ALWAYS_INLINE void name(void* ctx, size_t x, size_t tail,        \
+                                      Sk4f&  r, Sk4f&  g, Sk4f&  b, Sk4f&  a,  \
                                       Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da)
 
 #endif//SkRasterPipeline_DEFINED
@@ -101,22 +101,29 @@ SK_RASTER_STAGE(lerp_constant_float) {
     a = lerp(da, a, c);
 }
 
-// s' = d(1-c) + sc, 4 pixels at a time for 8-bit coverage.
-SK_RASTER_STAGE(lerp_a8) {
-    auto ptr = (const uint8_t*)ctx + x;
-    Sk4f c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
-
-    r = lerp(dr, r, c);
-    g = lerp(dg, g, c);
-    b = lerp(db, b, c);
-    a = lerp(da, a, c);
+template <typename T>
+static SkNx<4,T> load_tail(size_t tail, const T* src) {
+    if (tail) {
+        return SkNx<4,T>(src[0], (tail>1 ? src[1] : 0), (tail>2 ? src[2] : 0), 0);
+    }
+    return SkNx<4,T>::Load(src);
 }
 
-// Tail variant of lerp_a8() handling 1 pixel at a time.
-SK_RASTER_STAGE(lerp_a8_1) {
-    auto ptr = (const uint8_t*)ctx + x;
-    Sk4f c = *ptr * (1/255.0f);
+template <typename T>
+static void store_tail(size_t tail, const SkNx<4,T>& v, T* dst) {
+    switch(tail) {
+        case 0: return v.store(dst);
+        case 3: dst[2] = v[2];
+        case 2: dst[1] = v[1];
+        case 1: dst[0] = v[0];
+    }
+}
+
+// s' = d(1-c) + sc for 8-bit c.
+SK_RASTER_STAGE(lerp_a8) {
+    auto ptr = (const uint8_t*)ctx + x;
 
+    Sk4f c = SkNx_cast<float>(load_tail(tail, ptr)) * (1/255.0f);
     r = lerp(dr, r, c);
     g = lerp(dg, g, c);
     b = lerp(db, b, c);
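To make the helpers' contract concrete, here is a standalone rendition over plain 4-element arrays (SkNx is Skia-internal), showing the zero-padded load and the fall-through store:

#include <cstddef>
#include <cstdio>

// load_tail: read tail (1-3) values, zero the dead lanes; tail==0 reads all 4.
template <typename T>
static void load_tail(size_t tail, const T* src, T v[4]) {
    if (tail) {
        v[0] = src[0];
        v[1] = tail > 1 ? src[1] : 0;
        v[2] = tail > 2 ? src[2] : 0;
        v[3] = 0;
        return;
    }
    for (int i = 0; i < 4; i++) { v[i] = src[i]; }
}

// store_tail: case 0 writes all four lanes; 1-3 fall through the live ones.
template <typename T>
static void store_tail(size_t tail, const T v[4], T* dst) {
    switch (tail) {
        case 0: dst[3] = v[3];   // intentional fall-through below
        case 3: dst[2] = v[2];
        case 2: dst[1] = v[1];
        case 1: dst[0] = v[0];
    }
}

int main() {
    int src[] = {10, 20, 30, 40}, v[4], dst[4] = {0,0,0,0};
    load_tail(3, src, v);        // v = {10, 20, 30, 0}
    store_tail(3, v, dst);       // writes only dst[0..2]
    std::printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  // 10 20 30 0
}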
@@ -137,11 +144,11 @@ static Sk4h to_565(const Sk4f& r, const Sk4f& g, const Sk4f& b) {
          | Sk4f_round(b * SK_B16_MASK) << SK_B16_SHIFT);
 }
 
-// s' = d(1-c) + sc, 4 pixels at a time for 565 coverage.
+// s' = d(1-c) + sc for 565 c.
 SK_RASTER_STAGE(lerp_lcd16) {
     auto ptr = (const uint16_t*)ctx + x;
     Sk4f cr, cg, cb;
-    from_565(Sk4h::Load(ptr), &cr, &cg, &cb);
+    from_565(load_tail(tail, ptr), &cr, &cg, &cb);
 
     r = lerp(dr, r, cr);
     g = lerp(dg, g, cg);
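One channel of that lerp, worked by hand: with 8-bit coverage 128, c = 128/255 ≈ 0.502, so s' = d(1-c) + sc lands about halfway from dst toward src. A tiny hand-check (illustrative values, not from this CL):

#include <cstdio>

int main() {
    float s = 1.0f, d = 0.25f;        // source and dst channel values
    float c = 128 * (1/255.0f);       // 8-bit coverage 128 -> ~0.502
    float blended = d*(1 - c) + s*c;  // same form as lerp(d, s, c)
    std::printf("%.3f\n", blended);   // ~0.626
}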
@@ -149,89 +156,74 @@ SK_RASTER_STAGE(lerp_lcd16) {
     a = 1.0f;
 }
 
-// Tail variant of lerp_lcd16() handling 1 pixel at a time.
-SK_RASTER_STAGE(lerp_lcd16_1) {
-    auto ptr = (const uint16_t*)ctx + x;
-    Sk4f cr, cg, cb;
-    from_565({*ptr,0,0,0}, &cr, &cg, &cb);
-
-    r = lerp(dr, r, cr);
-    g = lerp(dg, g, cg);
-    b = lerp(db, b, cb);
-    a = 1.0f;
-}
-
 // Load 4 565 dst pixels.
 SK_RASTER_STAGE(load_d_565) {
     auto ptr = (const uint16_t*)ctx + x;
 
-    from_565(Sk4h::Load(ptr), &dr,&dg,&db);
+    from_565(load_tail(tail, ptr), &dr,&dg,&db);
     da = 1.0f;
 }
 
-// Load 1 565 dst pixel.
-SK_RASTER_STAGE(load_d_565_1) {
-    auto ptr = (const uint16_t*)ctx + x;
-
-    from_565({*ptr,0,0,0}, &dr,&dg,&db);
-    da = 1.0f;
-}
-
 // Store 4 565 pixels.
 SK_RASTER_STAGE(store_565) {
     auto ptr = (uint16_t*)ctx + x;
-    to_565(r,g,b).store(ptr);
+    store_tail(tail, to_565(r,g,b), ptr);
 }
 
-// Store 1 565 pixel.
-SK_RASTER_STAGE(store_565_1) {
-    auto ptr = (uint16_t*)ctx + x;
-    *ptr = to_565(r,g,b)[0];
-}
-
 // Load 4 F16 pixels.
 SK_RASTER_STAGE(load_d_f16) {
     auto ptr = (const uint64_t*)ctx + x;
 
+    if (tail) {
+        auto p0 =          SkHalfToFloat_finite_ftz(ptr[0])          ,
+             p1 = tail>1 ? SkHalfToFloat_finite_ftz(ptr[1]) : Sk4f{0},
+             p2 = tail>2 ? SkHalfToFloat_finite_ftz(ptr[2]) : Sk4f{0};
+        dr = { p0[0],p1[0],p2[0],0 };
+        dg = { p0[1],p1[1],p2[1],0 };
+        db = { p0[2],p1[2],p2[2],0 };
+        da = { p0[3],p1[3],p2[3],0 };
+        return;
+    }
+
     Sk4h rh, gh, bh, ah;
     Sk4h_load4(ptr, &rh, &gh, &bh, &ah);
 
     dr = SkHalfToFloat_finite_ftz(rh);
     dg = SkHalfToFloat_finite_ftz(gh);
     db = SkHalfToFloat_finite_ftz(bh);
     da = SkHalfToFloat_finite_ftz(ah);
 }
 
-// Load 1 F16 pixel.
-SK_RASTER_STAGE(load_d_f16_1) {
-    auto ptr = (const uint64_t*)ctx + x;
-
-    auto p0 = SkHalfToFloat_finite_ftz(ptr[0]);
-    dr = { p0[0],0,0,0 };
-    dg = { p0[1],0,0,0 };
-    db = { p0[2],0,0,0 };
-    da = { p0[3],0,0,0 };
-}
-
 // Store 4 F16 pixels.
 SK_RASTER_STAGE(store_f16) {
     auto ptr = (uint64_t*)ctx + x;
 
-    Sk4h_store4(ptr, SkFloatToHalf_finite_ftz(r), SkFloatToHalf_finite_ftz(g),
-                     SkFloatToHalf_finite_ftz(b), SkFloatToHalf_finite_ftz(a));
+    switch (tail) {
+        case 0: return Sk4h_store4(ptr, SkFloatToHalf_finite_ftz(r), SkFloatToHalf_finite_ftz(g),
+                                        SkFloatToHalf_finite_ftz(b), SkFloatToHalf_finite_ftz(a));
+
+        case 3: SkFloatToHalf_finite_ftz({r[2], g[2], b[2], a[2]}).store(ptr+2);
+        case 2: SkFloatToHalf_finite_ftz({r[1], g[1], b[1], a[1]}).store(ptr+1);
+        case 1: SkFloatToHalf_finite_ftz({r[0], g[0], b[0], a[0]}).store(ptr+0);
+    }
 }
 
-// Store 1 F16 pixel.
-SK_RASTER_STAGE(store_f16_1) {
-    auto ptr = (uint64_t*)ctx + x;
-
-    SkFloatToHalf_finite_ftz({r[0], g[0], b[0], a[0]}).store(ptr);
-}
-
-// Load 4 8-bit sRGB pixels from SkPMColor order to RGBA.
+// Load 8-bit SkPMColor-order sRGB.
 SK_RASTER_STAGE(load_d_srgb) {
     auto ptr = (const uint32_t*)ctx + x;
 
+    if (tail) {
+        float rs[] = {0,0,0,0},
+              gs[] = {0,0,0,0},
+              bs[] = {0,0,0,0},
+              as[] = {0,0,0,0};
+        for (size_t i = 0; i < tail; i++) {
+            rs[i] = sk_linear_from_srgb[(ptr[i] >> SK_R32_SHIFT) & 0xff];
+            gs[i] = sk_linear_from_srgb[(ptr[i] >> SK_G32_SHIFT) & 0xff];
+            bs[i] = sk_linear_from_srgb[(ptr[i] >> SK_B32_SHIFT) & 0xff];
+            as[i] = (1/255.0f) * (ptr[i] >> SK_A32_SHIFT);
+        }
+        dr = Sk4f::Load(rs);
+        dg = Sk4f::Load(gs);
+        db = Sk4f::Load(bs);
+        da = Sk4f::Load(as);
+        return;
+    }
+
     dr = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
            sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
            sk_linear_from_srgb[(ptr[2] >> SK_R32_SHIFT) & 0xff],
@@ -250,34 +242,13 @@ SK_RASTER_STAGE(load_d_srgb) {
     da = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
 }
 
-// Tail variant of load_d_srgb() handling 1 pixel at a time.
-SK_RASTER_STAGE(load_d_srgb_1) {
-    auto ptr = (const uint32_t*)ctx + x;
-
-    dr = { sk_linear_from_srgb[(*ptr >> SK_R32_SHIFT) & 0xff], 0,0,0 };
-    dg = { sk_linear_from_srgb[(*ptr >> SK_G32_SHIFT) & 0xff], 0,0,0 };
-    db = { sk_linear_from_srgb[(*ptr >> SK_B32_SHIFT) & 0xff], 0,0,0 };
-    da = { (1/255.0f) * (*ptr >> SK_A32_SHIFT), 0,0,0 };
-}
-
-// Write out 4 pixels as 8-bit SkPMColor-order sRGB.
+// Store 8-bit SkPMColor-order sRGB.
 SK_RASTER_STAGE(store_srgb) {
     auto ptr = (uint32_t*)ctx + x;
-    ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
-    | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
-    | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
-    | Sk4f_round(255.0f * a)       << SK_A32_SHIFT).store(ptr);
-}
-
-// Tail variant of store_srgb() handling 1 pixel at a time.
-SK_RASTER_STAGE(store_srgb_1) {
-    auto ptr = (uint32_t*)ctx + x;
-    Sk4i rgb = sk_linear_to_srgb_noclamp(swizzle_rb_if_bgra({ r[0], g[0], b[0], 0.0f }));
-
-    uint32_t rgba;
-    SkNx_cast<uint8_t>(rgb).store(&rgba);
-    rgba |= (uint32_t)(255.0f * a[0] + 0.5f) << 24;
-    *ptr = rgba;
+    store_tail(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
+                     | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
+                     | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
+                     | Sk4f_round(255.0f * a)       << SK_A32_SHIFT), (int*)ptr);
 }
 
 static bool supported(const SkImageInfo& info) {
@@ -341,14 +312,14 @@ void SkRasterPipelineBlitter::append_load_d(SkRasterPipeline* p, const void* dst
     switch (fDst.info().colorType()) {
         case kN32_SkColorType:
             if (fDst.info().gammaCloseToSRGB()) {
-                p->append<load_d_srgb, load_d_srgb_1>(dst);
+                p->append<load_d_srgb>(dst);
             }
             break;
         case kRGBA_F16_SkColorType:
-            p->append<load_d_f16, load_d_f16_1>(dst);
+            p->append<load_d_f16>(dst);
             break;
         case kRGB_565_SkColorType:
-            p->append<load_d_565, load_d_565_1>(dst);
+            p->append<load_d_565>(dst);
             break;
         default: break;
     }
@@ -361,14 +332,14 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const
     switch (fDst.info().colorType()) {
         case kN32_SkColorType:
             if (fDst.info().gammaCloseToSRGB()) {
-                p->last<store_srgb, store_srgb_1>(dst);
+                p->last<store_srgb>(dst);
             }
             break;
         case kRGBA_F16_SkColorType:
-            p->last<store_f16, store_f16_1>(dst);
+            p->last<store_f16>(dst);
             break;
         case kRGB_565_SkColorType:
-            p->last<store_565, store_565_1>(dst);
+            p->last<store_565>(dst);
             break;
         default: break;
     }
@@ -426,10 +397,10 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
     p.extend(fXfermode);
     switch (mask.fFormat) {
         case SkMask::kA8_Format:
-            p.append<lerp_a8, lerp_a8_1>(mask.getAddr8(x,y)-x);
+            p.append<lerp_a8>(mask.getAddr8(x,y)-x);
             break;
         case SkMask::kLCD16_Format:
-            p.append<lerp_lcd16, lerp_lcd16_1>(mask.getAddrLCD16(x,y)-x);
+            p.append<lerp_lcd16>(mask.getAddrLCD16(x,y)-x);
             break;
         default: break;
     }
 
@@ -1437,14 +1437,14 @@ static Sk4f inv(const Sk4f& x) { return 1.0f - x; }
 
 // Most of these modes apply the same logic kernel to each channel.
 template <Sk4f kernel(const Sk4f& s, const Sk4f& sa, const Sk4f& d, const Sk4f& da)>
-static void SK_VECTORCALL rgba(SkRasterPipeline::Stage* st, size_t x,
+static void SK_VECTORCALL rgba(SkRasterPipeline::Stage* st, size_t x, size_t tail,
                                Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
     r = kernel(r,a,dr,da);
     g = kernel(g,a,dg,da);
     b = kernel(b,a,db,da);
     a = kernel(a,a,da,da);
-    st->next(x, r,g,b,a, dr,dg,db,da);
+    st->next(x,tail, r,g,b,a, dr,dg,db,da);
 }
 
 #define KERNEL(name) static Sk4f name(const Sk4f& s, const Sk4f& sa, const Sk4f& d, const Sk4f& da)
@@ -1468,14 +1468,14 @@ KERNEL(xor_) { return s*inv(da) + d*inv(sa); }
 // Most of the rest apply the same logic to each color channel, and srcover's logic to alpha.
 // (darken and lighten can actually go either way, but they're a little faster this way.)
 template <Sk4f kernel(const Sk4f& s, const Sk4f& sa, const Sk4f& d, const Sk4f& da)>
-static void SK_VECTORCALL rgb_srcover(SkRasterPipeline::Stage* st, size_t x,
+static void SK_VECTORCALL rgb_srcover(SkRasterPipeline::Stage* st, size_t x, size_t tail,
                                       Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                       Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
     r = kernel(r,a,dr,da);
     g = kernel(g,a,dg,da);
     b = kernel(b,a,db,da);
     a = a + da*inv(a);
-    st->next(x, r,g,b,a, dr,dg,db,da);
+    st->next(x,tail, r,g,b,a, dr,dg,db,da);
 }
 
 KERNEL(colorburn) {
@@ -45,7 +45,7 @@ public:
 #endif
 
 private:
-    static void SK_VECTORCALL Stage(SkRasterPipeline::Stage* st, size_t x,
+    static void SK_VECTORCALL Stage(SkRasterPipeline::Stage* st, size_t x, size_t tail,
                                     Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                     Sk4f dr, Sk4f dg, Sk4f db, Sk4f da);
 
@@ -74,7 +74,8 @@ sk_sp<SkFlattenable> SkArithmeticMode_scalar::CreateProc(SkReadBuffer& buffer) {
     return SkArithmeticMode::Make(k1, k2, k3, k4, enforcePMColor);
 }
 
-void SK_VECTORCALL SkArithmeticMode_scalar::Stage(SkRasterPipeline::Stage* st, size_t x,
+void SK_VECTORCALL SkArithmeticMode_scalar::Stage(SkRasterPipeline::Stage* st,
+                                                  size_t x, size_t tail,
                                                   Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                                   Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
     auto self = st->ctx<const SkArithmeticMode_scalar*>();
@@ -91,7 +92,7 @@ void SK_VECTORCALL SkArithmeticMode_scalar::Stage(SkRasterPipeline::Stage* st, s
 
     // A later stage (clamp_01_premul) will pin and fEnforcePMColor for us.
 
-    st->next(x, r,g,b,a, dr,dg,db,da);
+    st->next(x,tail, r,g,b,a, dr,dg,db,da);
 }
 
 void SkArithmeticMode_scalar::xfer32(SkPMColor dst[], const SkPMColor src[],
@@ -8,22 +8,16 @@
 #include "Test.h"
 #include "SkRasterPipeline.h"
 
-// load needs two variants, one to load 4 values...
 SK_RASTER_STAGE(load) {
     auto ptr = (const float*)ctx + x;
-    r = Sk4f{ptr[0]};
-    g = Sk4f{ptr[1]};
-    b = Sk4f{ptr[2]};
-    a = Sk4f{ptr[3]};
+    switch(tail&3) {
+        case 0: a = Sk4f{ptr[3]};
+        case 3: b = Sk4f{ptr[2]};
+        case 2: g = Sk4f{ptr[1]};
+        case 1: r = Sk4f{ptr[0]};
+    }
 }
 
-// ...and one to load a single value.
-SK_RASTER_STAGE(load_tail) {
-    auto ptr = (const float*)ctx + x;
-    r = Sk4f{*ptr};
-}
-
 // square doesn't really care how many of its inputs are active, nor does it need a context.
 SK_RASTER_STAGE(square) {
     r *= r;
     g *= g;
@@ -31,26 +25,22 @@ SK_RASTER_STAGE(square) {
     a *= a;
 }
 
-// Like load, store has a _tail variant.
 SK_RASTER_STAGE(store) {
     auto ptr = (float*)ctx + x;
-    ptr[0] = r[0];
-    ptr[1] = g[0];
-    ptr[2] = b[0];
-    ptr[3] = a[0];
-}
-
-SK_RASTER_STAGE(store_tail) {
-    auto ptr = (float*)ctx + x;
-    *ptr = r[0];
+    switch (tail&3) {
+        case 0: ptr[3] = a[0];
+        case 3: ptr[2] = b[0];
+        case 2: ptr[1] = g[0];
+        case 1: ptr[0] = r[0];
+    }
 }
 
 DEF_TEST(SkRasterPipeline, r) {
     // We'll build up and run a simple pipeline that exercises the salient
     // mechanics of SkRasterPipeline:
-    //    - context pointers
-    //    - stages sensitive to the number of pixels
-    //    - stages insensitive to the number of pixels
+    //    - context pointers (load,store)
+    //    - stages sensitive to the number of pixels (load,store)
+    //    - stages insensitive to the number of pixels (square)
     //
     // This pipeline loads up some values, squares them, then writes them back to memory.
 
@@ -58,9 +48,9 @@ DEF_TEST(SkRasterPipeline, r) {
     float dst_vals[] = { 0,0,0,0,0 };
 
     SkRasterPipeline p;
-    p.append<load, load_tail>(src_vals);
+    p.append<load>(src_vals);
     p.append<square>();
-    p.append<store, store_tail>(dst_vals);
+    p.append<store>(dst_vals);
 
     p.run(5);
 