Convert SkRasterPipeline loads and stores to indirect.
This allows us to change the underlying pointer without rebuilding the pipeline, e.g. when moving the blitter from scanline to scanline.

The extra overhead when not needed is measurable but small, <2%. We can always add back direct stages later for cases where we know the context pointer will not change.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3943

Change-Id: I827d7e6e4e67d02dd2802610f898f98c5f36f8cb
Reviewed-on: https://skia-review.googlesource.com/3943
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
ce37e7af8b
commit
bd3fe475b8
@ -32,14 +32,18 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void onDraw(int loops, SkCanvas*) override {
|
void onDraw(int loops, SkCanvas*) override {
|
||||||
|
void* mask_ctx = mask;
|
||||||
|
void* src_ctx = src;
|
||||||
|
void* dst_ctx = dst;
|
||||||
|
|
||||||
SkRasterPipeline p;
|
SkRasterPipeline p;
|
||||||
p.append(SkRasterPipeline::load_s_srgb, src);
|
p.append(SkRasterPipeline::load_s_srgb, &src_ctx);
|
||||||
p.append(SkRasterPipeline:: scale_u8, mask);
|
p.append(SkRasterPipeline::scale_u8, &mask_ctx);
|
||||||
p.append(kF16 ? SkRasterPipeline::load_d_f16
|
p.append(kF16 ? SkRasterPipeline::load_d_f16
|
||||||
: SkRasterPipeline::load_d_srgb, dst);
|
: SkRasterPipeline::load_d_srgb, &dst_ctx);
|
||||||
p.append(SkRasterPipeline:: srcover);
|
p.append(SkRasterPipeline::srcover);
|
||||||
p.append(kF16 ? SkRasterPipeline::store_f16
|
p.append(kF16 ? SkRasterPipeline::store_f16
|
||||||
: SkRasterPipeline::store_srgb, dst);
|
: SkRasterPipeline::store_srgb, &dst_ctx);
|
||||||
auto compiled = p.compile();
|
auto compiled = p.compile();
|
||||||
|
|
||||||
while (loops --> 0) {
|
while (loops --> 0) {
|
||||||
|
@ -47,6 +47,18 @@ private:
|
|||||||
SkRasterPipeline fShader, fColorFilter, fXfermode;
|
SkRasterPipeline fShader, fColorFilter, fXfermode;
|
||||||
SkPM4f fPaintColor;
|
SkPM4f fPaintColor;
|
||||||
|
|
||||||
|
// These functions are compiled lazily when first used.
|
||||||
|
std::function<void(size_t, size_t)> fBlitH = nullptr,
|
||||||
|
fBlitAntiH = nullptr,
|
||||||
|
fBlitMaskA8 = nullptr,
|
||||||
|
fBlitMaskLCD16 = nullptr;
|
||||||
|
|
||||||
|
// These values are pointed to by the compiled blit functions
|
||||||
|
// above, which allows us to adjust them from call to call.
|
||||||
|
void* fDstPtr = nullptr;
|
||||||
|
const void* fMaskPtr = nullptr;
|
||||||
|
float fConstantCoverage = 0.0f;
|
||||||
|
|
||||||
typedef SkBlitter INHERITED;
|
typedef SkBlitter INHERITED;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -152,33 +164,36 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const
|
|||||||
// TODO: Figure out how to cache some of the compiled pipelines.
|
// TODO: Figure out how to cache some of the compiled pipelines.
|
||||||
|
|
||||||
void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
|
void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
|
||||||
auto dst = fDst.writable_addr(0,y);
|
if (!fBlitH) {
|
||||||
|
SkRasterPipeline p;
|
||||||
|
p.extend(fShader);
|
||||||
|
p.extend(fColorFilter);
|
||||||
|
this->append_load_d(&p, &fDstPtr);
|
||||||
|
p.extend(fXfermode);
|
||||||
|
this->append_store(&p, &fDstPtr);
|
||||||
|
fBlitH = p.compile();
|
||||||
|
}
|
||||||
|
|
||||||
SkRasterPipeline p;
|
fDstPtr = fDst.writable_addr(0,y);
|
||||||
p.extend(fShader);
|
fBlitH(x,w);
|
||||||
p.extend(fColorFilter);
|
|
||||||
this->append_load_d(&p, dst);
|
|
||||||
p.extend(fXfermode);
|
|
||||||
this->append_store(&p, dst);
|
|
||||||
|
|
||||||
p.compile()(x,w);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
|
void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
|
||||||
auto dst = fDst.writable_addr(0,y);
|
if (!fBlitAntiH) {
|
||||||
float coverage;
|
SkRasterPipeline p;
|
||||||
|
p.extend(fShader);
|
||||||
SkRasterPipeline p;
|
p.extend(fColorFilter);
|
||||||
p.extend(fShader);
|
this->append_load_d(&p, &fDstPtr);
|
||||||
p.extend(fColorFilter);
|
p.extend(fXfermode);
|
||||||
this->append_load_d(&p, dst);
|
p.append(SkRasterPipeline::lerp_constant_float, &fConstantCoverage);
|
||||||
p.extend(fXfermode);
|
this->append_store(&p, &fDstPtr);
|
||||||
p.append(SkRasterPipeline::lerp_constant_float, &coverage);
|
fBlitAntiH = p.compile();
|
||||||
this->append_store(&p, dst);
|
}
|
||||||
|
|
||||||
|
fDstPtr = fDst.writable_addr(0,y);
|
||||||
for (int16_t run = *runs; run > 0; run = *runs) {
|
for (int16_t run = *runs; run > 0; run = *runs) {
|
||||||
coverage = *aa * (1/255.0f);
|
fConstantCoverage = *aa * (1/255.0f);
|
||||||
p.compile()(x, run);
|
fBlitAntiH(x, run);
|
||||||
|
|
||||||
x += run;
|
x += run;
|
||||||
runs += run;
|
runs += run;
|
||||||
@ -192,26 +207,44 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
|
|||||||
return INHERITED::blitMask(mask, clip);
|
return INHERITED::blitMask(mask, clip);
|
||||||
}
|
}
|
||||||
|
|
||||||
int x = clip.left();
|
if (mask.fFormat == SkMask::kA8_Format && !fBlitMaskA8) {
|
||||||
for (int y = clip.top(); y < clip.bottom(); y++) {
|
|
||||||
auto dst = fDst.writable_addr(0,y);
|
|
||||||
|
|
||||||
SkRasterPipeline p;
|
SkRasterPipeline p;
|
||||||
p.extend(fShader);
|
p.extend(fShader);
|
||||||
p.extend(fColorFilter);
|
p.extend(fColorFilter);
|
||||||
this->append_load_d(&p, dst);
|
this->append_load_d(&p, &fDstPtr);
|
||||||
p.extend(fXfermode);
|
p.extend(fXfermode);
|
||||||
|
p.append(SkRasterPipeline::lerp_u8, &fMaskPtr);
|
||||||
|
this->append_store(&p, &fDstPtr);
|
||||||
|
fBlitMaskA8 = p.compile();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mask.fFormat == SkMask::kLCD16_Format && !fBlitMaskLCD16) {
|
||||||
|
SkRasterPipeline p;
|
||||||
|
p.extend(fShader);
|
||||||
|
p.extend(fColorFilter);
|
||||||
|
this->append_load_d(&p, &fDstPtr);
|
||||||
|
p.extend(fXfermode);
|
||||||
|
p.append(SkRasterPipeline::lerp_565, &fMaskPtr);
|
||||||
|
this->append_store(&p, &fDstPtr);
|
||||||
|
fBlitMaskLCD16 = p.compile();
|
||||||
|
}
|
||||||
|
|
||||||
|
int x = clip.left();
|
||||||
|
for (int y = clip.top(); y < clip.bottom(); y++) {
|
||||||
|
fDstPtr = fDst.writable_addr(0,y);
|
||||||
|
|
||||||
switch (mask.fFormat) {
|
switch (mask.fFormat) {
|
||||||
case SkMask::kA8_Format:
|
case SkMask::kA8_Format:
|
||||||
p.append(SkRasterPipeline::lerp_u8, mask.getAddr8(x,y)-x);
|
fMaskPtr = mask.getAddr8(x,y)-x;
|
||||||
|
fBlitMaskA8(x, clip.width());
|
||||||
break;
|
break;
|
||||||
case SkMask::kLCD16_Format:
|
case SkMask::kLCD16_Format:
|
||||||
p.append(SkRasterPipeline::lerp_565, mask.getAddrLCD16(x,y)-x);
|
fMaskPtr = mask.getAddrLCD16(x,y)-x;
|
||||||
|
fBlitMaskLCD16(x, clip.width());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// TODO
|
||||||
break;
|
break;
|
||||||
default: break;
|
|
||||||
}
|
}
|
||||||
this->append_store(&p, dst);
|
|
||||||
|
|
||||||
p.compile()(x, clip.width());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -233,7 +233,7 @@ STAGE(lerp_constant_float, true) {
|
|||||||
|
|
||||||
// s' = sc for 8-bit c.
|
// s' = sc for 8-bit c.
|
||||||
STAGE(scale_u8, true) {
|
STAGE(scale_u8, true) {
|
||||||
auto ptr = (const uint8_t*)ctx + x;
|
auto ptr = *(const uint8_t**)ctx + x;
|
||||||
|
|
||||||
SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
|
SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
|
||||||
r = r*c;
|
r = r*c;
|
||||||
@ -244,7 +244,7 @@ STAGE(scale_u8, true) {
|
|||||||
|
|
||||||
// s' = d(1-c) + sc for 8-bit c.
|
// s' = d(1-c) + sc for 8-bit c.
|
||||||
STAGE(lerp_u8, true) {
|
STAGE(lerp_u8, true) {
|
||||||
auto ptr = (const uint8_t*)ctx + x;
|
auto ptr = *(const uint8_t**)ctx + x;
|
||||||
|
|
||||||
SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
|
SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
|
||||||
r = lerp(dr, r, c);
|
r = lerp(dr, r, c);
|
||||||
@ -255,7 +255,7 @@ STAGE(lerp_u8, true) {
|
|||||||
|
|
||||||
// s' = d(1-c) + sc for 565 c.
|
// s' = d(1-c) + sc for 565 c.
|
||||||
STAGE(lerp_565, true) {
|
STAGE(lerp_565, true) {
|
||||||
auto ptr = (const uint16_t*)ctx + x;
|
auto ptr = *(const uint16_t**)ctx + x;
|
||||||
SkNf cr, cg, cb;
|
SkNf cr, cg, cb;
|
||||||
from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);
|
from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);
|
||||||
|
|
||||||
@ -266,25 +266,25 @@ STAGE(lerp_565, true) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
STAGE(load_d_565, true) {
|
STAGE(load_d_565, true) {
|
||||||
auto ptr = (const uint16_t*)ctx + x;
|
auto ptr = *(const uint16_t**)ctx + x;
|
||||||
from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
|
from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
|
||||||
da = 1.0f;
|
da = 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
STAGE(load_s_565, true) {
|
STAGE(load_s_565, true) {
|
||||||
auto ptr = (const uint16_t*)ctx + x;
|
auto ptr = *(const uint16_t**)ctx + x;
|
||||||
from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
|
from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
|
||||||
a = 1.0f;
|
a = 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
STAGE(store_565, false) {
|
STAGE(store_565, false) {
|
||||||
clamp_01_premul(r,g,b,a);
|
clamp_01_premul(r,g,b,a);
|
||||||
auto ptr = (uint16_t*)ctx + x;
|
auto ptr = *(uint16_t**)ctx + x;
|
||||||
store<kIsTail>(tail, to_565(r,g,b), ptr);
|
store<kIsTail>(tail, to_565(r,g,b), ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
STAGE(load_d_f16, true) {
|
STAGE(load_d_f16, true) {
|
||||||
auto ptr = (const uint64_t*)ctx + x;
|
auto ptr = *(const uint64_t**)ctx + x;
|
||||||
|
|
||||||
SkNh rh, gh, bh, ah;
|
SkNh rh, gh, bh, ah;
|
||||||
if (kIsTail) {
|
if (kIsTail) {
|
||||||
@ -310,7 +310,7 @@ STAGE(load_d_f16, true) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
STAGE(load_s_f16, true) {
|
STAGE(load_s_f16, true) {
|
||||||
auto ptr = (const uint64_t*)ctx + x;
|
auto ptr = *(const uint64_t**)ctx + x;
|
||||||
|
|
||||||
SkNh rh, gh, bh, ah;
|
SkNh rh, gh, bh, ah;
|
||||||
if (kIsTail) {
|
if (kIsTail) {
|
||||||
@ -337,7 +337,7 @@ STAGE(load_s_f16, true) {
|
|||||||
|
|
||||||
STAGE(store_f16, false) {
|
STAGE(store_f16, false) {
|
||||||
clamp_01_premul(r,g,b,a);
|
clamp_01_premul(r,g,b,a);
|
||||||
auto ptr = (uint64_t*)ctx + x;
|
auto ptr = *(uint64_t**)ctx + x;
|
||||||
|
|
||||||
uint64_t buf[8];
|
uint64_t buf[8];
|
||||||
SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
|
SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
|
||||||
@ -360,7 +360,7 @@ STAGE(store_f16, false) {
|
|||||||
|
|
||||||
// Load 8-bit SkPMColor-order sRGB.
|
// Load 8-bit SkPMColor-order sRGB.
|
||||||
STAGE(load_d_srgb, true) {
|
STAGE(load_d_srgb, true) {
|
||||||
auto ptr = (const uint32_t*)ctx + x;
|
auto ptr = *(const uint32_t**)ctx + x;
|
||||||
|
|
||||||
auto px = load<kIsTail>(tail, ptr);
|
auto px = load<kIsTail>(tail, ptr);
|
||||||
auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
|
auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
|
||||||
@ -371,7 +371,7 @@ STAGE(load_d_srgb, true) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
STAGE(load_s_srgb, true) {
|
STAGE(load_s_srgb, true) {
|
||||||
auto ptr = (const uint32_t*)ctx + x;
|
auto ptr = *(const uint32_t**)ctx + x;
|
||||||
|
|
||||||
auto px = load<kIsTail>(tail, ptr);
|
auto px = load<kIsTail>(tail, ptr);
|
||||||
auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
|
auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
|
||||||
@ -383,7 +383,7 @@ STAGE(load_s_srgb, true) {
|
|||||||
|
|
||||||
STAGE(store_srgb, false) {
|
STAGE(store_srgb, false) {
|
||||||
clamp_01_premul(r,g,b,a);
|
clamp_01_premul(r,g,b,a);
|
||||||
auto ptr = (uint32_t*)ctx + x;
|
auto ptr = *(uint32_t**)ctx + x;
|
||||||
store<kIsTail>(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
|
store<kIsTail>(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
|
||||||
| sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
|
| sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
|
||||||
| sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
|
| sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
|
||||||
|
@ -16,11 +16,15 @@ DEF_TEST(SkRasterPipeline, r) {
|
|||||||
blue = 0x3800380000000000ull,
|
blue = 0x3800380000000000ull,
|
||||||
result;
|
result;
|
||||||
|
|
||||||
|
void* load_s_ctx = &blue;
|
||||||
|
void* load_d_ctx = &red;
|
||||||
|
void* store_ctx = &result;
|
||||||
|
|
||||||
SkRasterPipeline p;
|
SkRasterPipeline p;
|
||||||
p.append(SkRasterPipeline::load_s_f16, &blue);
|
p.append(SkRasterPipeline::load_s_f16, &load_s_ctx);
|
||||||
p.append(SkRasterPipeline::load_d_f16, &red);
|
p.append(SkRasterPipeline::load_d_f16, &load_d_ctx);
|
||||||
p.append(SkRasterPipeline::srcover);
|
p.append(SkRasterPipeline::srcover);
|
||||||
p.append(SkRasterPipeline::store_f16, &result);
|
p.append(SkRasterPipeline::store_f16, &store_ctx);
|
||||||
p.compile()(0, 1);
|
p.compile()(0, 1);
|
||||||
|
|
||||||
// We should see half-intensity magenta.
|
// We should see half-intensity magenta.
|
||||||
|
Loading…
Reference in New Issue
Block a user