diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp index bfe2110362..624b7951de 100644 --- a/bench/SkRasterPipelineBench.cpp +++ b/bench/SkRasterPipelineBench.cpp @@ -32,14 +32,18 @@ public: } void onDraw(int loops, SkCanvas*) override { + void* mask_ctx = mask; + void* src_ctx = src; + void* dst_ctx = dst; + SkRasterPipeline p; - p.append(SkRasterPipeline::load_s_srgb, src); - p.append(SkRasterPipeline:: scale_u8, mask); + p.append(SkRasterPipeline::load_s_srgb, &src_ctx); + p.append(SkRasterPipeline::scale_u8, &mask_ctx); p.append(kF16 ? SkRasterPipeline::load_d_f16 - : SkRasterPipeline::load_d_srgb, dst); - p.append(SkRasterPipeline:: srcover); + : SkRasterPipeline::load_d_srgb, &dst_ctx); + p.append(SkRasterPipeline::srcover); p.append(kF16 ? SkRasterPipeline::store_f16 - : SkRasterPipeline::store_srgb, dst); + : SkRasterPipeline::store_srgb, &dst_ctx); auto compiled = p.compile(); while (loops --> 0) { diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp index ec634c8b98..51c2fc1805 100644 --- a/src/core/SkRasterPipelineBlitter.cpp +++ b/src/core/SkRasterPipelineBlitter.cpp @@ -47,6 +47,18 @@ private: SkRasterPipeline fShader, fColorFilter, fXfermode; SkPM4f fPaintColor; + // These functions are compiled lazily when first used. + std::function fBlitH = nullptr, + fBlitAntiH = nullptr, + fBlitMaskA8 = nullptr, + fBlitMaskLCD16 = nullptr; + + // These values are pointed to by the compiled blit functions + // above, which allows us to adjust them from call to call. + void* fDstPtr = nullptr; + const void* fMaskPtr = nullptr; + float fConstantCoverage = 0.0f; + typedef SkBlitter INHERITED; }; @@ -152,33 +164,36 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const // TODO: Figure out how to cache some of the compiled pipelines. void SkRasterPipelineBlitter::blitH(int x, int y, int w) { - auto dst = fDst.writable_addr(0,y); + if (!fBlitH) { + SkRasterPipeline p; + p.extend(fShader); + p.extend(fColorFilter); + this->append_load_d(&p, &fDstPtr); + p.extend(fXfermode); + this->append_store(&p, &fDstPtr); + fBlitH = p.compile(); + } - SkRasterPipeline p; - p.extend(fShader); - p.extend(fColorFilter); - this->append_load_d(&p, dst); - p.extend(fXfermode); - this->append_store(&p, dst); - - p.compile()(x,w); + fDstPtr = fDst.writable_addr(0,y); + fBlitH(x,w); } void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) { - auto dst = fDst.writable_addr(0,y); - float coverage; - - SkRasterPipeline p; - p.extend(fShader); - p.extend(fColorFilter); - this->append_load_d(&p, dst); - p.extend(fXfermode); - p.append(SkRasterPipeline::lerp_constant_float, &coverage); - this->append_store(&p, dst); + if (!fBlitAntiH) { + SkRasterPipeline p; + p.extend(fShader); + p.extend(fColorFilter); + this->append_load_d(&p, &fDstPtr); + p.extend(fXfermode); + p.append(SkRasterPipeline::lerp_constant_float, &fConstantCoverage); + this->append_store(&p, &fDstPtr); + fBlitAntiH = p.compile(); + } + fDstPtr = fDst.writable_addr(0,y); for (int16_t run = *runs; run > 0; run = *runs) { - coverage = *aa * (1/255.0f); - p.compile()(x, run); + fConstantCoverage = *aa * (1/255.0f); + fBlitAntiH(x, run); x += run; runs += run; @@ -192,26 +207,44 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip) return INHERITED::blitMask(mask, clip); } - int x = clip.left(); - for (int y = clip.top(); y < clip.bottom(); y++) { - auto dst = fDst.writable_addr(0,y); - + if (mask.fFormat == SkMask::kA8_Format && !fBlitMaskA8) { SkRasterPipeline p; p.extend(fShader); p.extend(fColorFilter); - this->append_load_d(&p, dst); + this->append_load_d(&p, &fDstPtr); p.extend(fXfermode); + p.append(SkRasterPipeline::lerp_u8, &fMaskPtr); + this->append_store(&p, &fDstPtr); + fBlitMaskA8 = p.compile(); + } + + if (mask.fFormat == SkMask::kLCD16_Format && !fBlitMaskLCD16) { + SkRasterPipeline p; + p.extend(fShader); + p.extend(fColorFilter); + this->append_load_d(&p, &fDstPtr); + p.extend(fXfermode); + p.append(SkRasterPipeline::lerp_565, &fMaskPtr); + this->append_store(&p, &fDstPtr); + fBlitMaskLCD16 = p.compile(); + } + + int x = clip.left(); + for (int y = clip.top(); y < clip.bottom(); y++) { + fDstPtr = fDst.writable_addr(0,y); + switch (mask.fFormat) { case SkMask::kA8_Format: - p.append(SkRasterPipeline::lerp_u8, mask.getAddr8(x,y)-x); + fMaskPtr = mask.getAddr8(x,y)-x; + fBlitMaskA8(x, clip.width()); break; case SkMask::kLCD16_Format: - p.append(SkRasterPipeline::lerp_565, mask.getAddrLCD16(x,y)-x); + fMaskPtr = mask.getAddrLCD16(x,y)-x; + fBlitMaskLCD16(x, clip.width()); + break; + default: + // TODO break; - default: break; } - this->append_store(&p, dst); - - p.compile()(x, clip.width()); } } diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index b8c882097e..5b16bf34f8 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -233,7 +233,7 @@ STAGE(lerp_constant_float, true) { // s' = sc for 8-bit c. STAGE(scale_u8, true) { - auto ptr = (const uint8_t*)ctx + x; + auto ptr = *(const uint8_t**)ctx + x; SkNf c = SkNx_cast(load(tail, ptr)) * (1/255.0f); r = r*c; @@ -244,7 +244,7 @@ STAGE(scale_u8, true) { // s' = d(1-c) + sc for 8-bit c. STAGE(lerp_u8, true) { - auto ptr = (const uint8_t*)ctx + x; + auto ptr = *(const uint8_t**)ctx + x; SkNf c = SkNx_cast(load(tail, ptr)) * (1/255.0f); r = lerp(dr, r, c); @@ -255,7 +255,7 @@ STAGE(lerp_u8, true) { // s' = d(1-c) + sc for 565 c. STAGE(lerp_565, true) { - auto ptr = (const uint16_t*)ctx + x; + auto ptr = *(const uint16_t**)ctx + x; SkNf cr, cg, cb; from_565(load(tail, ptr), &cr, &cg, &cb); @@ -266,25 +266,25 @@ STAGE(lerp_565, true) { } STAGE(load_d_565, true) { - auto ptr = (const uint16_t*)ctx + x; + auto ptr = *(const uint16_t**)ctx + x; from_565(load(tail, ptr), &dr,&dg,&db); da = 1.0f; } STAGE(load_s_565, true) { - auto ptr = (const uint16_t*)ctx + x; + auto ptr = *(const uint16_t**)ctx + x; from_565(load(tail, ptr), &r,&g,&b); a = 1.0f; } STAGE(store_565, false) { clamp_01_premul(r,g,b,a); - auto ptr = (uint16_t*)ctx + x; + auto ptr = *(uint16_t**)ctx + x; store(tail, to_565(r,g,b), ptr); } STAGE(load_d_f16, true) { - auto ptr = (const uint64_t*)ctx + x; + auto ptr = *(const uint64_t**)ctx + x; SkNh rh, gh, bh, ah; if (kIsTail) { @@ -310,7 +310,7 @@ STAGE(load_d_f16, true) { } STAGE(load_s_f16, true) { - auto ptr = (const uint64_t*)ctx + x; + auto ptr = *(const uint64_t**)ctx + x; SkNh rh, gh, bh, ah; if (kIsTail) { @@ -337,7 +337,7 @@ STAGE(load_s_f16, true) { STAGE(store_f16, false) { clamp_01_premul(r,g,b,a); - auto ptr = (uint64_t*)ctx + x; + auto ptr = *(uint64_t**)ctx + x; uint64_t buf[8]; SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r), @@ -360,7 +360,7 @@ STAGE(store_f16, false) { // Load 8-bit SkPMColor-order sRGB. STAGE(load_d_srgb, true) { - auto ptr = (const uint32_t*)ctx + x; + auto ptr = *(const uint32_t**)ctx + x; auto px = load(tail, ptr); auto to_int = [](const SkNx& v) { return SkNi::Load(&v); }; @@ -371,7 +371,7 @@ STAGE(load_d_srgb, true) { } STAGE(load_s_srgb, true) { - auto ptr = (const uint32_t*)ctx + x; + auto ptr = *(const uint32_t**)ctx + x; auto px = load(tail, ptr); auto to_int = [](const SkNx& v) { return SkNi::Load(&v); }; @@ -383,7 +383,7 @@ STAGE(load_s_srgb, true) { STAGE(store_srgb, false) { clamp_01_premul(r,g,b,a); - auto ptr = (uint32_t*)ctx + x; + auto ptr = *(uint32_t**)ctx + x; store(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp index 049729043f..96798fcd52 100644 --- a/tests/SkRasterPipelineTest.cpp +++ b/tests/SkRasterPipelineTest.cpp @@ -16,11 +16,15 @@ DEF_TEST(SkRasterPipeline, r) { blue = 0x3800380000000000ull, result; + void* load_s_ctx = &blue; + void* load_d_ctx = &red; + void* store_ctx = &result; + SkRasterPipeline p; - p.append(SkRasterPipeline::load_s_f16, &blue); - p.append(SkRasterPipeline::load_d_f16, &red); + p.append(SkRasterPipeline::load_s_f16, &load_s_ctx); + p.append(SkRasterPipeline::load_d_f16, &load_d_ctx); p.append(SkRasterPipeline::srcover); - p.append(SkRasterPipeline::store_f16, &result); + p.append(SkRasterPipeline::store_f16, &store_ctx); p.compile()(0, 1); // We should see half-intensity magenta.