From d1d4cbf2e42db786bef0bd0d526e06dbd5e6870e Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Tue, 6 Aug 2019 13:28:54 -0400 Subject: [PATCH] begin caching Programs in SkVMBlitter My first drafts put caching inside SkVM.cpp, which I rejected for feeling a little too magic, and then in SkVMBuilder.cpp using a fingerprint of the skvm::Builder to skip Builder::done(). This version makes an explicit Key structure holding all the parameters that currently matter to the Blitter, and caches using that. This is nice because it can be done much more cheaply than running the JIT, and much more cheaply even than running the Builder. It also makes the parameterization of the Blitter explicit, which I like. This does sometimes create programs that are equivalent but have different keys, but that's not that common, and it's pretty harmless. E.g. today if the device colorType() is opaque or premul, we'll think those are different programs, but actually end up making the exact same calls to Builder. No big deal, and maybe even gives us a suggestion to do something when the destination is opaque to skip work. I've left that as a TODO followup. We really only need a small cache to get a good hit rate. Running all GMs in one process serially, we're now down to 22 calls to done() from >100K at head (and >500K yesterday). (Would be 13 if we treated opaque and premul the same.) There are two places I'd like to have used tryAcquire() on an SkSpinlock but the thread safety static analysis is wrong and prevents me. Left some TODOs. Change-Id: I83a365fc895720c76b56b0e5a78f4c673fcd9d64 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/232817 Commit-Queue: Mike Klein Reviewed-by: Herb Derby --- src/core/SkVMBlitter.cpp | 146 ++++++++++++++++++++++++++++++++------- 1 file changed, 120 insertions(+), 26 deletions(-) diff --git a/src/core/SkVMBlitter.cpp b/src/core/SkVMBlitter.cpp index 2ba458db63..83dc026211 100644 --- a/src/core/SkVMBlitter.cpp +++ b/src/core/SkVMBlitter.cpp @@ -5,16 +5,48 @@ * found in the LICENSE file. */ +#include "include/private/SkMacros.h" +#include "include/private/SkSpinlock.h" #include "src/core/SkArenaAlloc.h" #include "src/core/SkColorSpacePriv.h" #include "src/core/SkColorSpaceXformSteps.h" #include "src/core/SkCoreBlitters.h" +#include "src/core/SkLRUCache.h" #include "src/core/SkVM.h" namespace { enum class Coverage { Full, UniformA8, MaskA8, MaskLCD16, Mask3D }; + SK_BEGIN_REQUIRE_DENSE; + struct Key { + SkColorType colorType; + SkAlphaType alphaType; + Coverage coverage; + SkBlendMode blendMode; + SkShader* shader; + SkColorFilter* colorFilter; + + Key withCoverage(Coverage c) const { + Key k = *this; + k.coverage = c; + return k; + } + }; + SK_END_REQUIRE_DENSE; + + static bool operator==(const Key& x, const Key& y) { + return x.colorType == y.colorType + && x.alphaType == y.alphaType + && x.coverage == y.coverage + && x.blendMode == y.blendMode + && x.shader == y.shader + && x.colorFilter == y.colorFilter; + } + + static SkSpinlock gProgramCacheLock; + static SkLRUCache gProgramCache{8}; + struct Uniforms { uint32_t paint_color; uint8_t coverage; // Used when Coverage::UniformA8. @@ -83,42 +115,46 @@ namespace { skvm::I32 min(skvm::I32 x, skvm::I32 y) { return select(lt(x,y), x,y); } skvm::I32 max(skvm::I32 x, skvm::I32 y) { return select(gt(x,y), x,y); } - static bool CanBuild(const SkPixmap& device, const SkPaint& paint) { + static bool CanBuild(const Key& key) { // These checks parallel the TODOs in Builder::Builder(). - if (paint.getShader()) { return false; } - if (paint.getColorFilter()) { return false; } - switch (device.colorType()) { + if (key.shader) { return false; } + if (key.colorFilter) { return false; } + + switch (key.colorType) { default: return false; case kRGB_565_SkColorType: break; case kRGBA_8888_SkColorType: break; case kBGRA_8888_SkColorType: break; } - if (device.alphaType() == kUnpremul_SkAlphaType) { return false; } - switch (paint.getBlendMode()) { + + if (key.alphaType == kUnpremul_SkAlphaType) { return false; } + + switch (key.blendMode) { default: return false; case SkBlendMode::kSrc: break; case SkBlendMode::kSrcOver: break; } + return true; } - Builder(const SkPixmap& device, const SkPaint& paint, Coverage coverage) { + explicit Builder(const Key& key) { #define TODO SkUNREACHABLE - SkASSERT(CanBuild(device, paint)); + SkASSERT(CanBuild(key)); skvm::Arg uniforms = uniform(), - dst_ptr = arg(SkColorTypeBytesPerPixel(device.colorType())); + dst_ptr = arg(SkColorTypeBytesPerPixel(key.colorType)); // When coverage is MaskA8 or MaskLCD16 there will be one more mask varying, // and when coverage is Mask3D there will be three more mask varyings. // When there's no shader and no color filter, the source color is the paint color. - if (paint.getShader()) { TODO; } - if (paint.getColorFilter()) { TODO; } + if (key.shader) { TODO; } + if (key.colorFilter) { TODO; } Color src = unpack_8888(uniform32(uniforms, offsetof(Uniforms, paint_color))); // Load up the destination color. Color dst; - switch (device.colorType()) { + switch (key.colorType) { default: TODO; case kRGB_565_SkColorType: dst = unpack_565 (load16(dst_ptr)); break; @@ -129,11 +165,14 @@ namespace { break; } + bool force_opaque = false && key.alphaType == kOpaque_SkAlphaType; // TODO: try this? + if (force_opaque) { dst.a = splat(0xff); } + // We'd need to premul dst after loading and unpremul before storing. - if (device.alphaType() == kUnpremul_SkAlphaType) { TODO; } + if (key.alphaType == kUnpremul_SkAlphaType) { TODO; } // Blend src and dst. - switch (paint.getBlendMode()) { + switch (key.blendMode) { default: TODO; case SkBlendMode::kSrc: break; @@ -149,7 +188,7 @@ namespace { // Lerp with coverage if needed. bool apply_coverage = true; skvm::I32 cr,cg,cb,ca; - switch (coverage) { + switch (key.coverage) { case Coverage::Full: apply_coverage = false; break; @@ -179,8 +218,10 @@ namespace { src.a = mix(dst.a, src.a, ca); } + if (force_opaque) { src.a = splat(0xff); } + // Store back to the destination. - switch (device.colorType()) { + switch (key.colorType) { default: SkUNREACHABLE; case kRGB_565_SkColorType: store16(dst_ptr, pack_565(src)); break; @@ -196,38 +237,91 @@ namespace { public: bool ok = false; - Blitter(const SkPixmap& device, const SkPaint& paint) : fDevice(device), fPaint(paint) { + Blitter(const SkPixmap& device, const SkPaint& paint) + : fDevice(device) + , fKey { + device.colorType(), + device.alphaType(), + Coverage::Full, + paint.getBlendMode(), + paint.getShader(), + paint.getColorFilter(), + } + { SkColor4f color = paint.getColor4f(); SkColorSpaceXformSteps{sk_srgb_singleton(), kUnpremul_SkAlphaType, device.colorSpace(), kUnpremul_SkAlphaType}.apply(color.vec()); - if (color.fitsInBytes() && Builder::CanBuild(device, paint)) { + if (color.fitsInBytes() && Builder::CanBuild(fKey)) { fUniforms.paint_color = color.premul().toBytes_RGBA(); ok = true; } } - private: - // TODO: I kind of forget whether these need to be copies or if they can be const& - SkPixmap fDevice; - SkPaint fPaint; + ~Blitter() override { + // TODO: tryAcquire() if we can get thread safety analysis to understand it. + if ((void)gProgramCacheLock.acquire(), true) { + auto cache = [&](skvm::Program&& p, Coverage coverage) { + if (!p.empty()) { + Key key = fKey.withCoverage(coverage); + if (skvm::Program* found = gProgramCache.find(key)) { + *found = std::move(p); + } else { + gProgramCache.insert(key, std::move(p)); + } + } + }; + cache(std::move(fBlitH), Coverage::Full); + cache(std::move(fBlitAntiH), Coverage::UniformA8); + cache(std::move(fBlitMaskA8), Coverage::MaskA8); + cache(std::move(fBlitMaskLCD16), Coverage::MaskLCD16); + gProgramCacheLock.release(); + } + } + private: + SkPixmap fDevice; // TODO: can this be const&? + const Key fKey; Uniforms fUniforms; skvm::Program fBlitH, fBlitAntiH, fBlitMaskA8, fBlitMaskLCD16; + skvm::Program buildProgram(Coverage coverage) { + Key key = fKey.withCoverage(coverage); + { + skvm::Program p; + // TODO: tryAcquire() is fine here too. + if ((void)gProgramCacheLock.acquire(), true) { + if (skvm::Program* found = gProgramCache.find(key)) { + p = std::move(*found); + } + gProgramCacheLock.release(); + } + if (!p.empty()) { + return p; + } + } + #if 0 + static std::atomic done{0}; + if (0 == done++) { + atexit([]{ SkDebugf("%d calls to done\n", done.load()); }); + } + #endif + return Builder{key}.done(); + } + void blitH(int x, int y, int w) override { if (fBlitH.empty()) { - fBlitH = Builder{fDevice, fPaint, Coverage::Full}.done(); + fBlitH = this->buildProgram(Coverage::Full); } fBlitH.eval(w, &fUniforms, fDevice.addr(x,y)); } void blitAntiH(int x, int y, const SkAlpha cov[], const int16_t runs[]) override { if (fBlitAntiH.empty()) { - fBlitAntiH = Builder{fDevice, fPaint, Coverage::UniformA8}.done(); + fBlitAntiH = this->buildProgram(Coverage::UniformA8); } for (int16_t run = *runs; run > 0; run = *runs) { fUniforms.coverage = *cov; @@ -252,14 +346,14 @@ namespace { case SkMask::k3D_Format: // TODO: the mul and add 3D mask planes too case SkMask::kA8_Format: if (fBlitMaskA8.empty()) { - fBlitMaskA8 = Builder{fDevice, fPaint, Coverage::MaskA8}.done(); + fBlitMaskA8 = this->buildProgram(Coverage::MaskA8); } program = &fBlitMaskA8; break; case SkMask::kLCD16_Format: if (fBlitMaskLCD16.empty()) { - fBlitMaskLCD16 = Builder{fDevice, fPaint, Coverage::MaskLCD16}.done(); + fBlitMaskLCD16 = this->buildProgram(Coverage::MaskLCD16); } program = &fBlitMaskLCD16; break;