From 1c50643b3cef2e742f2ba2d99179fa92554c289d Mon Sep 17 00:00:00 2001 From: John Stiles Date: Mon, 14 Dec 2020 16:14:45 -0500 Subject: [PATCH] Optimize Gaussian convolution fragment processor. This FP now uses an explicit return statement instead of sk_OutColor. Additionally, simplified the generated code by removing a temp variable that did nothing (coordSampled), and by removing a needless addition at the end of the loop. It looks like coordSampled was useful when it was introduced at http://review.skia.org/20465, but it was later rendered useless. Change-Id: I8c549b16b4d422d7faeab48e2087f168ad5788b5 Bug: skia:10549 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/344156 Commit-Queue: John Stiles Reviewed-by: Brian Osman Auto-Submit: John Stiles --- ...GrGaussianConvolutionFragmentProcessor.cpp | 20 ++++++++----------- .../GrGaussianConvolutionFragmentProcessor.h | 1 + 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp index 547d8f6847..3e0191c835 100644 --- a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp +++ b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp @@ -57,24 +57,20 @@ void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) { GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder; - fragBuilder->codeAppendf("%s = half4(0);", args.fOutputColor); + fragBuilder->codeAppendf("half4 color = half4(0);"); fragBuilder->codeAppendf("float2 coord = %s - %d.0 * %s;", args.fSampleCoord, ce.fRadius, inc); - fragBuilder->codeAppend("float2 coordSampled = half2(0);"); // Manually unroll loop because some drivers don't; yields 20-30% speedup. - static constexpr const char* kVecSuffix[4] = {".x", ".y", ".z", ".w"}; for (int i = 0; i < width; i++) { - SkString kernelIndex; - kernelIndex.printf("%s[%d]", kernel, i/4); - kernelIndex.append(kVecSuffix[i & 0x3]); - - fragBuilder->codeAppend("coordSampled = coord;"); - auto sample = this->invokeChild(0, args, "coordSampled"); - fragBuilder->codeAppendf("%s += %s", args.fOutputColor, sample.c_str()); - fragBuilder->codeAppendf(" * %s;", kernelIndex.c_str()); - fragBuilder->codeAppendf("coord += %s;", inc); + auto sample = this->invokeChild(/*childIndex=*/0, args, "coord"); + if (i != 0) { + fragBuilder->codeAppendf("coord += %s;", inc); + } + fragBuilder->codeAppendf("color += %s * %s[%d][%d];", + sample.c_str(), kernel, i / 4, i & 0x3); } + fragBuilder->codeAppendf("return color;"); } void GrGaussianConvolutionFragmentProcessor::Impl::onSetData(const GrGLSLProgramDataManager& pdman, diff --git a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h index 3556102816..2484b2bb57 100644 --- a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h +++ b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h @@ -39,6 +39,7 @@ public: const GrCaps&); const char* name() const override { return "GaussianConvolution"; } + bool usesExplicitReturn() const override { return true; } std::unique_ptr clone() const override { return std::unique_ptr(