From 3036defd9eb533d5fc03959b3e005656eb8b83c2 Mon Sep 17 00:00:00 2001 From: Brian Salomon Date: Wed, 21 Apr 2021 19:30:57 -0400 Subject: [PATCH] In reduced shader mode 1D Gaussian effect doesn't bake loop count. Good for 92 shader compile reduction in desk_carsvg.skp. This is probably a candidate for doing all the time, not just in reduced shader mode. Bug: skia:11844 Change-Id: I84e1b41580828d6a4a548c19480cf12c47eeb299 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/399416 Reviewed-by: Michael Ludwig Commit-Queue: Brian Salomon --- ...GrGaussianConvolutionFragmentProcessor.cpp | 75 +++++++++++++++---- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp index 09ec9c3281..e075aecffb 100644 --- a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp +++ b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp @@ -33,11 +33,28 @@ protected: private: UniformHandle fKernelUni; UniformHandle fOffsetsUni; + UniformHandle fKernelWidthUni; UniformHandle fIncrementUni; using INHERITED = GrGLSLFragmentProcessor; }; +enum class LoopType { + kUnrolled, + kFixedLength, + kVariableLength, +}; + +static LoopType loop_type(const GrShaderCaps& caps) { + // This checks that bitwise integer operations and array indexing by non-consts are allowed. + if (caps.generation() < k130_GrGLSLGeneration) { + return LoopType::kUnrolled; + } + // If we're in reduced shader mode and we can have a loop then use a uniform to limit the + // number of iterations so we don't need a code variation for each width. + return caps.reducedShaderMode() ? LoopType::kVariableLength : LoopType::kFixedLength; +} + void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) { const GrGaussianConvolutionFragmentProcessor& ce = args.fFp.cast(); @@ -49,31 +66,52 @@ void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) { int width = SkGpuBlurUtils::LinearKernelWidth(ce.fRadius); - int arrayCount = (width + 3) / 4; - SkASSERT(4 * arrayCount >= width); + LoopType loopType = loop_type(*args.fShaderCaps); + + int arrayCount; + if (loopType == LoopType::kVariableLength) { + // Size the kernel uniform for the maximum width. + arrayCount = (SkGpuBlurUtils::LinearKernelWidth(kMaxKernelRadius) + 3) / 4; + } else { + arrayCount = (width + 3) / 4; + SkASSERT(4 * arrayCount >= width); + } Var kernel(kUniform_Modifier, Array(kHalf4_Type, arrayCount), "Kernel"); fKernelUni = VarUniformHandle(kernel); - Var color(kHalf4_Type, "color", Half4(0)); - Declare(color); Var offsets(kUniform_Modifier, Array(kHalf4_Type, arrayCount), "Offsets"); fOffsetsUni = VarUniformHandle(offsets); + Var color(kHalf4_Type, "color", Half4(0)); + Declare(color); + Var coord(kFloat2_Type, "coord", sk_SampleCoord()); Declare(coord); - // This checks that bitwise integer operations and array indexing by non-consts are allowed. - if (args.fShaderCaps->generation() >= k130_GrGLSLGeneration) { - Var i(kInt_Type, "i", 0); - For(Declare(i), i < width, i++, - color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) * - kernel[i / 4][i & 0x3]); - } else { - for (int i = 0; i < width; i++) { - color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) * - kernel[i / 4][i & 0x3]; + switch (loopType) { + case LoopType::kUnrolled: + for (int i = 0; i < width; i++) { + color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) * + kernel[i / 4][i & 0x3]; + } + break; + case LoopType::kFixedLength: { + Var i(kInt_Type, "i", 0); + For(Declare(i), i < width, i++, + color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) * + kernel[i / 4][i & 0x3]); + break; + } + case LoopType::kVariableLength: { + Var kernelWidth(kUniform_Modifier, kInt_Type, "kernelWidth"); + fKernelWidthUni = VarUniformHandle(kernelWidth); + Var i(kInt_Type, "i", 0); + For(Declare(i), i < kernelWidth, i++, + color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) * + kernel[i / 4][i & 0x3]); + break; } } @@ -96,13 +134,18 @@ void GrGaussianConvolutionFragmentProcessor::Impl::onSetData(const GrGLSLProgram SkASSERT(arraySize <= SK_ARRAY_COUNT(GrGaussianConvolutionFragmentProcessor::fKernel)); pdman.set4fv(fKernelUni, arrayCount, conv.fKernel); pdman.set4fv(fOffsetsUni, arrayCount, conv.fOffsets); + if (fKernelWidthUni.isValid()) { + pdman.set1i(fKernelWidthUni, width); + } } void GrGaussianConvolutionFragmentProcessor::Impl::GenKey(const GrProcessor& processor, - const GrShaderCaps&, + const GrShaderCaps& shaderCaps, GrProcessorKeyBuilder* b) { const auto& conv = processor.cast(); - b->add32(conv.fRadius); + if (loop_type(shaderCaps) != LoopType::kVariableLength) { + b->add32(conv.fRadius); + } } ///////////////////////////////////////////////////////////////////////////////