diff --git a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp index 2b32d79953..da7d950609 100644 --- a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp +++ b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp @@ -19,7 +19,9 @@ using UniformHandle = GrGLSLProgramDataManager::UniformHandle; using Direction = GrGaussianConvolutionFragmentProcessor::Direction; -class GrGLConvolutionEffect : public GrGLSLFragmentProcessor { +static constexpr int radius_to_width(int r) { return 2*r + 1; } + +class GrGaussianConvolutionFragmentProcessor::Impl : public GrGLSLFragmentProcessor { public: void emitCode(EmitArgs&) override; @@ -35,7 +37,7 @@ private: typedef GrGLSLFragmentProcessor INHERITED; }; -void GrGLConvolutionEffect::emitCode(EmitArgs& args) { +void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) { const GrGaussianConvolutionFragmentProcessor& ce = args.fFp.cast(); @@ -45,7 +47,7 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) { fIncrementUni = uniformHandler->addUniform(&ce, kFragment_GrShaderFlag, kHalf2_GrSLType, "Increment", &inc); - int width = ce.width(); + int width = radius_to_width(ce.fRadius); int arrayCount = (width + 3) / 4; SkASSERT(4 * arrayCount >= width); @@ -60,7 +62,7 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) { fragBuilder->codeAppendf("%s = half4(0, 0, 0, 0);", args.fOutputColor); - fragBuilder->codeAppendf("float2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.radius(), inc); + fragBuilder->codeAppendf("float2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.fRadius, inc); fragBuilder->codeAppend("float2 coordSampled = half2(0, 0);"); // Manually unroll loop because some drivers don't; yields 20-30% speedup. @@ -79,29 +81,34 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) { fragBuilder->codeAppendf("%s *= %s;", args.fOutputColor, args.fInputColor); } -void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman, - const GrFragmentProcessor& processor) { +void GrGaussianConvolutionFragmentProcessor::Impl::onSetData(const GrGLSLProgramDataManager& pdman, + const GrFragmentProcessor& processor) { const auto& conv = processor.cast(); float increment[2] = {}; - increment[static_cast(conv.direction())] = 1; + increment[static_cast(conv.fDirection)] = 1; pdman.set2fv(fIncrementUni, 1, increment); - int width = conv.width(); - int arrayCount = (width + 3) / 4; - SkASSERT(4 * arrayCount >= width); - pdman.set4fv(fKernelUni, arrayCount, conv.kernel()); + int width = radius_to_width(conv.fRadius); + int arrayCount = (width + 3)/4; + SkDEBUGCODE(size_t arraySize = 4*arrayCount;) + SkASSERT(arraySize >= static_cast(width)); + SkASSERT(arraySize <= SK_ARRAY_COUNT(GrGaussianConvolutionFragmentProcessor::fKernel)); + pdman.set4fv(fKernelUni, arrayCount, conv.fKernel); } -void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrShaderCaps&, - GrProcessorKeyBuilder* b) { +void GrGaussianConvolutionFragmentProcessor::Impl::GenKey(const GrProcessor& processor, + const GrShaderCaps&, + GrProcessorKeyBuilder* b) { const auto& conv = processor.cast(); - b->add32(conv.radius()); + b->add32(conv.fRadius); } /////////////////////////////////////////////////////////////////////////////// -static void fill_in_1D_gaussian_kernel(float* kernel, int width, float gaussianSigma, int radius) { + +static void fill_in_1D_gaussian_kernel(float* kernel, float gaussianSigma, int radius) { const float twoSigmaSqrd = 2.0f * gaussianSigma * gaussianSigma; + int width = radius_to_width(radius); if (SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero)) { for (int i = 0; i < width; ++i) { kernel[i] = 0.0f; @@ -173,7 +180,7 @@ GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor( child->setSampledWithExplicitCoords(); this->registerChildProcessor(std::move(child)); SkASSERT(radius <= kMaxKernelRadius); - fill_in_1D_gaussian_kernel(fKernel, this->width(), gaussianSigma, this->radius()); + fill_in_1D_gaussian_kernel(fKernel, gaussianSigma, fRadius); this->addCoordTransform(&fCoordTransform); } @@ -185,23 +192,23 @@ GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor( auto child = that.childProcessor(0).clone(); child->setSampledWithExplicitCoords(); this->registerChildProcessor(std::move(child)); - memcpy(fKernel, that.fKernel, that.width() * sizeof(float)); + memcpy(fKernel, that.fKernel, radius_to_width(fRadius) * sizeof(float)); this->addCoordTransform(&fCoordTransform); } void GrGaussianConvolutionFragmentProcessor::onGetGLSLProcessorKey(const GrShaderCaps& caps, GrProcessorKeyBuilder* b) const { - GrGLConvolutionEffect::GenKey(*this, caps, b); + Impl::GenKey(*this, caps, b); } GrGLSLFragmentProcessor* GrGaussianConvolutionFragmentProcessor::onCreateGLSLInstance() const { - return new GrGLConvolutionEffect; + return new Impl; } bool GrGaussianConvolutionFragmentProcessor::onIsEqual(const GrFragmentProcessor& sBase) const { const auto& that = sBase.cast(); - return this->radius() == that.radius() && this->direction() == that.direction() && - std::equal(fKernel, fKernel + this->width(), that.fKernel); + return fRadius == that.fRadius && fDirection == that.fDirection && + std::equal(fKernel, fKernel + radius_to_width(fRadius), that.fKernel); } /////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h index a2df392991..70fa89bbda 100644 --- a/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h +++ b/src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h @@ -34,12 +34,6 @@ public: const int bounds[2], const GrCaps& caps); - const float* kernel() const { return fKernel; } - - int radius() const { return fRadius; } - int width() const { return 2 * fRadius + 1; } - Direction direction() const { return fDirection; } - const char* name() const override { return "GaussianConvolution"; } #ifdef SK_DEBUG @@ -59,10 +53,7 @@ public: // samples per fragment program run in DX9SM2 (32). A sigma param of 4.0 // on a blur filter gives a kernel width of 25 while a sigma of 5.0 // would exceed a 32 wide kernel. - static const int kMaxKernelRadius = 12; - // With a C++11 we could have a constexpr version of WidthFromRadius() - // and not have to duplicate this calculation. - static const int kMaxKernelWidth = 2 * kMaxKernelRadius + 1; + static constexpr int kMaxKernelRadius = 12; private: GrGaussianConvolutionFragmentProcessor(std::unique_ptr, @@ -80,15 +71,19 @@ private: GR_DECLARE_FRAGMENT_PROCESSOR_TEST + static constexpr int kMaxKernelWidth = 2*kMaxKernelRadius + 1; + // We really just want the unaltered local coords, but the only way to get that right now is // an identity coord transform. GrCoordTransform fCoordTransform = {}; - // TODO: Inline the kernel constants into the generated shader code. This may involve pulling - // some of the logic from SkGpuBlurUtils into this class related to radius/sigma calculations. - float fKernel[kMaxKernelWidth]; + // The array size must be a multiple of 4 because we pass it as an array of float4 uniform + // values. + float fKernel[SkAlign4(kMaxKernelWidth)]; int fRadius; Direction fDirection; + class Impl; + typedef GrFragmentProcessor INHERITED; };