In reduced shader mode, the 1D Gaussian effect doesn't bake in the loop count.
This eliminates 92 shader compiles in desk_carsvg.skp. It is probably worth doing all the time, not just in reduced shader mode. Bug: skia:11844 Change-Id: I84e1b41580828d6a4a548c19480cf12c47eeb299 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/399416 Reviewed-by: Michael Ludwig <michaelludwig@google.com> Commit-Queue: Brian Salomon <bsalomon@google.com>
This commit is contained in:
parent
f21d097523
commit
3036defd9e
@ -33,11 +33,28 @@ protected:
|
||||
private:
|
||||
UniformHandle fKernelUni;
|
||||
UniformHandle fOffsetsUni;
|
||||
UniformHandle fKernelWidthUni;
|
||||
UniformHandle fIncrementUni;
|
||||
|
||||
using INHERITED = GrGLSLFragmentProcessor;
|
||||
};
|
||||
|
||||
// Selects how emitCode() generates the per-sample accumulation of the convolution.
enum class LoopType {
|
||||
// A C++ loop emits one accumulate statement per kernel sample; no shader loop is generated.
kUnrolled,
|
||||
// A shader loop whose bound is the kernel width known when the code is emitted.
kFixedLength,
|
||||
// A shader loop whose bound is read from the "kernelWidth" uniform.
kVariableLength,
|
||||
};
|
||||
|
||||
// Picks the LoopType to use for the given shader caps:
//   - kUnrolled when the GLSL generation is below k130_GrGLSLGeneration,
//   - kVariableLength when caps.reducedShaderMode() is set,
//   - kFixedLength otherwise.
static LoopType loop_type(const GrShaderCaps& caps) {
|
||||
// This checks that bitwise integer operations and array indexing by non-consts are allowed.
|
||||
if (caps.generation() < k130_GrGLSLGeneration) {
|
||||
return LoopType::kUnrolled;
|
||||
}
|
||||
// If we're in reduced shader mode and we can have a loop then use a uniform to limit the
|
||||
// number of iterations so we don't need a code variation for each width.
|
||||
return caps.reducedShaderMode() ? LoopType::kVariableLength : LoopType::kFixedLength;
|
||||
}
|
||||
|
||||
void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) {
|
||||
const GrGaussianConvolutionFragmentProcessor& ce =
|
||||
args.fFp.cast<GrGaussianConvolutionFragmentProcessor>();
|
||||
@ -49,31 +66,52 @@ void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) {
|
||||
|
||||
int width = SkGpuBlurUtils::LinearKernelWidth(ce.fRadius);
|
||||
|
||||
int arrayCount = (width + 3) / 4;
|
||||
LoopType loopType = loop_type(*args.fShaderCaps);
|
||||
|
||||
int arrayCount;
|
||||
if (loopType == LoopType::kVariableLength) {
|
||||
// Size the kernel uniform for the maximum width.
|
||||
arrayCount = (SkGpuBlurUtils::LinearKernelWidth(kMaxKernelRadius) + 3) / 4;
|
||||
} else {
|
||||
arrayCount = (width + 3) / 4;
|
||||
SkASSERT(4 * arrayCount >= width);
|
||||
}
|
||||
|
||||
Var kernel(kUniform_Modifier, Array(kHalf4_Type, arrayCount), "Kernel");
|
||||
fKernelUni = VarUniformHandle(kernel);
|
||||
|
||||
Var color(kHalf4_Type, "color", Half4(0));
|
||||
Declare(color);
|
||||
|
||||
Var offsets(kUniform_Modifier, Array(kHalf4_Type, arrayCount), "Offsets");
|
||||
fOffsetsUni = VarUniformHandle(offsets);
|
||||
|
||||
Var color(kHalf4_Type, "color", Half4(0));
|
||||
Declare(color);
|
||||
|
||||
Var coord(kFloat2_Type, "coord", sk_SampleCoord());
|
||||
Declare(coord);
|
||||
|
||||
// This checks that bitwise integer operations and array indexing by non-consts are allowed.
|
||||
if (args.fShaderCaps->generation() >= k130_GrGLSLGeneration) {
|
||||
switch (loopType) {
|
||||
case LoopType::kUnrolled:
|
||||
for (int i = 0; i < width; i++) {
|
||||
color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) *
|
||||
kernel[i / 4][i & 0x3];
|
||||
}
|
||||
break;
|
||||
case LoopType::kFixedLength: {
|
||||
Var i(kInt_Type, "i", 0);
|
||||
For(Declare(i), i < width, i++,
|
||||
color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) *
|
||||
kernel[i / 4][i & 0x3]);
|
||||
} else {
|
||||
for (int i = 0; i < width; i++) {
|
||||
break;
|
||||
}
|
||||
case LoopType::kVariableLength: {
|
||||
Var kernelWidth(kUniform_Modifier, kInt_Type, "kernelWidth");
|
||||
fKernelWidthUni = VarUniformHandle(kernelWidth);
|
||||
Var i(kInt_Type, "i", 0);
|
||||
For(Declare(i), i < kernelWidth, i++,
|
||||
color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) *
|
||||
kernel[i / 4][i & 0x3];
|
||||
kernel[i / 4][i & 0x3]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,13 +134,18 @@ void GrGaussianConvolutionFragmentProcessor::Impl::onSetData(const GrGLSLProgram
|
||||
SkASSERT(arraySize <= SK_ARRAY_COUNT(GrGaussianConvolutionFragmentProcessor::fKernel));
|
||||
pdman.set4fv(fKernelUni, arrayCount, conv.fKernel);
|
||||
pdman.set4fv(fOffsetsUni, arrayCount, conv.fOffsets);
|
||||
if (fKernelWidthUni.isValid()) {
|
||||
pdman.set1i(fKernelWidthUni, width);
|
||||
}
|
||||
}
|
||||
|
||||
// Contributes this processor's data to the key builder `b`. The radius is added to the key
// unless loop_type() reports kVariableLength; in that mode the loop bound comes from the
// "kernelWidth" uniform rather than from the radius at code-emission time.
void GrGaussianConvolutionFragmentProcessor::Impl::GenKey(const GrProcessor& processor,
|
||||
const GrShaderCaps&,
|
||||
const GrShaderCaps& shaderCaps,
|
||||
GrProcessorKeyBuilder* b) {
|
||||
const auto& conv = processor.cast<GrGaussianConvolutionFragmentProcessor>();
|
||||
if (loop_type(shaderCaps) != LoopType::kVariableLength) {
|
||||
b->add32(conv.fRadius);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
Loading…
Reference in New Issue
Block a user