Revert of Bilinear optimization for 1D convolution. (patchset #5 id:200001 of https://codereview.chromium.org/1216623003/)

Reason for revert:
Breaks MSAA

Original issue's description:
> Bilinear optimization for 1D convolution.
>
> Splits GrGLConvolutionEffect into GrGLBilerpConvolutionEffect and
> GrGLBoundedConvolutionEffect. When doing a non-bounded convolution we now
> always use the GrGLBilerpConvolutionEffect which uses bilinear filtering to
> perform half as many samples in the texture.
>
> BUG=skia:3986
>
> Committed: https://skia.googlesource.com/skia/+/91abe10af417148939548551e210c001022d3bda

TBR=bsalomon@google.com,senorblanco@chromium.org,ericrk@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:3986

Review URL: https://codereview.chromium.org/1231383005
This commit is contained in:
scroggo 2015-07-13 07:06:45 -07:00 committed by Commit bot
parent 1770839814
commit 39a24f2994
2 changed files with 115 additions and 306 deletions

View File

@ -28,8 +28,6 @@ public:
kY_Direction,
};
// Constructor using default nearest-neighbor sampling for the input texture
// filter mode.
Gr1DKernelEffect(GrProcessorDataManager* procDataManager,
GrTexture* texture,
Direction direction,
@ -38,18 +36,6 @@ public:
, fDirection(direction)
, fRadius(radius) {}
Gr1DKernelEffect(GrProcessorDataManager* procDataManager,
GrTexture* texture,
Direction direction,
int radius,
GrTextureParams::FilterMode filterMode)
: INHERITED(procDataManager,
texture,
GrCoordTransform::MakeDivByTextureWHMatrix(texture),
filterMode)
, fDirection(direction)
, fRadius(radius) {}
virtual ~Gr1DKernelEffect() {};
static int WidthFromRadius(int radius) { return 2 * radius + 1; }

View File

@ -13,24 +13,32 @@
// For brevity
typedef GrGLProgramDataManager::UniformHandle UniformHandle;
/**
* Base class with shared functionality for GrGLBoundedConvolutionEffect and
* GrGLLerpConvolutionEffect.
*/
class GrGLConvolutionEffect : public GrGLFragmentProcessor {
public:
GrGLConvolutionEffect(const GrProcessor&);
virtual void emitCode(GrGLFPBuilder*,
const GrFragmentProcessor&,
const char* outputColor,
const char* inputColor,
const TransformedCoordsArray&,
const TextureSamplerArray&) override;
void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;
static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*);
protected:
int radius() const { return fRadius; }
int width() const { return Gr1DKernelEffect::WidthFromRadius(fRadius); }
Gr1DKernelEffect::Direction direction() const { return fDirection; }
void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const;
private:
int fRadius;
Gr1DKernelEffect::Direction fDirection;
int width() const { return Gr1DKernelEffect::WidthFromRadius(fRadius); }
bool useBounds() const { return fUseBounds; }
Gr1DKernelEffect::Direction direction() const { return fDirection; }
int fRadius;
bool fUseBounds;
Gr1DKernelEffect::Direction fDirection;
UniformHandle fKernelUni;
UniformHandle fImageIncrementUni;
UniformHandle fBoundsUni;
typedef GrGLFragmentProcessor INHERITED;
};
@ -38,11 +46,101 @@ private:
GrGLConvolutionEffect::GrGLConvolutionEffect(const GrProcessor& processor) {
const GrConvolutionEffect& c = processor.cast<GrConvolutionEffect>();
fRadius = c.radius();
fUseBounds = c.useBounds();
fDirection = c.direction();
}
void GrGLConvolutionEffect::GenKey(const GrProcessor& processor,
const GrGLSLCaps&,
void GrGLConvolutionEffect::emitCode(GrGLFPBuilder* builder,
const GrFragmentProcessor&,
const char* outputColor,
const char* inputColor,
const TransformedCoordsArray& coords,
const TextureSamplerArray& samplers) {
fImageIncrementUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
kVec2f_GrSLType, kDefault_GrSLPrecision,
"ImageIncrement");
if (this->useBounds()) {
fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
kVec2f_GrSLType, kDefault_GrSLPrecision,
"Bounds");
}
fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility,
kFloat_GrSLType, kDefault_GrSLPrecision,
"Kernel", this->width());
GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();
SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);
fsBuilder->codeAppendf("\t\t%s = vec4(0, 0, 0, 0);\n", outputColor);
int width = this->width();
const GrGLShaderVar& kernel = builder->getUniformVariable(fKernelUni);
const char* imgInc = builder->getUniformCStr(fImageIncrementUni);
fsBuilder->codeAppendf("\t\tvec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), fRadius, imgInc);
// Manually unroll loop because some drivers don't; yields 20-30% speedup.
for (int i = 0; i < width; i++) {
SkString index;
SkString kernelIndex;
index.appendS32(i);
kernel.appendArrayAccess(index.c_str(), &kernelIndex);
if (this->useBounds()) {
// We used to compute a bool indicating whether we're in bounds or not, cast it to a
// float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
// to have a bug that caused corruption.
const char* bounds = builder->getUniformCStr(fBoundsUni);
const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";
fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
component, bounds, component, bounds);
}
fsBuilder->codeAppendf("\t\t%s += ", outputColor);
fsBuilder->appendTextureLookup(samplers[0], "coord");
fsBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());
if (this->useBounds()) {
fsBuilder->codeAppend("}");
}
fsBuilder->codeAppendf("\t\tcoord += %s;\n", imgInc);
}
SkString modulate;
GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);
fsBuilder->codeAppend(modulate.c_str());
}
void GrGLConvolutionEffect::setData(const GrGLProgramDataManager& pdman,
const GrProcessor& processor) {
const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
GrTexture& texture = *conv.texture(0);
// the code we generated was for a specific kernel radius
SkASSERT(conv.radius() == fRadius);
float imageIncrement[2] = { 0 };
float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
switch (conv.direction()) {
case Gr1DKernelEffect::kX_Direction:
imageIncrement[0] = 1.0f / texture.width();
break;
case Gr1DKernelEffect::kY_Direction:
imageIncrement[1] = ySign / texture.height();
break;
default:
SkFAIL("Unknown filter direction.");
}
pdman.set2fv(fImageIncrementUni, 1, imageIncrement);
if (conv.useBounds()) {
const float* bounds = conv.bounds();
if (Gr1DKernelEffect::kY_Direction == conv.direction() &&
texture.origin() != kTopLeft_GrSurfaceOrigin) {
pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
} else {
pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
}
}
pdman.set1fv(fKernelUni, this->width(), conv.kernel());
}
void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&,
GrProcessorKeyBuilder* b) {
const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
uint32_t key = conv.radius();
@ -54,261 +152,6 @@ void GrGLConvolutionEffect::GenKey(const GrProcessor& processor,
b->add32(key);
}
void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& conv,
float (*imageIncrement)[2]) const {
GrTexture& texture = *conv.texture(0);
(*imageIncrement)[0] = (*imageIncrement)[1] = 0;
float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
switch (conv.direction()) {
case Gr1DKernelEffect::kX_Direction:
(*imageIncrement)[0] = 1.0f / texture.width();
break;
case Gr1DKernelEffect::kY_Direction:
(*imageIncrement)[1] = ySign / texture.height();
break;
default:
SkFAIL("Unknown filter direction.");
}
}
///////////////////////////////////////////////////////////////////////////////
/**
* Applies a convolution effect which restricts samples to the provided bounds
* using shader logic.
*/
class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect {
public:
GrGLBoundedConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {}
virtual void emitCode(GrGLFPBuilder*,
const GrFragmentProcessor&,
const char* outputColor,
const char* inputColor,
const TransformedCoordsArray&,
const TextureSamplerArray&) override;
void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;
private:
UniformHandle fKernelUni;
UniformHandle fImageIncrementUni;
UniformHandle fBoundsUni;
typedef GrGLConvolutionEffect INHERITED;
};
void GrGLBoundedConvolutionEffect::emitCode(GrGLFPBuilder* builder,
const GrFragmentProcessor& processor,
const char* outputColor,
const char* inputColor,
const TransformedCoordsArray& coords,
const TextureSamplerArray& samplers) {
fImageIncrementUni =
builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType,
kDefault_GrSLPrecision, "ImageIncrement");
fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType,
kDefault_GrSLPrecision, "Bounds");
fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kFloat_GrSLType,
kDefault_GrSLPrecision, "Kernel", this->width());
GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();
SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);
fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor);
int width = this->width();
const GrGLShaderVar& kernel = builder->getUniformVariable(fKernelUni);
const char* imgInc = builder->getUniformCStr(fImageIncrementUni);
fsBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), this->radius(),
imgInc);
// Manually unroll loop because some drivers don't; yields 20-30% speedup.
for (int i = 0; i < width; i++) {
SkString index;
SkString kernelIndex;
index.appendS32(i);
kernel.appendArrayAccess(index.c_str(), &kernelIndex);
// We used to compute a bool indicating whether we're in bounds or not, cast it to a
// float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
// to have a bug that caused corruption.
const char* bounds = builder->getUniformCStr(fBoundsUni);
const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";
fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
component, bounds, component, bounds);
fsBuilder->codeAppendf("%s += ", outputColor);
fsBuilder->appendTextureLookup(samplers[0], "coord");
fsBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());
fsBuilder->codeAppend("}");
fsBuilder->codeAppendf("coord += %s;\n", imgInc);
}
SkString modulate;
GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);
fsBuilder->codeAppend(modulate.c_str());
}
void GrGLBoundedConvolutionEffect::setData(const GrGLProgramDataManager& pdman,
const GrProcessor& processor) {
const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
// the code we generated was for a specific kernel radius
SkASSERT(conv.radius() == this->radius());
// the code we generated was for a specific bounding mode.
SkASSERT(conv.useBounds());
GrTexture& texture = *conv.texture(0);
float imageIncrement[2];
getImageIncrement(conv, &imageIncrement);
pdman.set2fv(fImageIncrementUni, 1, imageIncrement);
const float* bounds = conv.bounds();
if (Gr1DKernelEffect::kY_Direction == conv.direction() &&
texture.origin() != kTopLeft_GrSurfaceOrigin) {
pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
} else {
pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
}
pdman.set1fv(fKernelUni, this->width(), conv.kernel());
}
///////////////////////////////////////////////////////////////////////////////
/**
* Applies a convolution effect which applies the convolution using a linear
* interpolation optimization to use half as many samples.
*/
class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect {
public:
GrGLLerpConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {}
virtual void emitCode(GrGLFPBuilder*,
const GrFragmentProcessor&,
const char* outputColor,
const char* inputColor,
const TransformedCoordsArray&,
const TextureSamplerArray&) override;
void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;
private:
int bilerpSampleCount() const;
// Bounded uniforms
UniformHandle fSampleWeightUni;
UniformHandle fSampleOffsetUni;
typedef GrGLConvolutionEffect INHERITED;
};
void GrGLLerpConvolutionEffect::emitCode(GrGLFPBuilder* builder,
const GrFragmentProcessor& processor,
const char* outputColor,
const char* inputColor,
const TransformedCoordsArray& coords,
const TextureSamplerArray& samplers) {
int sampleCount = bilerpSampleCount();
// We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so
// ensure we don't exceed this. Note that it is currently impossible to
// exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth
// maxes out at 25, resulting in a max sampleCount of 26.
SkASSERT(sampleCount < 16);
fSampleOffsetUni =
builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType,
kDefault_GrSLPrecision, "SampleOffset", sampleCount);
fSampleWeightUni =
builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kFloat_GrSLType,
kDefault_GrSLPrecision, "SampleWeight", sampleCount);
GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();
SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);
fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor);
const GrGLShaderVar& kernel = builder->getUniformVariable(fSampleWeightUni);
const GrGLShaderVar& imgInc = builder->getUniformVariable(fSampleOffsetUni);
fsBuilder->codeAppendf("vec2 coord; \n");
// Manually unroll loop because some drivers don't; yields 20-30% speedup.
for (int i = 0; i < sampleCount; i++) {
SkString index;
SkString weightIndex;
SkString offsetIndex;
index.appendS32(i);
kernel.appendArrayAccess(index.c_str(), &weightIndex);
imgInc.appendArrayAccess(index.c_str(), &offsetIndex);
fsBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str());
fsBuilder->codeAppendf("%s += ", outputColor);
fsBuilder->appendTextureLookup(samplers[0], "coord");
fsBuilder->codeAppendf(" * %s;\n", weightIndex.c_str());
}
SkString modulate;
GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);
fsBuilder->codeAppend(modulate.c_str());
}
void GrGLLerpConvolutionEffect::setData(const GrGLProgramDataManager& pdman,
const GrProcessor& processor) {
const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
// the code we generated was for a specific kernel radius
SkASSERT(conv.radius() == this->radius());
// the code we generated was for a specific bounding mode.
SkASSERT(!conv.useBounds());
int sampleCount = bilerpSampleCount();
SkAutoTArray<float> imageIncrements(sampleCount * 2); // X and Y floats per sample.
SkAutoTArray<float> kernel(sampleCount);
float baseImageIncrement[2];
getImageIncrement(conv, &baseImageIncrement);
for (int i = 0; i < sampleCount; i++) {
int sampleIndex1 = i * 2;
int sampleIndex2 = sampleIndex1 + 1;
// If we have an odd number of samples in our filter, the last sample won't use
// the linear interpolation optimization (it will be pixel aligned).
if (sampleIndex2 >= this->width()) {
sampleIndex2 = sampleIndex1;
}
float kernelWeight1 = conv.kernel()[sampleIndex1];
float kernelWeight2 = conv.kernel()[sampleIndex2];
float totalKernelWeight =
(sampleIndex1 == sampleIndex2) ? kernelWeight1 : (kernelWeight1 + kernelWeight2);
float sampleRatio =
(sampleIndex1 == sampleIndex2) ? 0 : kernelWeight2 / (kernelWeight1 + kernelWeight2);
imageIncrements[i * 2] = (-this->radius() + i * 2 + sampleRatio) * baseImageIncrement[0];
imageIncrements[i * 2 + 1] =
(-this->radius() + i * 2 + sampleRatio) * baseImageIncrement[1];
kernel[i] = totalKernelWeight;
}
pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get());
pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get());
}
int GrGLLerpConvolutionEffect::bilerpSampleCount() const {
// We use a linear interpolation optimization to only sample once for each
// two pixel aligned samples in the kernel. If we have an odd number of
// samples, we will have to skip this optimization for the last sample.
// Because of this we always round up our sample count (by adding 1 before
// dividing).
return (this->width() + 1) / 2;
}
///////////////////////////////////////////////////////////////////////////////
GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager,
@ -318,13 +161,7 @@ GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager
const float* kernel,
bool useBounds,
float bounds[2])
: INHERITED(procDataManager,
texture,
direction,
radius,
useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
: GrTextureParams::FilterMode::kBilerp_FilterMode)
, fUseBounds(useBounds) {
: INHERITED(procDataManager, texture, direction, radius), fUseBounds(useBounds) {
this->initClassID<GrConvolutionEffect>();
SkASSERT(radius <= kMaxKernelRadius);
SkASSERT(kernel);
@ -342,13 +179,7 @@ GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager
float gaussianSigma,
bool useBounds,
float bounds[2])
: INHERITED(procDataManager,
texture,
direction,
radius,
useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
: GrTextureParams::FilterMode::kBilerp_FilterMode)
, fUseBounds(useBounds) {
: INHERITED(procDataManager, texture, direction, radius), fUseBounds(useBounds) {
this->initClassID<GrConvolutionEffect>();
SkASSERT(radius <= kMaxKernelRadius);
int width = this->width();
@ -379,15 +210,7 @@ void GrConvolutionEffect::getGLProcessorKey(const GrGLSLCaps& caps,
}
GrGLFragmentProcessor* GrConvolutionEffect::createGLInstance() const {
// We support a linear interpolation optimization which (when feasible) uses
// half the number of samples to apply the kernel. This is not always
// applicable, as the linear interpolation optimization does not support
// bounded sampling.
if (this->useBounds()) {
return SkNEW_ARGS(GrGLBoundedConvolutionEffect, (*this));
} else {
return SkNEW_ARGS(GrGLLerpConvolutionEffect, (*this));
}
return SkNEW_ARGS(GrGLConvolutionEffect, (*this));
}
bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {