Reland "Support large kernels on GPU in matrix convolution effect"
This reverts commit a117e7b75b.

Reason for revert: Fixed divide-by-0 in the unpremul logic. This was here before but never caused problems (or we ignored them.)

Original change's description:
> Revert "Reland "Support large kernels on GPU in matrix convolution effect""
>
> This reverts commit 76cb9c4d4c.
>
> Reason for revert: Tegra3 & Metal issues
>
> Original change's description:
> > Reland "Support large kernels on GPU in matrix convolution effect"
> >
> > This reverts commit 41e377d1ba.
> >
> > Reason for revert: fixed issues
> >
> > Bug: skia:8449
> > Change-Id: I0c4389f0efa92c6da69253b2304ad9a072750965
> > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/287817
> > Commit-Queue: Adlai Holler <adlai@google.com>
> > Reviewed-by: Brian Salomon <bsalomon@google.com>
>
> TBR=bsalomon@google.com,robertphillips@google.com,michaelludwig@google.com,adlai@google.com
>
> Change-Id: I5c3f04d4d262550a3298b8fd677c8a1661be7ad9
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Bug: skia:8449
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/289076
> Reviewed-by: Adlai Holler <adlai@google.com>
> Commit-Queue: Adlai Holler <adlai@google.com>

TBR=bsalomon@google.com,robertphillips@google.com,michaelludwig@google.com,adlai@google.com

Bug: skia:8449
Change-Id: I90b8e9e0eb52bc08308fb472eb216ed0bd4785a1
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/289030
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Adlai Holler <adlai@google.com>
This commit is contained in:
parent
3d2c41b773
commit
00ddb0029d
@ -22,6 +22,7 @@
|
||||
#include "include/core/SkTypeface.h"
|
||||
#include "include/effects/SkGradientShader.h"
|
||||
#include "include/effects/SkImageFilters.h"
|
||||
#include "src/gpu/effects/GrMatrixConvolutionEffect.h"
|
||||
#include "tools/ToolUtils.h"
|
||||
|
||||
#include <vector>
|
||||
@ -79,7 +80,7 @@ protected:
|
||||
return SkImageFilters::MatrixConvolution({3,3}, kernel.data(), /* gain */ 0.3f, /* bias */ SkIntToScalar(100), kernelOffset, tileMode, convolveAlpha, nullptr, cropRect);
|
||||
}
|
||||
case kLarge_KernelFixture: {
|
||||
// Intentionally go over the MAX_KERNEL_SIZE limit and trigger CPU fallback.
|
||||
static_assert(49 > GrMatrixConvolutionEffect::kMaxUniformSize);
|
||||
// All 1s except center value, which is -47 (sum of 1).
|
||||
std::vector<SkScalar> kernel(49, SkIntToScalar(1));
|
||||
kernel[24] = SkIntToScalar(-47);
|
||||
|
@ -128,8 +128,9 @@ static std::unique_ptr<GrRenderTargetContext> convolve_gaussian_2d(GrRecordingCo
|
||||
SkIPoint kernelOffset = SkIPoint::Make(radiusX, radiusY);
|
||||
GrPaint paint;
|
||||
auto wm = SkTileModeToWrapMode(mode);
|
||||
auto conv = GrMatrixConvolutionEffect::MakeGaussian(std::move(srcView), srcBounds, size, 1.0,
|
||||
0.0, kernelOffset, wm, true, sigmaX, sigmaY,
|
||||
auto conv = GrMatrixConvolutionEffect::MakeGaussian(context, std::move(srcView), srcBounds,
|
||||
size, 1.0, 0.0, kernelOffset, wm, true,
|
||||
sigmaX, sigmaY,
|
||||
*renderTargetContext->caps());
|
||||
paint.addColorFragmentProcessor(std::move(conv));
|
||||
paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
|
||||
@ -453,7 +454,8 @@ std::unique_ptr<GrRenderTargetContext> GaussianBlur(GrRecordingContext* context,
|
||||
if (scaleFactorX == 1 && scaleFactorY == 1) {
|
||||
// For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
|
||||
// launch a single non separable kernel vs two launches.
|
||||
if (sigmaX > 0 && sigmaY > 0 && (2 * radiusX + 1) * (2 * radiusY + 1) <= MAX_KERNEL_SIZE) {
|
||||
const int kernelSize = (2 * radiusX + 1) * (2 * radiusY + 1);
|
||||
if (sigmaX > 0 && sigmaY > 0 && kernelSize <= GrMatrixConvolutionEffect::kMaxUniformSize) {
|
||||
// Apply the proxy offset to src bounds and offset directly
|
||||
return convolve_gaussian_2d(context, std::move(srcView), srcColorType, srcBounds,
|
||||
dstBounds, radiusX, radiusY, sigmaX, sigmaY, mode,
|
||||
|
@ -391,9 +391,7 @@ sk_sp<SkSpecialImage> SkMatrixConvolutionImageFilterImpl::onFilterImage(const Co
|
||||
}
|
||||
|
||||
#if SK_SUPPORT_GPU
|
||||
// Note: if the kernel is too big, the GPU path falls back to SW
|
||||
if (ctx.gpuBacked() &&
|
||||
fKernelSize.width() * fKernelSize.height() <= MAX_KERNEL_SIZE) {
|
||||
if (ctx.gpuBacked()) {
|
||||
auto context = ctx.getContext();
|
||||
|
||||
// Ensure the input is in the destination color space. Typically applyCropRect will have
|
||||
@ -414,7 +412,8 @@ sk_sp<SkSpecialImage> SkMatrixConvolutionImageFilterImpl::onFilterImage(const Co
|
||||
// Map srcBounds from input's logical image domain to that of the proxy
|
||||
srcBounds.offset(input->subset().x(), input->subset().y());
|
||||
|
||||
auto fp = GrMatrixConvolutionEffect::Make(std::move(inputView),
|
||||
auto fp = GrMatrixConvolutionEffect::Make(context,
|
||||
std::move(inputView),
|
||||
srcBounds,
|
||||
fKernelSize,
|
||||
fKernel,
|
||||
|
@ -490,6 +490,8 @@ public:
|
||||
|
||||
TextureSampler(GrSurfaceProxyView, GrSamplerState = {});
|
||||
|
||||
TextureSampler(TextureSampler&&) = default;
|
||||
TextureSampler& operator=(TextureSampler&&) = default;
|
||||
TextureSampler& operator=(const TextureSampler&) = delete;
|
||||
|
||||
bool operator==(const TextureSampler& that) const {
|
||||
|
@ -6,6 +6,11 @@
|
||||
*/
|
||||
#include "src/gpu/effects/GrMatrixConvolutionEffect.h"
|
||||
|
||||
#include "include/private/SkHalf.h"
|
||||
#include "src/gpu/GrBitmapTextureMaker.h"
|
||||
#include "src/gpu/GrContextPriv.h"
|
||||
#include "src/gpu/GrProxyProvider.h"
|
||||
#include "src/gpu/GrRecordingContextPriv.h"
|
||||
#include "src/gpu/GrTexture.h"
|
||||
#include "src/gpu/GrTextureProxy.h"
|
||||
#include "src/gpu/effects/GrTextureEffect.h"
|
||||
@ -26,34 +31,195 @@ protected:
|
||||
private:
|
||||
typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
|
||||
|
||||
void emitKernelBlock(EmitArgs&, SkIPoint);
|
||||
|
||||
UniformHandle fKernelUni;
|
||||
UniformHandle fKernelOffsetUni;
|
||||
UniformHandle fGainUni;
|
||||
UniformHandle fBiasUni;
|
||||
UniformHandle fKernelBiasUni;
|
||||
|
||||
typedef GrGLSLFragmentProcessor INHERITED;
|
||||
};
|
||||
|
||||
GrMatrixConvolutionEffect::KernelWrapper GrMatrixConvolutionEffect::KernelWrapper::Make(
|
||||
GrRecordingContext* context, SkISize size, const GrCaps& caps, const SkScalar* values) {
|
||||
if (nullptr == context || nullptr == values || size.isEmpty()) {
|
||||
return {};
|
||||
}
|
||||
const int length = size.area();
|
||||
// Small kernel -> just fill the array.
|
||||
KernelWrapper result(size);
|
||||
if (length <= kMaxUniformSize) {
|
||||
for (int i = 0; i < length; i++) {
|
||||
result.fArray[i] = SkScalarToFloat(values[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
ScalableSampler& scalableSampler = result.fScalableSampler;
|
||||
bool useA16 =
|
||||
context->defaultBackendFormat(kA16_float_SkColorType, GrRenderable::kNo).isValid();
|
||||
SkScalar min = values[0];
|
||||
if (!useA16) {
|
||||
// Determine min and max values to figure out inner gain & bias.
|
||||
SkScalar max = values[0];
|
||||
for (int i = 1; i < length; i++) {
|
||||
if (values[i] < min) {
|
||||
min = values[i];
|
||||
}
|
||||
if (values[i] > max) {
|
||||
max = values[i];
|
||||
}
|
||||
}
|
||||
// Treat near-0 gain (i.e. box blur) as 1, and let the kernelBias
|
||||
// move everything up to the final value.
|
||||
const SkScalar computedGain = max - min;
|
||||
scalableSampler.fGain =
|
||||
SkScalarNearlyZero(computedGain) ? 1.0f : SkScalarToFloat(computedGain);
|
||||
// Inner bias is pre-inner-gain so we divide that out.
|
||||
scalableSampler.fBias = SkScalarToFloat(min) / scalableSampler.fGain;
|
||||
}
|
||||
|
||||
// TODO: Enable kernel caching and check perf.
|
||||
static constexpr bool kCacheKernelTexture = false;
|
||||
|
||||
GrUniqueKey key;
|
||||
if (kCacheKernelTexture) {
|
||||
static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
|
||||
GrUniqueKey::Builder builder(&key, kDomain, length, "Matrix Convolution Kernel");
|
||||
// Texture cache key is the exact content of the kernel.
|
||||
static_assert(sizeof(float) == 4);
|
||||
for (int i = 0; i < length; i++) {
|
||||
builder[i] = *(const uint32_t*)&values[i];
|
||||
}
|
||||
builder.finish();
|
||||
}
|
||||
|
||||
// Find or create a texture.
|
||||
GrProxyProvider* proxyProvider = context->priv().proxyProvider();
|
||||
GrSurfaceProxyView view;
|
||||
SkColorType colorType = useA16 ? kA16_float_SkColorType : kAlpha_8_SkColorType;
|
||||
sk_sp<GrTextureProxy> cachedKernel;
|
||||
if (kCacheKernelTexture && (cachedKernel = proxyProvider->findOrCreateProxyByUniqueKey(key))) {
|
||||
GrSwizzle swizzle =
|
||||
context->priv().caps()->getReadSwizzle(cachedKernel->backendFormat(),
|
||||
SkColorTypeToGrColorType(colorType));
|
||||
view = {std::move(cachedKernel), kTopLeft_GrSurfaceOrigin, swizzle};
|
||||
} else {
|
||||
SkBitmap bm;
|
||||
auto info = SkImageInfo::Make({(int)GrNextPow2(length), 1}, colorType,
|
||||
kPremul_SkAlphaType, nullptr);
|
||||
if (!bm.tryAllocPixels(info)) {
|
||||
return {};
|
||||
}
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (useA16) {
|
||||
*bm.getAddr16(i, 0) = SkFloatToHalf(values[i]);
|
||||
} else {
|
||||
*bm.getAddr8(i, 0) =
|
||||
SkScalarRoundToInt((values[i] - min) / scalableSampler.fGain * 255);
|
||||
}
|
||||
}
|
||||
bm.setImmutable();
|
||||
GrBitmapTextureMaker maker(context, bm, GrImageTexGenPolicy::kNew_Uncached_Budgeted);
|
||||
view = maker.view(GrMipMapped::kNo);
|
||||
if (!view) {
|
||||
return {};
|
||||
}
|
||||
if (kCacheKernelTexture) {
|
||||
proxyProvider->assignUniqueKeyToProxy(key, view.asTextureProxy());
|
||||
}
|
||||
}
|
||||
scalableSampler.fSampler = { std::move(view) };
|
||||
return result;
|
||||
}
|
||||
|
||||
bool GrMatrixConvolutionEffect::KernelWrapper::operator==(const KernelWrapper& k) const {
|
||||
if (fSize != k.fSize) {
|
||||
return false;
|
||||
} else if (this->isSampled()) {
|
||||
return fScalableSampler == k.fScalableSampler;
|
||||
} else {
|
||||
return std::equal(fArray.begin(), fArray.begin() + fSize.area(), k.fArray.begin());
|
||||
}
|
||||
}
|
||||
|
||||
bool GrMatrixConvolutionEffect::KernelWrapper::ScalableSampler::operator==(
|
||||
const ScalableSampler& k) const {
|
||||
return fSampler == k.fSampler && fGain == k.fGain && fBias == k.fBias;
|
||||
}
|
||||
|
||||
// For sampled kernels, emit a for loop that does all the kernel accumulation.
|
||||
// For uniform kernels, emit a single iteration. Function is called repeatedly in a for loop.
|
||||
// loc is ignored for sampled kernels.
|
||||
void GrGLMatrixConvolutionEffect::emitKernelBlock(EmitArgs& args, SkIPoint loc) {
|
||||
const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
|
||||
GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
|
||||
GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
|
||||
int kernelWidth = mce.kernelSize().width();
|
||||
int kernelHeight = mce.kernelSize().height();
|
||||
int kernelArea = kernelWidth * kernelHeight;
|
||||
|
||||
if (mce.kernelIsSampled()) {
|
||||
fragBuilder->codeAppendf("half2 kernelCoord = half2(0, 0);");
|
||||
fragBuilder->codeAppendf("for (int i = 0; i < %d; ++i)", (int)kernelArea);
|
||||
}
|
||||
|
||||
GrGLSLShaderBuilder::ShaderBlock block(fragBuilder);
|
||||
|
||||
fragBuilder->codeAppend("half k;");
|
||||
fragBuilder->codeAppend("half2 sourceOffset;");
|
||||
if (mce.kernelIsSampled()) {
|
||||
const char* kernelBias = uniformHandler->getUniformCStr(fKernelBiasUni);
|
||||
fragBuilder->codeAppend("k = ");
|
||||
fragBuilder->appendTextureLookup(args.fTexSamplers[0], "kernelCoord");
|
||||
fragBuilder->codeAppendf(".w + %s;", kernelBias);
|
||||
fragBuilder->codeAppendf("sourceOffset.y = floor(i / %d);", kernelWidth);
|
||||
fragBuilder->codeAppendf("sourceOffset.x = i - sourceOffset.y * %d;", kernelWidth);
|
||||
float kernelStride = 1.0f / (float)GrNextPow2(kernelArea);
|
||||
fragBuilder->codeAppendf("kernelCoord.x += %f;", kernelStride);
|
||||
} else {
|
||||
fragBuilder->codeAppendf("sourceOffset = half2(%d, %d);", loc.x(), loc.y());
|
||||
int offset = loc.y() * kernelWidth + loc.x();
|
||||
static constexpr const char kVecSuffix[][4] = { ".x", ".y", ".z", ".w" };
|
||||
const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
|
||||
fragBuilder->codeAppendf("k = %s[%d]%s;", kernel, offset / 4,
|
||||
kVecSuffix[offset & 0x3]);
|
||||
}
|
||||
|
||||
auto sample = this->invokeChild(0, args, "coord + sourceOffset");
|
||||
fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
|
||||
if (!mce.convolveAlpha()) {
|
||||
fragBuilder->codeAppend("c.rgb /= max(c.a, 0.0001);");
|
||||
fragBuilder->codeAppend("c.rgb = saturate(c.rgb);");
|
||||
}
|
||||
fragBuilder->codeAppend("sum += c * k;");
|
||||
}
|
||||
|
||||
void GrGLMatrixConvolutionEffect::emitCode(EmitArgs& args) {
|
||||
const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
|
||||
|
||||
int kWidth = mce.kernelSize().width();
|
||||
int kHeight = mce.kernelSize().height();
|
||||
int kernelWidth = mce.kernelSize().width();
|
||||
int kernelHeight = mce.kernelSize().height();
|
||||
|
||||
int arrayCount = (kWidth * kHeight + 3) / 4;
|
||||
SkASSERT(4 * arrayCount >= kWidth * kHeight);
|
||||
int arrayCount = (kernelWidth * kernelHeight + 3) / 4;
|
||||
SkASSERT(4 * arrayCount >= kernelWidth * kernelHeight);
|
||||
|
||||
GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
|
||||
fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag, kHalf4_GrSLType,
|
||||
"Kernel",
|
||||
arrayCount);
|
||||
if (mce.kernelIsSampled()) {
|
||||
fKernelBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag,
|
||||
kHalf_GrSLType, "KernelBias");
|
||||
} else {
|
||||
fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag,
|
||||
kHalf4_GrSLType, "Kernel", arrayCount);
|
||||
}
|
||||
fKernelOffsetUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf2_GrSLType,
|
||||
"KernelOffset");
|
||||
fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Gain");
|
||||
fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Bias");
|
||||
|
||||
const char* kernelOffset = uniformHandler->getUniformCStr(fKernelOffsetUni);
|
||||
const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
|
||||
const char* gain = uniformHandler->getUniformCStr(fGainUni);
|
||||
const char* bias = uniformHandler->getUniformCStr(fBiasUni);
|
||||
|
||||
@ -62,27 +228,17 @@ void GrGLMatrixConvolutionEffect::emitCode(EmitArgs& args) {
|
||||
mce.sampleMatrix());
|
||||
fragBuilder->codeAppend("half4 sum = half4(0, 0, 0, 0);");
|
||||
fragBuilder->codeAppendf("float2 coord = %s - %s;", coords2D.c_str(), kernelOffset);
|
||||
fragBuilder->codeAppend("half4 c;");
|
||||
|
||||
const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
|
||||
for (int y = 0; y < kHeight; y++) {
|
||||
for (int x = 0; x < kWidth; x++) {
|
||||
GrGLSLShaderBuilder::ShaderBlock block(fragBuilder);
|
||||
int offset = y*kWidth + x;
|
||||
if (mce.kernelIsSampled()) {
|
||||
this->emitKernelBlock(args, {});
|
||||
} else {
|
||||
for (int x = 0; x < kernelWidth; ++x) {
|
||||
for (int y = 0; y < kernelHeight; ++y) {
|
||||
this->emitKernelBlock(args, SkIPoint::Make(x, y));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragBuilder->codeAppendf("half k = %s[%d]%s;", kernel, offset / 4,
|
||||
kVecSuffix[offset & 0x3]);
|
||||
SkSL::String coord;
|
||||
coord.appendf("coord + half2(%d, %d)", x, y);
|
||||
auto sample = this->invokeChild(0, args, coord);
|
||||
fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
|
||||
if (!mce.convolveAlpha()) {
|
||||
fragBuilder->codeAppend("c.rgb /= c.a;");
|
||||
fragBuilder->codeAppend("c.rgb = saturate(c.rgb);");
|
||||
}
|
||||
fragBuilder->codeAppend("sum += c * k;");
|
||||
}
|
||||
}
|
||||
if (mce.convolveAlpha()) {
|
||||
fragBuilder->codeAppendf("%s = sum * %s + %s;", args.fOutputColor, gain, bias);
|
||||
fragBuilder->codeAppendf("%s.a = saturate(%s.a);", args.fOutputColor, args.fOutputColor);
|
||||
@ -90,7 +246,7 @@ void GrGLMatrixConvolutionEffect::emitCode(EmitArgs& args) {
|
||||
args.fOutputColor, args.fOutputColor, args.fOutputColor);
|
||||
} else {
|
||||
auto sample = this->invokeChild(0, args, coords2D.c_str());
|
||||
fragBuilder->codeAppendf("c = %s;", sample.c_str());
|
||||
fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
|
||||
fragBuilder->codeAppendf("%s.a = c.a;", args.fOutputColor);
|
||||
fragBuilder->codeAppendf("%s.rgb = saturate(sum.rgb * %s + %s);", args.fOutputColor, gain, bias);
|
||||
fragBuilder->codeAppendf("%s.rgb *= %s.a;", args.fOutputColor, args.fOutputColor);
|
||||
@ -111,17 +267,22 @@ void GrGLMatrixConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdma
|
||||
const GrFragmentProcessor& processor) {
|
||||
const GrMatrixConvolutionEffect& conv = processor.cast<GrMatrixConvolutionEffect>();
|
||||
pdman.set2fv(fKernelOffsetUni, 1, conv.kernelOffset().ptr());
|
||||
int kernelCount = conv.kernelSize().width() * conv.kernelSize().height();
|
||||
float totalGain = conv.gain();
|
||||
if (conv.kernelIsSampled()) {
|
||||
totalGain *= conv.kernelSampleGain();
|
||||
pdman.set1f(fKernelBiasUni, conv.kernelSampleBias());
|
||||
} else {
|
||||
int kernelCount = conv.kernelSize().area();
|
||||
int arrayCount = (kernelCount + 3) / 4;
|
||||
SkASSERT(4 * arrayCount >= kernelCount);
|
||||
pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
|
||||
pdman.set1f(fGainUni, conv.gain());
|
||||
}
|
||||
pdman.set1f(fBiasUni, conv.bias());
|
||||
pdman.set1f(fGainUni, totalGain);
|
||||
}
|
||||
|
||||
GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
|
||||
const SkISize& kernelSize,
|
||||
const SkScalar* kernel,
|
||||
KernelWrapper kernel,
|
||||
SkScalar gain,
|
||||
SkScalar bias,
|
||||
const SkIPoint& kernelOffset,
|
||||
@ -129,14 +290,14 @@ GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentP
|
||||
// To advertise either the modulation or opaqueness optimizations we'd have to examine the
|
||||
// parameters.
|
||||
: INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
|
||||
, fKernelSize(kernelSize)
|
||||
, fKernel(std::move(kernel))
|
||||
, fGain(SkScalarToFloat(gain))
|
||||
, fBias(SkScalarToFloat(bias) / 255.0f)
|
||||
, fConvolveAlpha(convolveAlpha) {
|
||||
child->setSampledWithExplicitCoords();
|
||||
this->registerChildProcessor(std::move(child));
|
||||
for (int i = 0; i < kernelSize.width() * kernelSize.height(); i++) {
|
||||
fKernel[i] = SkScalarToFloat(kernel[i]);
|
||||
if (fKernel.isSampled()) {
|
||||
this->setTextureSamplerCnt(1);
|
||||
}
|
||||
fKernelOffset = {static_cast<float>(kernelOffset.x()),
|
||||
static_cast<float>(kernelOffset.y())};
|
||||
@ -145,7 +306,7 @@ GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentP
|
||||
|
||||
GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(const GrMatrixConvolutionEffect& that)
|
||||
: INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
|
||||
, fKernelSize(that.fKernelSize)
|
||||
, fKernel(that.fKernel)
|
||||
, fGain(that.fGain)
|
||||
, fBias(that.fBias)
|
||||
, fKernelOffset(that.fKernelOffset)
|
||||
@ -153,7 +314,9 @@ GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(const GrMatrixConvolutionEf
|
||||
auto child = that.childProcessor(0).clone();
|
||||
child->setSampledWithExplicitCoords();
|
||||
this->registerChildProcessor(std::move(child));
|
||||
std::copy_n(that.fKernel, fKernelSize.width() * fKernelSize.height(), fKernel);
|
||||
if (fKernel.isSampled()) {
|
||||
this->setTextureSamplerCnt(1);
|
||||
}
|
||||
this->addCoordTransform(&fCoordTransform);
|
||||
}
|
||||
|
||||
@ -172,14 +335,18 @@ GrGLSLFragmentProcessor* GrMatrixConvolutionEffect::onCreateGLSLInstance() const
|
||||
|
||||
bool GrMatrixConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
|
||||
const GrMatrixConvolutionEffect& s = sBase.cast<GrMatrixConvolutionEffect>();
|
||||
return fKernelSize == s.kernelSize() &&
|
||||
std::equal(fKernel, fKernel + fKernelSize.area(), s.fKernel) &&
|
||||
return fKernel == s.fKernel &&
|
||||
fGain == s.gain() &&
|
||||
fBias == s.bias() &&
|
||||
fKernelOffset == s.kernelOffset() &&
|
||||
fConvolveAlpha == s.convolveAlpha();
|
||||
}
|
||||
|
||||
const GrFragmentProcessor::TextureSampler& GrMatrixConvolutionEffect::onTextureSampler(
|
||||
int index) const {
|
||||
return IthTextureSampler(index, fKernel.scalableSampler().fSampler);
|
||||
}
|
||||
|
||||
static void fill_in_1D_gaussian_kernel_with_stride(float* kernel, int size, int stride,
|
||||
float twoSigmaSqrd) {
|
||||
SkASSERT(!SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero));
|
||||
@ -204,7 +371,6 @@ static void fill_in_1D_gaussian_kernel_with_stride(float* kernel, int size, int
|
||||
|
||||
static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
|
||||
SkScalar sigmaX, SkScalar sigmaY) {
|
||||
SkASSERT(width * height <= MAX_KERNEL_SIZE);
|
||||
const float twoSigmaSqrdX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
|
||||
const float twoSigmaSqrdY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
|
||||
|
||||
@ -260,7 +426,8 @@ static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrSurfaceProxyView srcView,
|
||||
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrRecordingContext* context,
|
||||
GrSurfaceProxyView srcView,
|
||||
const SkIRect& srcBounds,
|
||||
const SkISize& kernelSize,
|
||||
const SkScalar* kernel,
|
||||
@ -270,14 +437,19 @@ std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrSurfacePr
|
||||
GrSamplerState::WrapMode wm,
|
||||
bool convolveAlpha,
|
||||
const GrCaps& caps) {
|
||||
auto kw = KernelWrapper::Make(context, kernelSize, caps, kernel);
|
||||
if (!kw.isValid()) {
|
||||
return nullptr;
|
||||
}
|
||||
GrSamplerState sampler(wm, GrSamplerState::Filter::kNearest);
|
||||
auto child = GrTextureEffect::MakeSubset(std::move(srcView), kPremul_SkAlphaType, SkMatrix::I(),
|
||||
sampler, SkRect::Make(srcBounds), caps);
|
||||
return std::unique_ptr<GrFragmentProcessor>(new GrMatrixConvolutionEffect(
|
||||
std::move(child), kernelSize, kernel, gain, bias, kernelOffset, convolveAlpha));
|
||||
std::move(child), std::move(kw), gain, bias, kernelOffset, convolveAlpha));
|
||||
}
|
||||
|
||||
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::MakeGaussian(
|
||||
GrRecordingContext* context,
|
||||
GrSurfaceProxyView srcView,
|
||||
const SkIRect& srcBounds,
|
||||
const SkISize& kernelSize,
|
||||
@ -289,11 +461,11 @@ std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::MakeGaussian(
|
||||
SkScalar sigmaX,
|
||||
SkScalar sigmaY,
|
||||
const GrCaps& caps) {
|
||||
float kernel[MAX_KERNEL_SIZE];
|
||||
|
||||
fill_in_2D_gaussian_kernel(kernel, kernelSize.width(), kernelSize.height(), sigmaX, sigmaY);
|
||||
return Make(std::move(srcView), srcBounds, kernelSize, kernel, gain, bias, kernelOffset, wm,
|
||||
convolveAlpha, caps);
|
||||
SkAutoSTMalloc<32, float> kernel(kernelSize.area());
|
||||
fill_in_2D_gaussian_kernel(kernel.get(), kernelSize.width(), kernelSize.height(),
|
||||
sigmaX, sigmaY);
|
||||
return Make(context, std::move(srcView), srcBounds, kernelSize, kernel.get(),
|
||||
gain, bias, kernelOffset, wm, convolveAlpha, caps);
|
||||
}
|
||||
|
||||
GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect);
|
||||
@ -302,8 +474,9 @@ GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect);
|
||||
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::TestCreate(GrProcessorTestData* d) {
|
||||
auto [view, ct, at] = d->randomView();
|
||||
|
||||
int width = d->fRandom->nextRangeU(1, MAX_KERNEL_SIZE);
|
||||
int height = d->fRandom->nextRangeU(1, MAX_KERNEL_SIZE / width);
|
||||
static constexpr size_t kMaxTestKernelSize = 2 * kMaxUniformSize;
|
||||
int width = d->fRandom->nextRangeU(1, kMaxTestKernelSize);
|
||||
int height = d->fRandom->nextRangeU(1, kMaxTestKernelSize / width);
|
||||
SkISize kernelSize = SkISize::Make(width, height);
|
||||
std::unique_ptr<SkScalar[]> kernel(new SkScalar[width * height]);
|
||||
for (int i = 0; i < width * height; i++) {
|
||||
@ -325,8 +498,8 @@ std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::TestCreate(GrPro
|
||||
auto wm = static_cast<GrSamplerState::WrapMode>(
|
||||
d->fRandom->nextULessThan(GrSamplerState::kWrapModeCount));
|
||||
bool convolveAlpha = d->fRandom->nextBool();
|
||||
|
||||
return GrMatrixConvolutionEffect::Make(std::move(view),
|
||||
return GrMatrixConvolutionEffect::Make(d->context()->priv().asRecordingContext(),
|
||||
std::move(view),
|
||||
bounds,
|
||||
kernelSize,
|
||||
kernel.get(),
|
||||
|
@ -9,15 +9,18 @@
|
||||
#define GrMatrixConvolutionEffect_DEFINED
|
||||
|
||||
#include "src/gpu/GrFragmentProcessor.h"
|
||||
|
||||
// A little bit less than the minimum # uniforms required by DX9SM2 (32).
|
||||
// Allows for a 5x5 kernel (or 28x1, for that matter).
|
||||
// Must be a multiple of 4, since we upload these in vec4s.
|
||||
#define MAX_KERNEL_SIZE 28
|
||||
#include <array>
|
||||
#include <new>
|
||||
|
||||
class GrMatrixConvolutionEffect : public GrFragmentProcessor {
|
||||
public:
|
||||
static std::unique_ptr<GrFragmentProcessor> Make(GrSurfaceProxyView srcView,
|
||||
// A little bit less than the minimum # uniforms required by DX9SM2 (32).
|
||||
// Allows for a 5x5 kernel (or 28x1, for that matter).
|
||||
// Must be a multiple of 4, since we upload these in vec4s.
|
||||
static constexpr int kMaxUniformSize = 28;
|
||||
|
||||
static std::unique_ptr<GrFragmentProcessor> Make(GrRecordingContext*,
|
||||
GrSurfaceProxyView srcView,
|
||||
const SkIRect& srcBounds,
|
||||
const SkISize& kernelSize,
|
||||
const SkScalar* kernel,
|
||||
@ -28,7 +31,8 @@ public:
|
||||
bool convolveAlpha,
|
||||
const GrCaps&);
|
||||
|
||||
static std::unique_ptr<GrFragmentProcessor> MakeGaussian(GrSurfaceProxyView srcView,
|
||||
static std::unique_ptr<GrFragmentProcessor> MakeGaussian(GrRecordingContext*,
|
||||
GrSurfaceProxyView srcView,
|
||||
const SkIRect& srcBounds,
|
||||
const SkISize& kernelSize,
|
||||
SkScalar gain,
|
||||
@ -41,9 +45,12 @@ public:
|
||||
const GrCaps&);
|
||||
|
||||
const SkIRect& bounds() const { return fBounds; }
|
||||
const SkISize& kernelSize() const { return fKernelSize; }
|
||||
SkISize kernelSize() const { return fKernel.size(); }
|
||||
const SkV2 kernelOffset() const { return fKernelOffset; }
|
||||
const float* kernel() const { return fKernel; }
|
||||
bool kernelIsSampled() const { return fKernel.isSampled(); }
|
||||
const float *kernel() const { return fKernel.array().data(); }
|
||||
float kernelSampleGain() const { return fKernel.scalableSampler().fGain; }
|
||||
float kernelSampleBias() const { return fKernel.scalableSampler().fBias; }
|
||||
float gain() const { return fGain; }
|
||||
float bias() const { return fBias; }
|
||||
bool convolveAlpha() const { return fConvolveAlpha; }
|
||||
@ -53,11 +60,74 @@ public:
|
||||
std::unique_ptr<GrFragmentProcessor> clone() const override;
|
||||
|
||||
private:
|
||||
// srcProxy is the texture that is going to be convolved
|
||||
// srcBounds is the subset of 'srcProxy' that will be used (e.g., for clamp mode)
|
||||
GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor>,
|
||||
const SkISize& kernelSize,
|
||||
const SkScalar* kernel,
|
||||
/**
|
||||
* Small kernels are represented as float-arrays and uploaded as uniforms.
|
||||
* Large kernels go over the uniform limit and are uploaded as textures and sampled.
|
||||
* If Float16 textures are supported, we use those. Otherwise we use A8.
|
||||
*/
|
||||
class KernelWrapper {
|
||||
public:
|
||||
struct ScalableSampler {
|
||||
TextureSampler fSampler;
|
||||
// Only used in A8 mode. Applied before any other math.
|
||||
float fBias = 0.0f;
|
||||
// Only used in A8 mode. Premultiplied in with user gain to save time.
|
||||
float fGain = 1.0f;
|
||||
bool operator==(const ScalableSampler&) const;
|
||||
};
|
||||
static KernelWrapper Make(GrRecordingContext*, SkISize,
|
||||
const GrCaps&, const float* values);
|
||||
|
||||
KernelWrapper(KernelWrapper&& that) : fSize(that.fSize) {
|
||||
if (that.isSampled()) {
|
||||
new (&fScalableSampler) ScalableSampler(std::move(that.fScalableSampler));
|
||||
} else {
|
||||
new (&fArray) std::array<float, kMaxUniformSize>(std::move(that.fArray));
|
||||
}
|
||||
}
|
||||
KernelWrapper(const KernelWrapper& that) : fSize(that.fSize) {
|
||||
if (that.isSampled()) {
|
||||
new (&fScalableSampler) ScalableSampler(that.fScalableSampler);
|
||||
} else {
|
||||
new (&fArray) std::array<float, kMaxUniformSize>(that.fArray);
|
||||
}
|
||||
}
|
||||
~KernelWrapper() {
|
||||
if (this->isSampled()) {
|
||||
fScalableSampler.~ScalableSampler();
|
||||
}
|
||||
}
|
||||
|
||||
bool isValid() const { return !fSize.isEmpty(); }
|
||||
SkISize size() const { return fSize; }
|
||||
bool isSampled() const { return fSize.area() > kMaxUniformSize; }
|
||||
const std::array<float, kMaxUniformSize>& array() const {
|
||||
SkASSERT(!this->isSampled());
|
||||
return fArray;
|
||||
}
|
||||
const ScalableSampler& scalableSampler() const {
|
||||
SkASSERT(this->isSampled());
|
||||
return fScalableSampler;
|
||||
}
|
||||
bool operator==(const KernelWrapper&) const;
|
||||
|
||||
private:
|
||||
KernelWrapper() : fSize({}) {}
|
||||
KernelWrapper(SkISize size) : fSize(size) {
|
||||
if (this->isSampled()) {
|
||||
new (&fScalableSampler) ScalableSampler;
|
||||
}
|
||||
}
|
||||
|
||||
SkISize fSize;
|
||||
union {
|
||||
std::array<float, kMaxUniformSize> fArray;
|
||||
ScalableSampler fScalableSampler;
|
||||
};
|
||||
};
|
||||
|
||||
GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
|
||||
KernelWrapper kernel,
|
||||
SkScalar gain,
|
||||
SkScalar bias,
|
||||
const SkIPoint& kernelOffset,
|
||||
@ -71,12 +141,13 @@ private:
|
||||
|
||||
bool onIsEqual(const GrFragmentProcessor&) const override;
|
||||
|
||||
const GrFragmentProcessor::TextureSampler& onTextureSampler(int index) const override;
|
||||
|
||||
// We really just want the unaltered local coords, but the only way to get that right now is
|
||||
// an identity coord transform.
|
||||
GrCoordTransform fCoordTransform = {};
|
||||
SkIRect fBounds;
|
||||
SkISize fKernelSize;
|
||||
float fKernel[MAX_KERNEL_SIZE];
|
||||
KernelWrapper fKernel;
|
||||
float fGain;
|
||||
float fBias;
|
||||
SkV2 fKernelOffset;
|
||||
|
Loading…
Reference in New Issue
Block a user