Go back to rect blur profile textures.

It's faster than analytic.

This version of the profile texture has better binning and simpler shader
logic than the original. I believe it also avoids some of the integer rounding
that led to artifacts in the previous texture implementation.

We oversize the profile for the blur, bin its width by powers of 2 with a
32-texel minimum, and then rescale the texture coordinates over a 6 sigma
range in the shader. We pre-inset the rect uniform so the texture is always
placed with one end at the rect edge, extending outward.

Bug: chromium:995308
Change-Id: I739db98d4df69de0f5b2f5dda079cf034ba32035
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239119
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
This commit is contained in:
Brian Salomon 2019-09-04 11:20:45 -04:00 committed by Skia Commit-Bot
parent 03c68091c1
commit e736684166
3 changed files with 178 additions and 74 deletions

View File

@ -6,10 +6,11 @@
@header {
#include "include/core/SkScalar.h"
#include "src/core/SkBlurMask.h"
#include "src/gpu/GrProxyProvider.h"
#include "src/gpu/GrShaderCaps.h"
#include "include/core/SkScalar.h"
#include "src/core/SkBlurMask.h"
#include "src/core/SkMathPriv.h"
#include "src/gpu/GrProxyProvider.h"
#include "src/gpu/GrShaderCaps.h"
in float4 rect;
@ -20,7 +21,55 @@ layout(key) bool highp = abs(rect.x) > 16000 || abs(rect.y) > 16000 ||
layout(when= highp) uniform float4 rectF;
layout(when=!highp) uniform half4 rectH;
in uniform half sigma;
in uniform sampler2D blurProfile;
in uniform half invProfileWidth;
@constructorParams {
GrSamplerState samplerParams
@samplerParams(blurProfile) {
@class {
// Returns a texture proxy holding a one-row A8 blur profile sized for 'sigma', or nullptr if the
// bitmap allocation or the proxy creation fails. Proxies are looked up and registered under a
// unique key built only from the binned profile width, so every sigma that lands in the same
// width bin shares one texture.
static sk_sp<GrTextureProxy> CreateBlurProfileTexture(GrProxyProvider* proxyProvider,
float sigma) {
// The "profile" we are calculating is the integral of a Gaussian with 'sigma' and a half
// plane. All such profiles are just scales of each other. So all we really care about is
// having enough resolution so that the linear interpolation done in texture lookup doesn't
// introduce noticeable artifacts. SkBlurMask::ComputeBlurProfile() produces profiles with
// ceil(6 * sigma) entries. We conservatively choose to have 2 texels for each dst pixel.
int minProfileWidth = 2 * sk_float_ceil2int(6 * sigma);
// Bin by powers of 2 with a minimum so we get good profile reuse (remember we can just scale
// the texture coords to span the larger profile over a 6 sigma distance).
int profileWidth = SkTMax(SkNextPow2(minProfileWidth), 32);
// The key's only data word is the binned width; 'sigma' itself is not part of the key.
static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
GrUniqueKey key;
GrUniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
builder[0] = profileWidth;
// NOTE(review): no builder.finish() call is visible in this view before 'key' is used while
// 'builder' is still in scope -- confirm the key is finalized at this point.
sk_sp<GrTextureProxy> blurProfile(proxyProvider->findOrCreateProxyByUniqueKey(
key, GrColorType::kAlpha_8, kTopLeft_GrSurfaceOrigin));
if (!blurProfile) {
// No proxy was found for this width, so build the profile on the CPU in a profileWidth x 1
// A8 bitmap and create a proxy from it.
SkBitmap bitmap;
if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
return nullptr;
// The third argument is profileWidth / 6; presumably this is the sigma whose 6 sigma extent
// spans the whole texture -- confirm against SkBlurMask::ComputeBlurProfile().
SkBlurMask::ComputeBlurProfile(bitmap.getAddr8(0, 0), profileWidth, profileWidth / 6.f);
blurProfile = proxyProvider->createProxyFromBitmap(bitmap, GrMipMapped::kNo);
if (!blurProfile) {
return nullptr;
SkASSERT(blurProfile->origin() == kTopLeft_GrSurfaceOrigin);
// Attach 'key' to the new proxy so the lookup above can return it on later calls.
proxyProvider->assignUniqueKeyToProxy(key, blurProfile.get());
return blurProfile;
@make {
static std::unique_ptr<GrFragmentProcessor> Make(GrProxyProvider* proxyProvider,
@ -36,11 +85,6 @@ in uniform half sigma;
return nullptr;
// Sigma is always a half.
SkASSERT(sigma > 0);
if (sigma > 16000.f) {
return nullptr;
if (doubleProfileSize >= (float) rect.width() ||
doubleProfileSize >= (float) rect.height()) {
@ -49,52 +93,45 @@ in uniform half sigma;
return nullptr;
return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(rect, sigma));
auto profile = CreateBlurProfileTexture(proxyProvider, sigma);
if (!profile) {
return nullptr;
// The profile is calculated such that the midpoint is at the rect's edge. To simplify
// calculating texture coords in the shader, we inset the rect such that the profile
// can be used with one end point aligned to the edges of the rect uniform. The texture
// coords should be scaled such that the profile is sampled over a 6 sigma range so inset
// by 3 sigma.
float halfW = 3.f * sigma;
auto insetR = rect.makeInset(halfW, halfW);
// inverse of the width over which the profile texture should be interpolated outward from
// the inset rect.
float invWidth = 1.f / (2 * halfW);
return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
insetR, std::move(profile), invWidth, GrSamplerState::ClampBilerp()));
void main() {
// Get the smaller of the signed distance from the frag coord to the left and right edges
// and similar for y.
// The blur profile computed by SkBlurMask::ComputeBlurProfile is actually 1 - integral.
// The integral is an S-looking shape that is symmetric about 0, so we just compute x and y
// "backwards" such that texture coord is 1 at the edge and goes to 0 as we move outward.
half x;
@if (highp) {
x = min(half(sk_FragCoord.x - rectF.x), half(rectF.z - sk_FragCoord.x));
x = max(half(rectF.x - sk_FragCoord.x), half(sk_FragCoord.x - rectF.z));
} else {
x = min(half(sk_FragCoord.x - rectH.x), half(rectH.z - sk_FragCoord.x));
x = max(half(rectH.x - sk_FragCoord.x), half(sk_FragCoord.x - rectH.z));
half y;
@if (highp) {
y = min(half(sk_FragCoord.y - rectF.y), half(rectF.w - sk_FragCoord.y));
y = max(half(rectF.y - sk_FragCoord.y), half(sk_FragCoord.y - rectF.w));
} else {
y = min(half(sk_FragCoord.y - rectH.y), half(rectH.w - sk_FragCoord.y));
y = max(half(rectH.y - sk_FragCoord.y), half(sk_FragCoord.y - rectH.w));
// The sw code computes an approximation of an integral of the Gaussian from -inf to x,
// where x is the signed distance to the edge (positive inside the rect). The approximation
// is based on three box filters and is a piecewise cubic. The piecewise nature introduces
// branches so here we use a 5th degree very close approximation of the piecewise cubic. The
// piecewise cubic goes from 0 to 1 as x goes from -1.5 to 1.5.
half r = 1 / (2.0 * sigma);
x *= r;
y *= r;
// The polynomial is such that we can either clamp the domain or the range. Clamping the
// range (xCoverage/yCoverage) seems to be faster but the polynomial quickly produces very
// large absolute values outside the [-1.5, 1.5] domain and some mobile GPUs don't seem to
// properly produce -infs or infs in that case. So instead we clamp the domain (x/y). The
// perf is probably because clamping to [0, 1] is faster than clamping to [-1.5, 1.5].
x = clamp(x, -1.5, 1.5);
y = clamp(y, -1.5, 1.5);
half x2 = x * x;
half x3 = x2 * x;
half x5 = x2 * x3;
half a = 0.734822;
half b = -0.313376;
half c = 0.0609169;
half d = 0.5;
half xCoverage = a * x + b * x3 + c * x5 + d;
half y2 = y * y;
half y3 = y2 * y;
half y5 = y2 * y3;
half yCoverage = a * y + b * y3 + c * y5 + d;
half xCoverage = sample(blurProfile, half2(x * invProfileWidth, 0.5)).a;
half yCoverage = sample(blurProfile, half2(y * invProfileWidth, 0.5)).a;
sk_OutColor = sk_InColor * xCoverage * yCoverage;

View File

@ -25,8 +25,8 @@ public:
auto rect = _outer.rect;
auto sigma = _outer.sigma;
auto invProfileWidth = _outer.invProfileWidth;
highp = ((abs(rect.left()) > 16000.0 || abs(rect.top()) > 16000.0) ||
abs(rect.right()) > 16000.0) ||
abs(rect.bottom()) > 16000.0;
@ -38,16 +38,17 @@ public:
rectHVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf4_GrSLType,
sigmaVar =
args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf_GrSLType, "sigma");
invProfileWidthVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag,
kHalf_GrSLType, "invProfileWidth");
"/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n x = "
"min(half(sk_FragCoord.x - %s.x), half(%s.z - sk_FragCoord.x));\n} else {\n x = "
"min(half(sk_FragCoord.x - float(%s.x)), half(float(%s.z) - "
"sk_FragCoord.x));\n}\nhalf y;\n@if (highp) {\n y = min(half(sk_FragCoord.y - "
"%s.y), half(%s.w - sk_FragCoord.y));\n} else {\n y = min(half(sk_FragCoord.y - "
"float(%s.y)), half(float(%s.w) - sk_FragCoord.y));\n}\nhalf r = 1.0 / (2.0 * "
"%s);\nx *= r;\ny *= r;\nx = clamp(x, -1.5, 1.5);\ny = clamp(y, -1.5, 1.5",
"/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n x = max(half(%s.x - "
"sk_FragCoord.x), half(sk_FragCoord.x - %s.z));\n} else {\n x = "
"max(half(float(%s.x) - sk_FragCoord.x), half(sk_FragCoord.x - "
"float(%s.z)));\n}\nhalf y;\n@if (highp) {\n y = max(half(%s.y - "
"sk_FragCoord.y), half(sk_FragCoord.y - %s.w));\n} else {\n y = "
"max(half(float(%s.y) - sk_FragCoord.y), half(sk_FragCoord.y - "
"float(%s.w)));\n}\nhalf xCoverage = sample(%s, float2(half2(x * %s, "
"0.5))).%s.w;\nhalf yCoverage = sample(%s, flo",
(highp ? "true" : "false"),
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
@ -57,13 +58,14 @@ public:
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
");\nhalf x2 = x * x;\nhalf x3 = x2 * x;\nhalf x5 = x2 * x3;\n\n\n\n\nhalf "
"xCoverage = ((0.73482197523117065 * x + -0.31337600946426392 * x3) + "
"0.060916900634765625 * x5) + 0.5;\nhalf y2 = y * y;\nhalf y3 = y2 * y;\nhalf y5 = "
"y2 * y3;\nhalf yCoverage = ((0.73482197523117065 * y + -0.31337600946426392 * y3) "
"+ 0.060916900634765625 * y5) + 0.5;\n%s = (%s * xCoverage) * yCoverage;\n",
"at2(half2(y * %s, 0.5))).%s.w;\n%s = (%s * xCoverage) * yCoverage;\n",
args.fOutputColor, args.fInputColor);
@ -71,15 +73,18 @@ private:
void onSetData(const GrGLSLProgramDataManager& pdman,
const GrFragmentProcessor& _proc) override {
const GrRectBlurEffect& _outer = _proc.cast<GrRectBlurEffect>();
{ pdman.set1f(sigmaVar, (_outer.sigma)); }
{ pdman.set1f(invProfileWidthVar, (_outer.invProfileWidth)); }
auto rect = _outer.rect;
UniformHandle& rectF = rectFVar;
UniformHandle& rectH = rectHVar;
UniformHandle& sigma = sigmaVar;
GrSurfaceProxy& blurProfileProxy = *_outer.textureSampler(0).proxy();
GrTexture& blurProfile = *blurProfileProxy.peekTexture();
UniformHandle& invProfileWidth = invProfileWidthVar;
float r[]{rect.fLeft, rect.fTop, rect.fRight, rect.fBottom};
pdman.set4fv(highp ? rectF : rectH, 1, r);
@ -87,7 +92,7 @@ private:
bool highp = false;
UniformHandle rectFVar;
UniformHandle rectHVar;
UniformHandle sigmaVar;
UniformHandle invProfileWidthVar;
GrGLSLFragmentProcessor* GrRectBlurEffect::onCreateGLSLInstance() const {
return new GrGLSLRectBlurEffect();
@ -103,16 +108,23 @@ bool GrRectBlurEffect::onIsEqual(const GrFragmentProcessor& other) const {
const GrRectBlurEffect& that = other.cast<GrRectBlurEffect>();
if (rect != that.rect) return false;
if (sigma != that.sigma) return false;
if (blurProfile != that.blurProfile) return false;
if (invProfileWidth != that.invProfileWidth) return false;
return true;
GrRectBlurEffect::GrRectBlurEffect(const GrRectBlurEffect& src)
: INHERITED(kGrRectBlurEffect_ClassID, src.optimizationFlags())
, rect(src.rect)
, sigma(src.sigma) {}
, blurProfile(src.blurProfile)
, invProfileWidth(src.invProfileWidth) {
std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::clone() const {
return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(*this));
const GrFragmentProcessor::TextureSampler& GrRectBlurEffect::onTextureSampler(int index) const {
return IthTextureSampler(index, blurProfile);
std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::TestCreate(GrProcessorTestData* data) {

View File

@ -14,6 +14,7 @@
#include "include/core/SkScalar.h"
#include "src/core/SkBlurMask.h"
#include "src/core/SkMathPriv.h"
#include "src/gpu/GrProxyProvider.h"
#include "src/gpu/GrShaderCaps.h"
@ -21,6 +22,44 @@
#include "src/gpu/GrFragmentProcessor.h"
class GrRectBlurEffect : public GrFragmentProcessor {
// Returns a texture proxy holding a one-row A8 blur profile sized for 'sigma', or nullptr if the
// bitmap allocation or the proxy creation fails. Proxies are looked up and registered under a
// unique key built only from the binned profile width, so every sigma that lands in the same
// width bin shares one texture.
static sk_sp<GrTextureProxy> CreateBlurProfileTexture(GrProxyProvider* proxyProvider,
float sigma) {
// The "profile" we are calculating is the integral of a Gaussian with 'sigma' and a half
// plane. All such profiles are just scales of each other. So all we really care about is
// having enough resolution so that the linear interpolation done in texture lookup doesn't
// introduce noticeable artifacts. SkBlurMask::ComputeBlurProfile() produces profiles with
// ceil(6 * sigma) entries. We conservatively choose to have 2 texels for each dst pixel.
int minProfileWidth = 2 * sk_float_ceil2int(6 * sigma);
// Bin by powers of 2 with a minimum so we get good profile reuse (remember we can just
// scale the texture coords to span the larger profile over a 6 sigma distance).
int profileWidth = SkTMax(SkNextPow2(minProfileWidth), 32);
// The key's only data word is the binned width; 'sigma' itself is not part of the key.
static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
GrUniqueKey key;
GrUniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
builder[0] = profileWidth;
// NOTE(review): no builder.finish() call is visible in this view before 'key' is used while
// 'builder' is still in scope -- confirm the key is finalized at this point.
sk_sp<GrTextureProxy> blurProfile(proxyProvider->findOrCreateProxyByUniqueKey(
key, GrColorType::kAlpha_8, kTopLeft_GrSurfaceOrigin));
if (!blurProfile) {
// No proxy was found for this width, so build the profile on the CPU in a profileWidth x 1
// A8 bitmap and create a proxy from it.
SkBitmap bitmap;
if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
return nullptr;
// The third argument is profileWidth / 6; presumably this is the sigma whose 6 sigma extent
// spans the whole texture -- confirm against SkBlurMask::ComputeBlurProfile().
SkBlurMask::ComputeBlurProfile(bitmap.getAddr8(0, 0), profileWidth, profileWidth / 6.f);
blurProfile = proxyProvider->createProxyFromBitmap(bitmap, GrMipMapped::kNo);
if (!blurProfile) {
return nullptr;
SkASSERT(blurProfile->origin() == kTopLeft_GrSurfaceOrigin);
// Attach 'key' to the new proxy so the lookup above can return it on later calls.
proxyProvider->assignUniqueKeyToProxy(key, blurProfile.get());
return blurProfile;
static std::unique_ptr<GrFragmentProcessor> Make(GrProxyProvider* proxyProvider,
const GrShaderCaps& caps, const SkRect& rect,
float sigma) {
@ -34,11 +73,6 @@ public:
return nullptr;
// Sigma is always a half.
SkASSERT(sigma > 0);
if (sigma > 16000.f) {
return nullptr;
if (doubleProfileSize >= (float)rect.width() || doubleProfileSize >= (float)rect.height()) {
// if the blur sigma is too large so the gaussian overlaps the whole
@ -46,23 +80,44 @@ public:
return nullptr;
return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(rect, sigma));
auto profile = CreateBlurProfileTexture(proxyProvider, sigma);
if (!profile) {
return nullptr;
// The profile is calculated such that the midpoint is at the rect's edge. To simplify
// calculating texture coords in the shader, we inset the rect such that the profile
// can be used with one end point aligned to the edges of the rect uniform. The texture
// coords should be scaled such that the profile is sampled over a 6 sigma range so inset
// by 3 sigma.
float halfW = 3.f * sigma;
auto insetR = rect.makeInset(halfW, halfW);
// inverse of the width over which the profile texture should be interpolated outward from
// the inset rect.
float invWidth = 1.f / (2 * halfW);
return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
insetR, std::move(profile), invWidth, GrSamplerState::ClampBilerp()));
GrRectBlurEffect(const GrRectBlurEffect& src);
std::unique_ptr<GrFragmentProcessor> clone() const override;
const char* name() const override { return "RectBlurEffect"; }
SkRect rect;
float sigma;
TextureSampler blurProfile;
float invProfileWidth;
GrRectBlurEffect(SkRect rect, float sigma)
GrRectBlurEffect(SkRect rect, sk_sp<GrTextureProxy> blurProfile, float invProfileWidth,
GrSamplerState samplerParams)
: INHERITED(kGrRectBlurEffect_ClassID,
, rect(rect)
, sigma(sigma) {}
, blurProfile(std::move(blurProfile), samplerParams)
, invProfileWidth(invProfileWidth) {
GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override;
bool onIsEqual(const GrFragmentProcessor&) const override;
const TextureSampler& onTextureSampler(int) const override;
typedef GrFragmentProcessor INHERITED;