Remove legacy mask blur code.

The flag was flipped about a year ago in the Chrome code, and
about three weeks ago in Android.

All the *_LEGACY_*BLUR flags are gone.

Change-Id: I046f3a040ccbf12ff3a810c41b02131095df3368
Reviewed-on: https://skia-review.googlesource.com/96001
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Florin Malita <fmalita@chromium.org>
Commit-Queue: Herb Derby <herb@google.com>
This commit is contained in:
Herbert Derby 2018-01-17 17:18:55 -05:00 committed by Skia Commit-Bot
parent dcb086bd79
commit 53d57ace99
2 changed files with 9 additions and 788 deletions

View File

@ -17,8 +17,6 @@
// Pi, written out to more digits than a double can represent.
static const double kPi = 3.14159265358979323846264338327950288;
// Sigma threshold used by make_plan(): sigmas below this value (that still need a blur) are
// planned with PlanBox, sigmas at or above it with PlanGauss.
static constexpr double kSmallSigma = 2.0;
class BlurScanInterface {
public:
virtual ~BlurScanInterface() = default;
@ -55,237 +53,6 @@ public:
}
};
/**
 * Plans a single-pass box blur with a fractional radius for one axis.
 *
 * The fractional radius is realized by blending two box filters: an "outer" box of radius
 * ceil(radius) and an "inner" box that is one pixel smaller on each side. Both weights are
 * stored as fixed point with 24 fractional bits and already include the 1/window
 * normalization, so a blurred value is
 *     (fOuterWeight * outerSum + fInnerWeight * innerSum + kHalf) >> 24.
 */
class PlanBox final : public PlanningInterface {
public:
    explicit PlanBox(double sigma) {
        // Calculate the radius from sigma. Taken from the old code until something better is
        // figured out.
        auto possibleRadius = 1.5 * sigma - 0.5;
        // Clamp to a tiny positive value so that ceil() below yields an outer radius of at
        // least 1 (and therefore an inner radius of at least 0).
        auto radius = std::max(std::numeric_limits<double>::epsilon(), possibleRadius);

        auto outerRadius = std::ceil(radius);
        auto outerWindow = 2 * outerRadius + 1;
        auto outerFactor = (1 - (outerRadius - radius)) / outerWindow;
        fOuterWeight = static_cast<uint32_t>(std::round(outerFactor * (1ull << 24)));

        auto innerRadius = outerRadius - 1;
        auto innerWindow = 2 * innerRadius + 1;
        auto innerFactor = (1 - (radius - innerRadius)) / innerWindow;
        fInnerWeight = static_cast<uint32_t>(std::round(innerFactor * (1ull << 24)));

        // Sliding window is defined by the relationship between the outer and inner windows.
        // In the single window case, you add the element on the right, and subtract the element on
        // the left. But, because two windows are used, this relationship is more complicated; an
        // element is added from the right of the outer window, and subtracted from the left of the
        // inner window. Because innerWindow = outerWindow - 2, the distance between
        // the left and right in the two window case is outerWindow - 1.
        fSlidingWindow = static_cast<int>(outerWindow - 1);
    }

    // Size of the scratch buffer needed by blur4Transpose(): one Sk4u per sliding-window slot,
    // expressed in uint32_t elements (the element type of the buffer passed to makeBlurScan()).
    size_t bufferSize() const override {
        return fSlidingWindow * (sizeof(Sk4u) / sizeof(uint32_t));
    }

    // Remember that sliding window = window - 1. Therefore, radius = sliding window / 2.
    int border() const override { return fSlidingWindow / 2; }

    bool needsBlur() const override { return true; }

    // Creates, in the arena, the scanner that blurs lines of the given width. `buffer` is
    // reinterpreted as an array of fSlidingWindow Sk4u values and used as a ring buffer by
    // Box::blur4Transpose().
    BlurScanInterface* makeBlurScan(
        SkArenaAlloc* alloc, int width, uint32_t* buffer) const override
    {
        int noChangeCount,
            trailingEdgeZeroCount;

        // The relation between the slidingWindow and the width dictates two operating modes.
        // * width >= slidingWindow - both sides of the window are contained in the image while
        // scanning. Therefore, we assume that slidingWindow zeros are consumed on the trailing
        // edge of the window. After this count, then both edges are traversing the image.
        // * slidingWindow > width - both sides of the window are off the image while scanning
        // the middle. The front edge of the window can only travel width until it falls off the
        // image. At this point, both edges of the window are off the image consuming zeros
        // and therefore, the destination value does not change. The scan produces unchanged
        // values until the trailing edge of the window enters the image. This count is
        // slidingWindow - width.
        if (width >= fSlidingWindow) {
            noChangeCount = 0;
            trailingEdgeZeroCount = fSlidingWindow;
        } else {
            noChangeCount = fSlidingWindow - width;
            trailingEdgeZeroCount = width;
        }

        Sk4u* sk4uBuffer = reinterpret_cast<Sk4u*>(buffer);
        return alloc->make<Box>(fOuterWeight, fInnerWeight, noChangeCount, trailingEdgeZeroCount,
                                sk4uBuffer, sk4uBuffer + fSlidingWindow);
    }

private:
    /**
     * The scanner produced by PlanBox::makeBlurScan(). It keeps two running sums (outer and
     * inner window) and blends them with the fixed-point weights computed by PlanBox.
     */
    class Box final : public BlurScanInterface {
    public:
        Box(uint32_t outerWeight, uint32_t innerWeight,
            int noChangeCount, int trailingEdgeZeroCount,
            Sk4u* buffer, Sk4u* bufferEnd)
            : fOuterWeight{outerWeight}
            , fInnerWeight{innerWeight}
            , fNoChangeCount{noChangeCount}
            , fTrailingEdgeZeroCount{trailingEdgeZeroCount}
            , fBuffer{buffer}
            , fBufferEnd{bufferEnd} { }

        // Blurs one line. Source samples are read every srcStride bytes from src up to srcEnd;
        // results are written every dstStride bytes starting at dst. The tail of the output is
        // produced by a second scan that walks backwards from srcEnd / dstEnd.
        void blur(const uint8_t* src, int srcStride, const uint8_t* srcEnd,
                  uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
            auto rightOuter = src;
            auto dstCursor = dst;

            // Blend the two window sums; adding kHalf before the shift rounds to nearest.
            auto interpolateSums = [this](uint32_t outerSum, uint32_t innerSum) {
                return SkTo<uint8_t>(
                    (fOuterWeight * outerSum + fInnerWeight * innerSum + kHalf) >> 24);
            };

            uint32_t outerSum = 0;
            uint32_t innerSum = 0;

            // Leading edge: the window is entering the image, nothing leaves it yet.
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                innerSum = outerSum;
                outerSum += *rightOuter;
                *dstCursor = interpolateSums(outerSum, innerSum);

                rightOuter += srcStride;
                dstCursor += dstStride;
            }

            // slidingWindow > width
            for (int i = 0; i < fNoChangeCount; i++) {
                *dstCursor = interpolateSums(outerSum, innerSum);
                dstCursor += dstStride;
            }

            // width > slidingWindow
            auto leftInner = src;
            while (rightOuter < srcEnd) {
                innerSum = outerSum - *leftInner;
                outerSum += *rightOuter;
                *dstCursor = interpolateSums(outerSum, innerSum);
                outerSum -= *leftInner;

                rightOuter += srcStride;
                leftInner += srcStride;
                dstCursor += dstStride;
            }

            // Trailing edge: restart the sums and scan backwards from the end of the line,
            // mirroring the leading-edge loop.
            auto leftOuter = srcEnd;
            dstCursor = dstEnd;
            outerSum = 0;
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                leftOuter -= srcStride;
                dstCursor -= dstStride;

                innerSum = outerSum;
                outerSum += *leftOuter;
                *dstCursor = interpolateSums(outerSum, innerSum);
            }
        }

        bool canBlur4() override { return true; }

        // NB this is a transposing scan. The next src is src+1, and the next down is
        // src+srcStride.
        //
        // Four source lines are processed together: each step loads one sample from each of
        // the lines src, src+srcStride, src+2*srcStride and src+3*srcStride. Samples that have
        // entered the window are remembered in the ring buffer [fBuffer, fBufferEnd) so they
        // can be subtracted again when they leave the window.
        void blur4Transpose(
            const uint8_t* src, int srcStride, const uint8_t* srcEnd,
            uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
            auto rightOuter = src;
            auto dstCursor = dst;

            Sk4u* const bufferStart = fBuffer;
            Sk4u* bufferCursor = bufferStart;
            Sk4u* const bufferEnd = fBufferEnd;

            const Sk4u outerWeight(SkTo<uint32_t>(fOuterWeight));
            const Sk4u innerWeight(SkTo<uint32_t>(fInnerWeight));

            // Gather one sample from each of the four lines.
            auto load = [](const uint8_t* cursor, int stride) -> Sk4u {
                return Sk4u(cursor[0*stride], cursor[1*stride], cursor[2*stride], cursor[3*stride]);
            };

            // Four-wide version of the blend used in blur().
            auto interpolateSums = [&] (const Sk4u& outerSum, const Sk4u& innerSum) {
                return
                    SkNx_cast<uint8_t>(
                        (outerSum * outerWeight + innerSum * innerWeight + kHalf) >> 24);
            };

            Sk4u outerSum = 0;
            Sk4u innerSum = 0;

            // Leading edge: fill the window and record every sample in the ring buffer.
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                innerSum = outerSum;

                Sk4u leadingEdge = load(rightOuter, srcStride);
                outerSum += leadingEdge;
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);

                leadingEdge.store(bufferCursor);
                bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart;

                rightOuter += 1;
                dstCursor += dstStride;
            }

            // slidingWindow > width
            for (int i = 0; i < fNoChangeCount; i++) {
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);
                dstCursor += dstStride;
            }

            // width > slidingWindow
            // The sample leaving the window is read back from the ring buffer slot that is
            // about to be overwritten with the sample entering the window.
            while (rightOuter < srcEnd) {
                Sk4u trailEdge = Sk4u::Load(bufferCursor);
                Sk4u leadingEdge = load(rightOuter, srcStride);

                innerSum = outerSum - trailEdge;
                outerSum += leadingEdge;
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);
                outerSum -= trailEdge;

                leadingEdge.store(bufferCursor);
                bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart;

                rightOuter += 1;
                dstCursor += dstStride;
            }

            // Trailing edge: restart the sums and scan backwards from the end, mirroring the
            // leading-edge loop.
            auto leftOuter = srcEnd;
            dstCursor = dstEnd;
            outerSum = 0;
            for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
                leftOuter -= 1;
                dstCursor -= dstStride;

                innerSum = outerSum;
                outerSum += load(leftOuter, srcStride);
                Sk4b blurred = interpolateSums(outerSum, innerSum);
                blurred.store(dstCursor);
            }
        }

    private:
        // 0.5 in the 24-bit fixed-point format of the weights; used for rounding.
        static constexpr uint32_t kHalf = static_cast<uint32_t>(1) << 23;

        const uint32_t fOuterWeight;
        const uint32_t fInnerWeight;
        const int fNoChangeCount;
        const int fTrailingEdgeZeroCount;
        Sk4u* const fBuffer;
        Sk4u* const fBufferEnd;
    };

private:
    uint32_t fOuterWeight;   // outer-window weight, 24 fractional bits, includes 1/outerWindow
    uint32_t fInnerWeight;   // inner-window weight, 24 fractional bits, includes 1/innerWindow
    int fSlidingWindow;      // outerWindow - 1
};
class PlanGauss final : public PlanningInterface {
public:
explicit PlanGauss(double sigma) {
@ -505,20 +272,6 @@ public:
int fPass2Size;
};
// Picks the blur plan for one axis and allocates it in the arena:
//   * 3 * sigma <= 1       -> None
//   * sigma < kSmallSigma  -> PlanBox
//   * otherwise            -> PlanGauss
// The returned object lives in `alloc`.
static PlanningInterface* make_plan(SkArenaAlloc* alloc, double sigma) {
    if (3 * sigma <= 1) {
        return alloc->make<None>();
    }
    if (sigma < kSmallSigma) {
        return alloc->make<PlanBox>(sigma);
    }
    return alloc->make<PlanGauss>(sigma);
}
// NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
// using the Gauss filter. It also limits the size of buffers used to hold intermediate values.
// Explanation of maximums:
@ -569,7 +322,6 @@ static SkMask prepare_destination(int radiusX, int radiusY, const SkMask& src) {
return dst;
}
#if !defined(SK_USE_LEGACY_INTERP_BLUR)
static constexpr uint16_t _____ = 0u;
static constexpr uint16_t kHalf = 0x80u;
@ -1197,21 +949,20 @@ static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask& src, SkMa
return {radiusX, radiusY};
}
#endif // SK_USE_LEGACY_INTERP_BLUR
// TODO: assuming sigmaW = sigmaH. Allow different sigmas. Right now the
// API forces the sigmas to be the same.
SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
#if !defined(SK_USE_LEGACY_INTERP_BLUR)
if (fSigmaW < 2.0 && fSigmaH < 2.0) {
return small_blur(fSigmaW, fSigmaH, src, dst);
}
#endif
if (fSigmaW < 2.0 && fSigmaH < 2.0) {
return small_blur(fSigmaW, fSigmaH, src, dst);
}
// 1024 is a place holder guess until more analysis can be done.
SkSTArenaAlloc<1024> alloc;
PlanningInterface* planW = make_plan(&alloc, fSigmaW);
PlanningInterface* planH = make_plan(&alloc, fSigmaH);
PlanningInterface* planW = alloc.make<PlanGauss>(fSigmaW);
PlanningInterface* planH = alloc.make<PlanGauss>(fSigmaH);
int borderW = planW->border(),
borderH = planH->border();
@ -1244,17 +995,7 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
// Blur horizontally, and transpose.
auto scanW = planW->makeBlurScan(&alloc, srcW, buffer);
int y = 0;
if (scanW->canBlur4() && srcH > 4) {
for (;y + 4 <= srcH; y += 4) {
auto srcStart = &src.fImage[y * src.fRowBytes];
auto tmpStart = &tmp[y];
scanW->blur4Transpose(srcStart, src.fRowBytes, srcStart + srcW,
tmpStart, tmpW, tmpStart + tmpW * tmpH);
}
}
for (;y < srcH; y++) {
for (int y = 0; y < srcH; y++) {
auto srcStart = &src.fImage[y * src.fRowBytes];
auto tmpStart = &tmp[y];
scanW->blur(srcStart, 1, srcStart + srcW,
@ -1264,47 +1005,13 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
// Blur vertically (scan in memory order because of the transposition),
// and transpose back to the original orientation.
auto scanH = planH->makeBlurScan(&alloc, tmpW, buffer);
y = 0;
if (scanH->canBlur4() && tmpH > 4) {
for (;y + 4 <= tmpH; y += 4) {
auto tmpStart = &tmp[y * tmpW];
auto dstStart = &dst->fImage[y];
scanH->blur4Transpose(
tmpStart, tmpW, tmpStart + tmpW,
dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
}
}
for (;y < tmpH; y++) {
for (int y = 0; y < tmpH; y++) {
auto tmpStart = &tmp[y * tmpW];
auto dstStart = &dst->fImage[y];
scanH->blur(tmpStart, 1, tmpStart + tmpW,
dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
}
} else if (planW->needsBlur()) {
// Blur only horizontally.
auto scanW = planW->makeBlurScan(&alloc, srcW, buffer);
for (int y = 0; y < srcH; y++) {
auto srcStart = &src.fImage[y * src.fRowBytes];
auto dstStart = &dst->fImage[y * dst->fRowBytes];
scanW->blur(srcStart, 1, srcStart + srcW,
dstStart, 1, dstStart + dstW);
}
} else if (planH->needsBlur()) {
// Blur only vertically.
auto srcEnd = &src.fImage[src.fRowBytes * srcH];
auto dstEnd = &dst->fImage[dst->fRowBytes * dstH];
auto scanH = planH->makeBlurScan(&alloc, srcH, buffer);
for (int x = 0; x < srcW; x++) {
auto srcStart = &src.fImage[x];
auto dstStart = &dst->fImage[x];
scanH->blur(srcStart, src.fRowBytes, srcEnd,
dstStart, dst->fRowBytes, dstEnd);
}
} else {
// Copy to dst. No Blur.
SkASSERT(false); // should not get here

View File

@ -30,386 +30,6 @@ SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) {
return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f;
}
#define UNROLL_SEPARABLE_LOOPS
/**
 * Box blur along X with a kernel that extends leftRadius samples to one side and rightRadius
 * samples to the other. When "Transpose" is true the result is written transposed, i.e. X and
 * Y are swapped on output. Source rows are always read from contiguous memory.
 *
 * Every output row is new_width = width + 2 * max(leftRadius, rightRadius) samples long, so
 * dst must hold at least new_width * height bytes. When the two radii differ, the shorter
 * side is padded with zeros. The function returns new_width.
 *
 * Per row, ignoring the zero padding and the optional 16x chunking, the work is:
 *
 *   if (width < diameter) {
 *       for (int x = 0; x < width; ++x)        { sum += *right++; emit(); }
 *       for (int x = width; x < diameter; ++x) { emit(); }
 *       for (int x = 0; x < width; ++x)        { emit(); sum -= *left++; }
 *   } else {
 *       for (int x = 0; x < diameter; ++x)     { sum += *right++; emit(); }
 *       for (int x = diameter; x < width; ++x) { sum += *right++; emit(); sum -= *left++; }
 *       for (int x = 0; x < diameter; ++x)     { emit(); sum -= *left++; }
 *   }
 *
 * where emit() stores (sum * scale + half) >> 24 and advances the destination pointer.
 */
template <bool Transpose>
static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
                   int leftRadius, int rightRadius, int width, int height)
{
    const int diameter = leftRadius + rightRadius;
    const int kernelSize = diameter + 1;
    const int border = SkMin32(width, diameter);

    // 1 / kernelSize and 0.5 as fixed point with 24 fractional bits.
    const uint32_t scale = (1 << 24) / kernelSize;
    const uint32_t half = 1 << 23;

    const int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
    const int dst_x_stride = Transpose ? height : 1;
    const int dst_y_stride = Transpose ? 1 : new_width;

    for (int y = 0; y < height; ++y) {
        uint32_t sum = 0;
        uint8_t* out = dst + y * dst_y_stride;
        const uint8_t* lead = src + y * src_y_stride;
        const uint8_t* trail = lead;

        // Store a zero / the current scaled sum and step to the next destination sample.
        auto emitZero = [&]() {
            *out = 0;
            out += dst_x_stride;
        };
        auto emitAverage = [&]() {
            *out = (sum * scale + half) >> 24;
            out += dst_x_stride;
        };
        // The four phases of the sliding window.
        auto enterStep = [&]() {          // window is entering the row
            sum += *lead++;
            emitAverage();
        };
        auto slideStep = [&]() {          // both window edges are inside the row
            sum += *lead++;
            emitAverage();
            sum -= *trail++;
        };
        auto leaveStep = [&]() {          // window is leaving the row
            emitAverage();
            sum -= *trail++;
        };

        // Zero padding in front when the right radius is the larger one.
        for (int pad = 0; pad < rightRadius - leftRadius; pad++) {
            emitZero();
        }

        int x = 0;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < border - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                enterStep();
            }
        }
#endif
        for (; x < border; ++x) {
            enterStep();
        }

        // Only runs when width < diameter: the whole row is inside the window, so the sum
        // (and the output value) does not change.
        x = width;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < diameter - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                emitAverage();
            }
        }
#endif
        for (; x < diameter; ++x) {
            emitAverage();
        }

        // Only runs when width > diameter.
        x = diameter;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < width - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                slideStep();
            }
        }
#endif
        for (; x < width; ++x) {
            slideStep();
        }

        x = 0;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < border - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                leaveStep();
            }
        }
#endif
        for (; x < border; ++x) {
            leaveStep();
        }

        // Zero padding at the end when the left radius is the larger one.
        for (int pad = 0; pad < leftRadius - rightRadius; ++pad) {
            emitZero();
        }

        // Every sample added to the window must have been removed again.
        SkASSERT(sum == 0);
    }
    return new_width;
}
/**
 * This variant of the box blur handles blurring of non-integer radii. It
 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
 * interpolates between them. In float this would be:
 *   outer_weight * outer_sum / kernelSize +
 *   (1.0 - outer_weight) * innerSum / (kernelSize - 2)
 *
 * outer_weight is the weight of the outer kernel in the range [0, 255]; the inner kernel gets
 * 255 - outer_weight. Because the inner scale divides by kernelSize - 2 = 2 * radius - 1, the
 * arithmetic is only meaningful for radius >= 1.
 *
 * When "Transpose" is true the result is written transposed. Every output row is
 * new_width = width + 2 * radius samples long; the function returns new_width.
 *
 * This is what the inner loop looks like before unrolling, and with the two
 * cases broken out separately (width < diameter, width >= diameter):
 *
 *   if (width < diameter) {
 *       for (int x = 0; x < width; x++) {
 *           inner_sum = outer_sum;
 *           outer_sum += *right++;
 *           *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
 *           dptr += dst_x_stride;
 *       }
 *       for (int x = width; x < diameter; ++x) {
 *           *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
 *           dptr += dst_x_stride;
 *       }
 *       for (int x = 0; x < width; x++) {
 *           inner_sum = outer_sum - *left++;
 *           *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
 *           dptr += dst_x_stride;
 *           outer_sum = inner_sum;
 *       }
 *   } else {
 *       for (int x = 0; x < diameter; x++) {
 *           inner_sum = outer_sum;
 *           outer_sum += *right++;
 *           *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
 *           dptr += dst_x_stride;
 *       }
 *       for (int x = diameter; x < width; ++x) {
 *           inner_sum = outer_sum - *left;
 *           outer_sum += *right++;
 *           *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
 *           dptr += dst_x_stride;
 *           outer_sum -= *left++;
 *       }
 *       for (int x = 0; x < diameter; x++) {
 *           inner_sum = outer_sum - *left++;
 *           *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
 *           dptr += dst_x_stride;
 *           outer_sum = inner_sum;
 *       }
 *   }
 */
template <bool Transpose>
static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
                         int radius, int width, int height,
                         uint8_t outer_weight)
{
    const int diameter = radius * 2;
    const int kernelSize = diameter + 1;
    const int border = SkMin32(width, diameter);

    // Rescale both weights from [0, 255] to [0, 256]. This must be done in int: in uint8_t
    // arithmetic an outer_weight of 255 would wrap around to 0 instead of becoming 256.
    int outerW = outer_weight;
    int innerW = 255 - outerW;
    outerW += outerW >> 7;
    innerW += innerW >> 7;

    // weight / windowSize as fixed point with 24 fractional bits (8 bits of weight << 16).
    const uint32_t outer_scale = (outerW << 16) / kernelSize;
    const uint32_t inner_scale = (innerW << 16) / (kernelSize - 2);
    const uint32_t half = 1 << 23;

    const int new_width = width + diameter;
    const int dst_x_stride = Transpose ? height : 1;
    const int dst_y_stride = Transpose ? 1 : new_width;

    for (int y = 0; y < height; ++y) {
        uint32_t outer_sum = 0, inner_sum = 0;
        uint8_t* dptr = dst + y * dst_y_stride;
        const uint8_t* right = src + y * src_y_stride;
        const uint8_t* left = right;

        // Store the blend of both sums (rounded) and step to the next destination sample.
        auto emit = [&]() {
            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
            dptr += dst_x_stride;
        };
        auto leftBorderStep = [&]() {     // window is entering the row
            inner_sum = outer_sum;
            outer_sum += *right++;
            emit();
        };
        auto centerStep = [&]() {         // both window edges are inside the row
            inner_sum = outer_sum - *left;
            outer_sum += *right++;
            emit();
            outer_sum -= *left++;
        };
        auto rightBorderStep = [&]() {    // window is leaving the row
            inner_sum = outer_sum - *left++;
            emit();
            outer_sum = inner_sum;
        };

        int x = 0;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < border - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                leftBorderStep();
            }
        }
#endif
        for (; x < border; ++x) {
            leftBorderStep();
        }

        // Only runs when width < diameter: the sums do not change while the whole row is
        // inside the window.
        for (int i = width; i < diameter; ++i) {
            emit();
        }

        // Only runs when width > diameter.
        x = diameter;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < width - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                centerStep();
            }
        }
#endif
        for (; x < width; ++x) {
            centerStep();
        }

        x = 0;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < border - 16; x += 16) {
            for (int k = 0; k < 16; ++k) {
                rightBorderStep();
            }
        }
#endif
        for (; x < border; ++x) {
            rightBorderStep();
        }

        // Every sample added to the windows must have been removed again.
        SkASSERT(outer_sum == 0 && inner_sum == 0);
    }
    return new_width;
}
#include "SkColorData.h"
static void merge_src_with_blur(uint8_t dst[], int dstRB,
const uint8_t src[], int srcRB,
@ -484,111 +104,6 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
SkIPoint border;
#ifdef SK_SUPPORT_LEGACY_MASK_BLUR
auto get_adjusted_radii = [](SkScalar passRadius, int *loRadius, int *hiRadius) {
*loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
*loRadius = *hiRadius - 1;
}
};
// Force high quality off for small radii (performance)
if (!force_quality && sigma <= SkIntToScalar(2)) {
quality = kLow_SkBlurQuality;
}
SkScalar passRadius;
if (kHigh_SkBlurQuality == quality) {
// For the high quality path the 3 pass box blur kernel width is
// 6*rad+1 while the full Gaussian width is 6*sigma.
passRadius = sigma - (1 / 6.0f);
} else {
// For the low quality path we only attempt to cover 3*sigma of the
// Gaussian blur area (1.5*sigma on each side). The single pass box
// blur's kernel size is 2*rad+1.
passRadius = 1.5f * sigma - 0.5f;
}
// highQuality: use three box blur passes as a cheap way
// to approximate a Gaussian blur
int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1;
int rx = SkScalarCeilToInt(passRadius);
int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255);
SkASSERT(rx >= 0);
SkASSERT((unsigned)outerWeight <= 255);
if (rx <= 0) {
return false;
}
int ry = rx; // only do square blur for now
int padx = passCount * rx;
int pady = passCount * ry;
border = {padx, pady};
dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
dst->fRowBytes = dst->fBounds.width();
dst->fFormat = SkMask::kA8_Format;
dst->fImage = nullptr;
if (src.fImage) {
size_t dstSize = dst->computeImageSize();
if (0 == dstSize) {
return false; // too big to allocate, abort
}
int sw = src.fBounds.width();
int sh = src.fBounds.height();
const uint8_t* sp = src.fImage;
uint8_t* dp = SkMask::AllocImage(dstSize);
SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
// build the blurry destination
SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
uint8_t* tp = tmpBuffer.get();
int w = sw, h = sh;
if (outerWeight == 255) {
int loRadius, hiRadius;
get_adjusted_radii(passRadius, &loRadius, &hiRadius);
if (kHigh_SkBlurQuality == quality) {
// Do three X blurs, with a transpose on the final one.
w = boxBlur<false>(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h);
w = boxBlur<false>(tp, w, dp, hiRadius, loRadius, w, h);
w = boxBlur<true>(dp, w, tp, hiRadius, hiRadius, w, h);
// Do three Y blurs, with a transpose on the final one.
h = boxBlur<false>(tp, h, dp, loRadius, hiRadius, h, w);
h = boxBlur<false>(dp, h, tp, hiRadius, loRadius, h, w);
h = boxBlur<true>(tp, h, dp, hiRadius, hiRadius, h, w);
} else {
w = boxBlur<true>(sp, src.fRowBytes, tp, rx, rx, w, h);
h = boxBlur<true>(tp, h, dp, ry, ry, h, w);
}
} else {
if (kHigh_SkBlurQuality == quality) {
// Do three X blurs, with a transpose on the final one.
w = boxBlurInterp<false>(sp, src.fRowBytes, tp, rx, w, h, outerWeight);
w = boxBlurInterp<false>(tp, w, dp, rx, w, h, outerWeight);
w = boxBlurInterp<true>(dp, w, tp, rx, w, h, outerWeight);
// Do three Y blurs, with a transpose on the final one.
h = boxBlurInterp<false>(tp, h, dp, ry, h, w, outerWeight);
h = boxBlurInterp<false>(dp, h, tp, ry, h, w, outerWeight);
h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight);
} else {
w = boxBlurInterp<true>(sp, src.fRowBytes, tp, rx, w, h, outerWeight);
h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight);
}
}
dst->fImage = autoCall.release();
}
#else
SkMaskBlurFilter blurFilter{sigma, sigma};
if (blurFilter.hasNoBlur()) {
return false;
@ -598,7 +113,6 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
if (src.fImage != nullptr && dst->fImage == nullptr) {
return false;
}
#endif // SK_SUPPORT_LEGACY_MASK_BLUR
if (src.fImage != nullptr) {
// if need be, alloc the "real" dst (same size as src) and copy/merge