Remove legacy mask blur code.
The flag was flipped about a year ago in the Chrome code, and about three weeks ago in Android. All the *_LEGACY_*BLUR flags are gone. Change-Id: I046f3a040ccbf12ff3a810c41b02131095df3368 Reviewed-on: https://skia-review.googlesource.com/96001 Reviewed-by: Mike Klein <mtklein@chromium.org> Reviewed-by: Florin Malita <fmalita@chromium.org> Commit-Queue: Herb Derby <herb@google.com>
This commit is contained in:
parent
dcb086bd79
commit
53d57ace99
@ -17,8 +17,6 @@
|
||||
|
||||
static const double kPi = 3.14159265358979323846264338327950288;
|
||||
|
||||
static constexpr double kSmallSigma = 2.0;
|
||||
|
||||
class BlurScanInterface {
|
||||
public:
|
||||
virtual ~BlurScanInterface() = default;
|
||||
@ -55,237 +53,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class PlanBox final : public PlanningInterface {
|
||||
public:
|
||||
explicit PlanBox(double sigma) {
|
||||
// Calculate the radius from sigma. Taken from the old code until something better is
|
||||
// figured out.
|
||||
auto possibleRadius = 1.5 * sigma - 0.5;
|
||||
auto radius = std::max(std::numeric_limits<double>::epsilon(), possibleRadius);
|
||||
auto outerRadius = std::ceil(radius);
|
||||
auto outerWindow = 2 * outerRadius + 1;
|
||||
auto outerFactor = (1 - (outerRadius - radius)) / outerWindow;
|
||||
fOuterWeight = static_cast<uint32_t>(round(outerFactor * (1ull << 24)));
|
||||
|
||||
auto innerRadius = outerRadius - 1;
|
||||
auto innerWindow = 2 * innerRadius + 1;
|
||||
auto innerFactor = (1 - (radius - innerRadius)) / innerWindow;
|
||||
fInnerWeight = static_cast<uint32_t>(round(innerFactor * (1ull << 24)));
|
||||
|
||||
// Sliding window is defined by the relationship between the outer and inner widows.
|
||||
// In the single window case, you add the element on the right, and subtract the element on
|
||||
// the left. But, because two windows are used, this relationship is more complicated; an
|
||||
// element is added from the right of the outer window, and subtracted from the left of the
|
||||
// inner window. Because innerWindow = outerWindow - 2, the distance between
|
||||
// the left and right in the two window case is outerWindow - 1.
|
||||
fSlidingWindow = static_cast<int>(outerWindow - 1);
|
||||
}
|
||||
|
||||
size_t bufferSize() const override {
|
||||
return fSlidingWindow * (sizeof(Sk4u) / sizeof(uint32_t));
|
||||
}
|
||||
|
||||
// Remember that sliding window = window - 1. Therefore, radius = sliding window / 2.
|
||||
int border() const override { return fSlidingWindow / 2; }
|
||||
|
||||
bool needsBlur() const override { return true; }
|
||||
|
||||
BlurScanInterface* makeBlurScan(
|
||||
SkArenaAlloc* alloc, int width, uint32_t* buffer) const override
|
||||
{
|
||||
int noChangeCount,
|
||||
trailingEdgeZeroCount;
|
||||
|
||||
// The relation between the slidingWindow and the width dictates two operating modes.
|
||||
// * width >= slidingWindow - both sides of the window are contained in the image while
|
||||
// scanning. Therefore, we assume that slidingWindow zeros are consumed on the trailing
|
||||
// edge of the window. After this count, then both edges are traversing the image.
|
||||
// * slidingWindow > width - both sides of the window are off the image while scanning
|
||||
// the middle. The front edge of the window can only travel width until it falls off the
|
||||
// image. At this point, both edges of the window are off the image consuming zeros
|
||||
// and therefore, the destination value does not change. The scan produces unchanged
|
||||
// values until the trailing edge of the window enters the image. This count is
|
||||
// slidingWindow - width.
|
||||
if (width >= fSlidingWindow) {
|
||||
noChangeCount = 0;
|
||||
trailingEdgeZeroCount = fSlidingWindow;
|
||||
} else {
|
||||
noChangeCount = fSlidingWindow - width;
|
||||
trailingEdgeZeroCount = width;
|
||||
}
|
||||
|
||||
Sk4u* sk4uBuffer = reinterpret_cast<Sk4u*>(buffer);
|
||||
return alloc->make<Box>(fOuterWeight, fInnerWeight, noChangeCount, trailingEdgeZeroCount,
|
||||
sk4uBuffer, sk4uBuffer + fSlidingWindow);
|
||||
}
|
||||
|
||||
private:
|
||||
class Box final : public BlurScanInterface {
|
||||
public:
|
||||
Box(uint32_t outerWeight, uint32_t innerWeight,
|
||||
int noChangeCount, int trailingEdgeZeroCount,
|
||||
Sk4u* buffer, Sk4u* bufferEnd)
|
||||
: fOuterWeight{outerWeight}
|
||||
, fInnerWeight{innerWeight}
|
||||
, fNoChangeCount{noChangeCount}
|
||||
, fTrailingEdgeZeroCount{trailingEdgeZeroCount}
|
||||
, fBuffer{buffer}
|
||||
, fBufferEnd{bufferEnd} { }
|
||||
|
||||
void blur(const uint8_t* src, int srcStride, const uint8_t* srcEnd,
|
||||
uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
|
||||
auto rightOuter = src;
|
||||
auto dstCursor = dst;
|
||||
|
||||
auto interpolateSums = [this](uint32_t outerSum, uint32_t innerSum) {
|
||||
return SkTo<uint8_t>(
|
||||
(fOuterWeight * outerSum + fInnerWeight * innerSum + kHalf) >> 24);
|
||||
};
|
||||
|
||||
uint32_t outerSum = 0;
|
||||
uint32_t innerSum = 0;
|
||||
for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
|
||||
innerSum = outerSum;
|
||||
outerSum += *rightOuter;
|
||||
*dstCursor = interpolateSums(outerSum, innerSum);
|
||||
|
||||
rightOuter += srcStride;
|
||||
dstCursor += dstStride;
|
||||
}
|
||||
|
||||
// slidingWindow > width
|
||||
for (int i = 0; i < fNoChangeCount; i++) {
|
||||
*dstCursor = interpolateSums(outerSum, innerSum);;
|
||||
dstCursor += dstStride;
|
||||
}
|
||||
|
||||
// width > slidingWindow
|
||||
auto leftInner = src;
|
||||
while (rightOuter < srcEnd) {
|
||||
innerSum = outerSum - *leftInner;
|
||||
outerSum += *rightOuter;
|
||||
*dstCursor = interpolateSums(outerSum, innerSum);
|
||||
outerSum -= *leftInner;
|
||||
|
||||
rightOuter += srcStride;
|
||||
leftInner += srcStride;
|
||||
dstCursor += dstStride;
|
||||
}
|
||||
|
||||
auto leftOuter = srcEnd;
|
||||
dstCursor = dstEnd;
|
||||
outerSum = 0;
|
||||
for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
|
||||
leftOuter -= srcStride;
|
||||
dstCursor -= dstStride;
|
||||
|
||||
innerSum = outerSum;
|
||||
outerSum += *leftOuter;
|
||||
*dstCursor = interpolateSums(outerSum, innerSum);
|
||||
}
|
||||
}
|
||||
|
||||
bool canBlur4() override { return true; }
|
||||
|
||||
// NB this is a transposing scan. The next src is src+1, and the next down is
|
||||
// src+srcStride.
|
||||
void blur4Transpose(
|
||||
const uint8_t* src, int srcStride, const uint8_t* srcEnd,
|
||||
uint8_t* dst, int dstStride, uint8_t* dstEnd) const override {
|
||||
auto rightOuter = src;
|
||||
auto dstCursor = dst;
|
||||
|
||||
Sk4u* const bufferStart = fBuffer;
|
||||
Sk4u* bufferCursor = bufferStart;
|
||||
Sk4u* const bufferEnd = fBufferEnd;
|
||||
|
||||
const Sk4u outerWeight(SkTo<uint32_t>(fOuterWeight));
|
||||
const Sk4u innerWeight(SkTo<uint32_t>(fInnerWeight));
|
||||
|
||||
auto load = [](const uint8_t* cursor, int stride) -> Sk4u {
|
||||
return Sk4u(cursor[0*stride], cursor[1*stride], cursor[2*stride], cursor[3*stride]);
|
||||
};
|
||||
|
||||
auto interpolateSums = [&] (const Sk4u& outerSum, const Sk4u& innerSum) {
|
||||
return
|
||||
SkNx_cast<uint8_t>(
|
||||
(outerSum * outerWeight + innerSum * innerWeight + kHalf) >> 24);
|
||||
};
|
||||
|
||||
Sk4u outerSum = 0;
|
||||
Sk4u innerSum = 0;
|
||||
for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
|
||||
innerSum = outerSum;
|
||||
|
||||
Sk4u leadingEdge = load(rightOuter, srcStride);
|
||||
outerSum += leadingEdge;
|
||||
Sk4b blurred = interpolateSums(outerSum, innerSum);
|
||||
blurred.store(dstCursor);
|
||||
|
||||
leadingEdge.store(bufferCursor);
|
||||
bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart;
|
||||
|
||||
rightOuter += 1;
|
||||
dstCursor += dstStride;
|
||||
}
|
||||
|
||||
// slidingWindow > width
|
||||
for (int i = 0; i < fNoChangeCount; i++) {
|
||||
Sk4b blurred = interpolateSums(outerSum, innerSum);
|
||||
blurred.store(dstCursor);
|
||||
dstCursor += dstStride;
|
||||
}
|
||||
|
||||
// width > slidingWindow
|
||||
auto leftInner = src;
|
||||
while (rightOuter < srcEnd) {
|
||||
Sk4u trailEdge = Sk4u::Load(bufferCursor);
|
||||
Sk4u leadingEdge = load(rightOuter, srcStride);
|
||||
innerSum = outerSum - trailEdge;
|
||||
outerSum += leadingEdge;
|
||||
|
||||
Sk4b blurred = interpolateSums(outerSum, innerSum);
|
||||
blurred.store(dstCursor);
|
||||
|
||||
outerSum -= trailEdge;
|
||||
leadingEdge.store(bufferCursor);
|
||||
bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart;
|
||||
|
||||
rightOuter += 1;
|
||||
leftInner += 1;
|
||||
dstCursor += dstStride;
|
||||
}
|
||||
|
||||
auto leftOuter = srcEnd;
|
||||
dstCursor = dstEnd;
|
||||
outerSum = 0;
|
||||
for (int i = 0; i < fTrailingEdgeZeroCount; i++) {
|
||||
leftOuter -= 1;
|
||||
dstCursor -= dstStride;
|
||||
|
||||
innerSum = outerSum;
|
||||
outerSum += load(leftOuter, srcStride);
|
||||
Sk4b blurred = interpolateSums(outerSum, innerSum);
|
||||
blurred.store(dstCursor);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr uint32_t kHalf = static_cast<uint32_t>(1) << 23;
|
||||
|
||||
const uint32_t fOuterWeight;
|
||||
const uint32_t fInnerWeight;
|
||||
const int fNoChangeCount;
|
||||
const int fTrailingEdgeZeroCount;
|
||||
Sk4u* const fBuffer;
|
||||
Sk4u* const fBufferEnd;
|
||||
};
|
||||
private:
|
||||
uint32_t fOuterWeight;
|
||||
uint32_t fInnerWeight;
|
||||
int fSlidingWindow;
|
||||
};
|
||||
|
||||
class PlanGauss final : public PlanningInterface {
|
||||
public:
|
||||
explicit PlanGauss(double sigma) {
|
||||
@ -505,20 +272,6 @@ public:
|
||||
int fPass2Size;
|
||||
};
|
||||
|
||||
// Picks the blur plan for one axis based on sigma and allocates it in alloc:
//   * 3 * sigma <= 1      -> None
//   * sigma < kSmallSigma -> PlanBox
//   * otherwise           -> PlanGauss
// The returned plan is created through alloc->make, so it is not deleted by the caller.
static PlanningInterface* make_plan(SkArenaAlloc* alloc, double sigma) {
    if (3 * sigma <= 1) {
        return alloc->make<None>();
    }
    if (sigma < kSmallSigma) {
        return alloc->make<PlanBox>(sigma);
    }
    return alloc->make<PlanGauss>(sigma);
}
|
||||
|
||||
// NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
|
||||
// using the Gauss filter. It also limits the size of buffers used to hold intermediate values.
|
||||
// Explanation of maximums:
|
||||
@ -569,7 +322,6 @@ static SkMask prepare_destination(int radiusX, int radiusY, const SkMask& src) {
|
||||
return dst;
|
||||
}
|
||||
|
||||
#if !defined(SK_USE_LEGACY_INTERP_BLUR)
|
||||
static constexpr uint16_t _____ = 0u;
|
||||
static constexpr uint16_t kHalf = 0x80u;
|
||||
|
||||
@ -1197,21 +949,20 @@ static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask& src, SkMa
|
||||
|
||||
return {radiusX, radiusY};
|
||||
}
|
||||
#endif // SK_USE_LEGACY_INTERP_BLUR
|
||||
|
||||
// TODO: assuming sigmaW = sigmaH. Allow different sigmas. Right now the
|
||||
// API forces the sigmas to be the same.
|
||||
SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
|
||||
|
||||
#if !defined(SK_USE_LEGACY_INTERP_BLUR)
|
||||
if (fSigmaW < 2.0 && fSigmaH < 2.0) {
|
||||
return small_blur(fSigmaW, fSigmaH, src, dst);
|
||||
}
|
||||
#endif
|
||||
if (fSigmaW < 2.0 && fSigmaH < 2.0) {
|
||||
return small_blur(fSigmaW, fSigmaH, src, dst);
|
||||
}
|
||||
|
||||
// 1024 is a place holder guess until more analysis can be done.
|
||||
SkSTArenaAlloc<1024> alloc;
|
||||
|
||||
PlanningInterface* planW = make_plan(&alloc, fSigmaW);
|
||||
PlanningInterface* planH = make_plan(&alloc, fSigmaH);
|
||||
PlanningInterface* planW = alloc.make<PlanGauss>(fSigmaW);
|
||||
PlanningInterface* planH = alloc.make<PlanGauss>(fSigmaH);
|
||||
|
||||
int borderW = planW->border(),
|
||||
borderH = planH->border();
|
||||
@ -1244,17 +995,7 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
|
||||
|
||||
// Blur horizontally, and transpose.
|
||||
auto scanW = planW->makeBlurScan(&alloc, srcW, buffer);
|
||||
int y = 0;
|
||||
if (scanW->canBlur4() && srcH > 4) {
|
||||
for (;y + 4 <= srcH; y += 4) {
|
||||
auto srcStart = &src.fImage[y * src.fRowBytes];
|
||||
auto tmpStart = &tmp[y];
|
||||
scanW->blur4Transpose(srcStart, src.fRowBytes, srcStart + srcW,
|
||||
tmpStart, tmpW, tmpStart + tmpW * tmpH);
|
||||
}
|
||||
}
|
||||
|
||||
for (;y < srcH; y++) {
|
||||
for (int y = 0; y < srcH; y++) {
|
||||
auto srcStart = &src.fImage[y * src.fRowBytes];
|
||||
auto tmpStart = &tmp[y];
|
||||
scanW->blur(srcStart, 1, srcStart + srcW,
|
||||
@ -1264,47 +1005,13 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
|
||||
// Blur vertically (scan in memory order because of the transposition),
|
||||
// and transpose back to the original orientation.
|
||||
auto scanH = planH->makeBlurScan(&alloc, tmpW, buffer);
|
||||
y = 0;
|
||||
if (scanH->canBlur4() && tmpH > 4) {
|
||||
for (;y + 4 <= tmpH; y += 4) {
|
||||
auto tmpStart = &tmp[y * tmpW];
|
||||
auto dstStart = &dst->fImage[y];
|
||||
|
||||
scanH->blur4Transpose(
|
||||
tmpStart, tmpW, tmpStart + tmpW,
|
||||
dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
|
||||
}
|
||||
}
|
||||
for (;y < tmpH; y++) {
|
||||
for (int y = 0; y < tmpH; y++) {
|
||||
auto tmpStart = &tmp[y * tmpW];
|
||||
auto dstStart = &dst->fImage[y];
|
||||
|
||||
scanH->blur(tmpStart, 1, tmpStart + tmpW,
|
||||
dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
|
||||
}
|
||||
} else if (planW->needsBlur()) {
|
||||
// Blur only horizontally.
|
||||
|
||||
auto scanW = planW->makeBlurScan(&alloc, srcW, buffer);
|
||||
for (int y = 0; y < srcH; y++) {
|
||||
auto srcStart = &src.fImage[y * src.fRowBytes];
|
||||
auto dstStart = &dst->fImage[y * dst->fRowBytes];
|
||||
scanW->blur(srcStart, 1, srcStart + srcW,
|
||||
dstStart, 1, dstStart + dstW);
|
||||
|
||||
}
|
||||
} else if (planH->needsBlur()) {
|
||||
// Blur only vertically.
|
||||
|
||||
auto srcEnd = &src.fImage[src.fRowBytes * srcH];
|
||||
auto dstEnd = &dst->fImage[dst->fRowBytes * dstH];
|
||||
auto scanH = planH->makeBlurScan(&alloc, srcH, buffer);
|
||||
for (int x = 0; x < srcW; x++) {
|
||||
auto srcStart = &src.fImage[x];
|
||||
auto dstStart = &dst->fImage[x];
|
||||
scanH->blur(srcStart, src.fRowBytes, srcEnd,
|
||||
dstStart, dst->fRowBytes, dstEnd);
|
||||
}
|
||||
} else {
|
||||
// Copy to dst. No Blur.
|
||||
SkASSERT(false); // should not get here
|
||||
|
@ -30,386 +30,6 @@ SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) {
|
||||
return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f;
|
||||
}
|
||||
|
||||
#define UNROLL_SEPARABLE_LOOPS
|
||||
|
||||
/**
|
||||
* This function performs a box blur in X, of the given radius. If the
|
||||
* "transpose" parameter is true, it will transpose the pixels on write,
|
||||
* such that X and Y are swapped. Reads are always performed from contiguous
|
||||
* memory in X, for speed. The destination buffer (dst) must be at least
|
||||
* (width + leftRadius + rightRadius) * height bytes in size.
|
||||
*
|
||||
* This is what the inner loop looks like before unrolling, and with the two
|
||||
* cases broken out separately (width < diameter, width >= diameter):
|
||||
*
|
||||
* if (width < diameter) {
|
||||
* for (int x = 0; x < width; ++x) {
|
||||
* sum += *right++;
|
||||
* *dptr = (sum * scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = width; x < diameter; ++x) {
|
||||
* *dptr = (sum * scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = 0; x < width; ++x) {
|
||||
* *dptr = (sum * scale + half) >> 24;
|
||||
* sum -= *left++;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* } else {
|
||||
* for (int x = 0; x < diameter; ++x) {
|
||||
* sum += *right++;
|
||||
* *dptr = (sum * scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = diameter; x < width; ++x) {
|
||||
* sum += *right++;
|
||||
* *dptr = (sum * scale + half) >> 24;
|
||||
* sum -= *left++;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = 0; x < diameter; ++x) {
|
||||
* *dptr = (sum * scale + half) >> 24;
|
||||
* sum -= *left++;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
template <bool Transpose>
|
||||
static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
|
||||
int leftRadius, int rightRadius, int width, int height)
|
||||
{
|
||||
int diameter = leftRadius + rightRadius;
|
||||
int kernelSize = diameter + 1;
|
||||
int border = SkMin32(width, diameter);
|
||||
uint32_t scale = (1 << 24) / kernelSize;
|
||||
int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
|
||||
int dst_x_stride = Transpose ? height : 1;
|
||||
int dst_y_stride = Transpose ? 1 : new_width;
|
||||
uint32_t half = 1 << 23;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
uint32_t sum = 0;
|
||||
uint8_t* dptr = dst + y * dst_y_stride;
|
||||
const uint8_t* right = src + y * src_y_stride;
|
||||
const uint8_t* left = right;
|
||||
for (int x = 0; x < rightRadius - leftRadius; x++) {
|
||||
*dptr = 0;
|
||||
dptr += dst_x_stride;
|
||||
}
|
||||
#define LEFT_BORDER_ITER \
|
||||
sum += *right++; \
|
||||
*dptr = (sum * scale + half) >> 24; \
|
||||
dptr += dst_x_stride;
|
||||
|
||||
int x = 0;
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (; x < border - 16; x += 16) {
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
}
|
||||
#endif
|
||||
for (; x < border; ++x) {
|
||||
LEFT_BORDER_ITER
|
||||
}
|
||||
#undef LEFT_BORDER_ITER
|
||||
#define TRIVIAL_ITER \
|
||||
*dptr = (sum * scale + half) >> 24; \
|
||||
dptr += dst_x_stride;
|
||||
x = width;
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (; x < diameter - 16; x += 16) {
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
TRIVIAL_ITER
|
||||
}
|
||||
#endif
|
||||
for (; x < diameter; ++x) {
|
||||
TRIVIAL_ITER
|
||||
}
|
||||
#undef TRIVIAL_ITER
|
||||
#define CENTER_ITER \
|
||||
sum += *right++; \
|
||||
*dptr = (sum * scale + half) >> 24; \
|
||||
sum -= *left++; \
|
||||
dptr += dst_x_stride;
|
||||
|
||||
x = diameter;
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (; x < width - 16; x += 16) {
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
}
|
||||
#endif
|
||||
for (; x < width; ++x) {
|
||||
CENTER_ITER
|
||||
}
|
||||
#undef CENTER_ITER
|
||||
#define RIGHT_BORDER_ITER \
|
||||
*dptr = (sum * scale + half) >> 24; \
|
||||
sum -= *left++; \
|
||||
dptr += dst_x_stride;
|
||||
|
||||
x = 0;
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (; x < border - 16; x += 16) {
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
}
|
||||
#endif
|
||||
for (; x < border; ++x) {
|
||||
RIGHT_BORDER_ITER
|
||||
}
|
||||
#undef RIGHT_BORDER_ITER
|
||||
for (int x = 0; x < leftRadius - rightRadius; ++x) {
|
||||
*dptr = 0;
|
||||
dptr += dst_x_stride;
|
||||
}
|
||||
SkASSERT(sum == 0);
|
||||
}
|
||||
return new_width;
|
||||
}
|
||||
|
||||
/**
|
||||
* This variant of the box blur handles blurring of non-integer radii. It
|
||||
* keeps two running sums: an outer sum for the rounded-up kernel radius, and
|
||||
* an inner sum for the rounded-down kernel radius. For each pixel, it linearly
|
||||
* interpolates between them. In float this would be:
|
||||
* outer_weight * outer_sum / kernelSize +
|
||||
* (1.0 - outer_weight) * innerSum / (kernelSize - 2)
|
||||
*
|
||||
* This is what the inner loop looks like before unrolling, and with the two
|
||||
* cases broken out separately (width < diameter, width >= diameter):
|
||||
*
|
||||
* if (width < diameter) {
|
||||
* for (int x = 0; x < width; x++) {
|
||||
* inner_sum = outer_sum;
|
||||
* outer_sum += *right++;
|
||||
* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = width; x < diameter; ++x) {
|
||||
* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = 0; x < width; x++) {
|
||||
* inner_sum = outer_sum - *left++;
|
||||
* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* outer_sum = inner_sum;
|
||||
* }
|
||||
* } else {
|
||||
* for (int x = 0; x < diameter; x++) {
|
||||
* inner_sum = outer_sum;
|
||||
* outer_sum += *right++;
|
||||
* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* }
|
||||
* for (int x = diameter; x < width; ++x) {
|
||||
* inner_sum = outer_sum - *left;
|
||||
* outer_sum += *right++;
|
||||
* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* outer_sum -= *left++;
|
||||
* }
|
||||
* for (int x = 0; x < diameter; x++) {
|
||||
* inner_sum = outer_sum - *left++;
|
||||
* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
* dptr += dst_x_stride;
|
||||
* outer_sum = inner_sum;
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* return new_width;
|
||||
*/
|
||||
|
||||
template <bool Transpose>
|
||||
static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
|
||||
int radius, int width, int height,
|
||||
uint8_t outer_weight)
|
||||
{
|
||||
int diameter = radius * 2;
|
||||
int kernelSize = diameter + 1;
|
||||
int border = SkMin32(width, diameter);
|
||||
int inner_weight = 255 - outer_weight;
|
||||
outer_weight += outer_weight >> 7;
|
||||
inner_weight += inner_weight >> 7;
|
||||
uint32_t outer_scale = (outer_weight << 16) / kernelSize;
|
||||
uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
|
||||
uint32_t half = 1 << 23;
|
||||
int new_width = width + diameter;
|
||||
int dst_x_stride = Transpose ? height : 1;
|
||||
int dst_y_stride = Transpose ? 1 : new_width;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
uint32_t outer_sum = 0, inner_sum = 0;
|
||||
uint8_t* dptr = dst + y * dst_y_stride;
|
||||
const uint8_t* right = src + y * src_y_stride;
|
||||
const uint8_t* left = right;
|
||||
int x = 0;
|
||||
|
||||
#define LEFT_BORDER_ITER \
|
||||
inner_sum = outer_sum; \
|
||||
outer_sum += *right++; \
|
||||
*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
|
||||
dptr += dst_x_stride;
|
||||
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (;x < border - 16; x += 16) {
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
LEFT_BORDER_ITER
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;x < border; ++x) {
|
||||
LEFT_BORDER_ITER
|
||||
}
|
||||
#undef LEFT_BORDER_ITER
|
||||
for (int x = width; x < diameter; ++x) {
|
||||
*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
|
||||
dptr += dst_x_stride;
|
||||
}
|
||||
x = diameter;
|
||||
|
||||
#define CENTER_ITER \
|
||||
inner_sum = outer_sum - *left; \
|
||||
outer_sum += *right++; \
|
||||
*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
|
||||
dptr += dst_x_stride; \
|
||||
outer_sum -= *left++;
|
||||
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (; x < width - 16; x += 16) {
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
CENTER_ITER
|
||||
}
|
||||
#endif
|
||||
for (; x < width; ++x) {
|
||||
CENTER_ITER
|
||||
}
|
||||
#undef CENTER_ITER
|
||||
|
||||
#define RIGHT_BORDER_ITER \
|
||||
inner_sum = outer_sum - *left++; \
|
||||
*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
|
||||
dptr += dst_x_stride; \
|
||||
outer_sum = inner_sum;
|
||||
|
||||
x = 0;
|
||||
#ifdef UNROLL_SEPARABLE_LOOPS
|
||||
for (; x < border - 16; x += 16) {
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
RIGHT_BORDER_ITER
|
||||
}
|
||||
#endif
|
||||
for (; x < border; ++x) {
|
||||
RIGHT_BORDER_ITER
|
||||
}
|
||||
#undef RIGHT_BORDER_ITER
|
||||
SkASSERT(outer_sum == 0 && inner_sum == 0);
|
||||
}
|
||||
return new_width;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include "SkColorData.h"
|
||||
|
||||
static void merge_src_with_blur(uint8_t dst[], int dstRB,
|
||||
const uint8_t src[], int srcRB,
|
||||
@ -484,111 +104,6 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
|
||||
|
||||
SkIPoint border;
|
||||
|
||||
#ifdef SK_SUPPORT_LEGACY_MASK_BLUR
|
||||
|
||||
auto get_adjusted_radii = [](SkScalar passRadius, int *loRadius, int *hiRadius) {
|
||||
*loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
|
||||
if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
|
||||
*loRadius = *hiRadius - 1;
|
||||
}
|
||||
};
|
||||
|
||||
// Force high quality off for small radii (performance)
|
||||
if (!force_quality && sigma <= SkIntToScalar(2)) {
|
||||
quality = kLow_SkBlurQuality;
|
||||
}
|
||||
|
||||
SkScalar passRadius;
|
||||
if (kHigh_SkBlurQuality == quality) {
|
||||
// For the high quality path the 3 pass box blur kernel width is
|
||||
// 6*rad+1 while the full Gaussian width is 6*sigma.
|
||||
passRadius = sigma - (1 / 6.0f);
|
||||
} else {
|
||||
// For the low quality path we only attempt to cover 3*sigma of the
|
||||
// Gaussian blur area (1.5*sigma on each side). The single pass box
|
||||
// blur's kernel size is 2*rad+1.
|
||||
passRadius = 1.5f * sigma - 0.5f;
|
||||
}
|
||||
|
||||
// highQuality: use three box blur passes as a cheap way
|
||||
// to approximate a Gaussian blur
|
||||
int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1;
|
||||
|
||||
int rx = SkScalarCeilToInt(passRadius);
|
||||
int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255);
|
||||
|
||||
SkASSERT(rx >= 0);
|
||||
SkASSERT((unsigned)outerWeight <= 255);
|
||||
if (rx <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int ry = rx; // only do square blur for now
|
||||
|
||||
int padx = passCount * rx;
|
||||
int pady = passCount * ry;
|
||||
|
||||
border = {padx, pady};
|
||||
|
||||
dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
|
||||
src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
|
||||
|
||||
dst->fRowBytes = dst->fBounds.width();
|
||||
dst->fFormat = SkMask::kA8_Format;
|
||||
dst->fImage = nullptr;
|
||||
|
||||
if (src.fImage) {
|
||||
size_t dstSize = dst->computeImageSize();
|
||||
if (0 == dstSize) {
|
||||
return false; // too big to allocate, abort
|
||||
}
|
||||
|
||||
int sw = src.fBounds.width();
|
||||
int sh = src.fBounds.height();
|
||||
const uint8_t* sp = src.fImage;
|
||||
uint8_t* dp = SkMask::AllocImage(dstSize);
|
||||
SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
|
||||
|
||||
// build the blurry destination
|
||||
SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
|
||||
uint8_t* tp = tmpBuffer.get();
|
||||
int w = sw, h = sh;
|
||||
|
||||
if (outerWeight == 255) {
|
||||
int loRadius, hiRadius;
|
||||
get_adjusted_radii(passRadius, &loRadius, &hiRadius);
|
||||
if (kHigh_SkBlurQuality == quality) {
|
||||
// Do three X blurs, with a transpose on the final one.
|
||||
w = boxBlur<false>(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h);
|
||||
w = boxBlur<false>(tp, w, dp, hiRadius, loRadius, w, h);
|
||||
w = boxBlur<true>(dp, w, tp, hiRadius, hiRadius, w, h);
|
||||
// Do three Y blurs, with a transpose on the final one.
|
||||
h = boxBlur<false>(tp, h, dp, loRadius, hiRadius, h, w);
|
||||
h = boxBlur<false>(dp, h, tp, hiRadius, loRadius, h, w);
|
||||
h = boxBlur<true>(tp, h, dp, hiRadius, hiRadius, h, w);
|
||||
} else {
|
||||
w = boxBlur<true>(sp, src.fRowBytes, tp, rx, rx, w, h);
|
||||
h = boxBlur<true>(tp, h, dp, ry, ry, h, w);
|
||||
}
|
||||
} else {
|
||||
if (kHigh_SkBlurQuality == quality) {
|
||||
// Do three X blurs, with a transpose on the final one.
|
||||
w = boxBlurInterp<false>(sp, src.fRowBytes, tp, rx, w, h, outerWeight);
|
||||
w = boxBlurInterp<false>(tp, w, dp, rx, w, h, outerWeight);
|
||||
w = boxBlurInterp<true>(dp, w, tp, rx, w, h, outerWeight);
|
||||
// Do three Y blurs, with a transpose on the final one.
|
||||
h = boxBlurInterp<false>(tp, h, dp, ry, h, w, outerWeight);
|
||||
h = boxBlurInterp<false>(dp, h, tp, ry, h, w, outerWeight);
|
||||
h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight);
|
||||
} else {
|
||||
w = boxBlurInterp<true>(sp, src.fRowBytes, tp, rx, w, h, outerWeight);
|
||||
h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight);
|
||||
}
|
||||
}
|
||||
|
||||
dst->fImage = autoCall.release();
|
||||
}
|
||||
#else
|
||||
SkMaskBlurFilter blurFilter{sigma, sigma};
|
||||
if (blurFilter.hasNoBlur()) {
|
||||
return false;
|
||||
@ -598,7 +113,6 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
|
||||
if (src.fImage != nullptr && dst->fImage == nullptr) {
|
||||
return false;
|
||||
}
|
||||
#endif // SK_SUPPORT_LEGACY_MASK_BLUR
|
||||
|
||||
if (src.fImage != nullptr) {
|
||||
// if need be, alloc the "real" dst (same size as src) and copy/merge
|
||||
|
Loading…
Reference in New Issue
Block a user