Refactor resize filter to go faster

Part of this CL improves the speed by using dynamic arrays
more effectively.

Part uses SIMD and more concise float expressions for speed.

Some unused code was deleted.

The latter changes are guarded by:

  SK_SUPPORT_LEGACY_BITMAP_FILTER

until this lands and the corresponding layout changes in
chrome can be relanded.

With the legacy flag defined, no Skia or Chrome test results
change. Without the flag defined in Skia, only 0.01% - 0.02%
of the pixels change, and then by (1,1,1) in 8888.

codereview.chromium.org/1583533002 adds the guard to Chrome.

R=reed@google.com
BUG=skia:2261
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1563183003

Review URL: https://codereview.chromium.org/1563183003
This commit is contained in:
caryclark 2016-01-12 10:44:02 -08:00 committed by Commit bot
parent 05dcb4c7a9
commit 368342ccb5
5 changed files with 181 additions and 75 deletions

View File

@ -20,7 +20,7 @@ SK_CONF_DECLARE(const char *, c_bitmapFilter, "bitmap.filter", "mitchell", "Whic
SkBitmapFilter *SkBitmapFilter::Allocate() {
if (!strcmp(c_bitmapFilter, "mitchell")) {
return new SkMitchellFilter(1.f / 3.f, 1.f / 3.f);
return new SkMitchellFilter;
} else if (!strcmp(c_bitmapFilter, "lanczos")) {
return new SkLanczosFilter;
} else if (!strcmp(c_bitmapFilter, "hamming")) {

View File

@ -12,6 +12,10 @@
#include "SkMath.h"
#include "SkScalar.h"
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
#include "SkNx.h"
#endif
// size of the precomputed bitmap filter tables for high quality filtering.
// Used to precompute the shape of the filter kernel.
// Table size chosen from experiments to see where I could start to see a difference.
@ -47,6 +51,20 @@ class SkBitmapFilter {
float width() const { return fWidth; }
float invWidth() const { return fInvWidth; }
virtual float evaluate(float x) const = 0;
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
virtual float evaluate_n(float val, float diff, int count, float* output) const {
float sum = 0;
for (int index = 0; index < count; index++) {
float filterValue = evaluate(val);
*output++ = filterValue;
sum += filterValue;
val += diff;
}
return sum;
}
#endif
virtual ~SkBitmapFilter() {}
static SkBitmapFilter* Allocate();
@ -73,27 +91,98 @@ class SkBitmapFilter {
}
};
class SkMitchellFilter: public SkBitmapFilter {
class SkMitchellFilter : public SkBitmapFilter {
public:
SkMitchellFilter(float b, float c, float width=2.0f)
: SkBitmapFilter(width), B(b), C(c) {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
SkMitchellFilter()
: INHERITED(2), B(1.f / 3), C(1.f / 3) {
}
#else
SkMitchellFilter()
: INHERITED(2)
, fB(1.f / 3.f)
, fC(1.f / 3.f)
, fA1(-fB - 6*fC)
, fB1(6*fB + 30*fC)
, fC1(-12*fB - 48*fC)
, fD1(8*fB + 24*fC)
, fA2(12 - 9*fB - 6*fC)
, fB2(-18 + 12*fB + 6*fC)
, fD2(6 - 2*fB) {
}
#endif
float evaluate(float x) const override {
x = fabsf(x);
if (x > 2.f) {
return 0;
} else if (x > 1.f) {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
return ((-B - 6*C) * x*x*x + (6*B + 30*C) * x*x +
(-12*B - 48*C) * x + (8*B + 24*C)) * (1.f/6.f);
#else
return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f);
#endif
} else {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
return ((12 - 9*B - 6*C) * x*x*x +
(-18 + 12*B + 6*C) * x*x +
(6 - 2*B)) * (1.f/6.f);
#else
return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f);
#endif
}
}
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
// TODO : native Sk4f abs
static Sk4f abs(const Sk4f& x) {
Sk4f neg = x < Sk4f(0);
return neg.thenElse(Sk4f(0) - x, x);
}
Sk4f evalcore_n(const Sk4f& val) const {
Sk4f x = abs(val);
Sk4f over2 = x > Sk4f(2);
Sk4f over1 = x > Sk4f(1);
Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1))
* Sk4f(1.f/6.f);
Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f);
return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0));
}
float evaluate_n(float val, float diff, int count, float* output) const override {
Sk4f sum(0);
while (count >= 4) {
float v0 = val;
float v1 = val += diff;
float v2 = val += diff;
float v3 = val += diff;
val += diff;
Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3));
filterValue.store(output);
output += 4;
sum = sum + filterValue;
count -= 4;
}
float sums[4];
sum.store(sums);
float result = sums[0] + sums[1] + sums[2] + sums[3];
result += INHERITED::evaluate_n(val, diff, count, output);
return result;
}
#endif
protected:
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float B, C;
#else
float fB, fC;
float fA1, fB1, fC1, fD1;
float fA2, fB2, fD2;
#endif
private:
typedef SkBitmapFilter INHERITED;
};
class SkGaussianFilter: public SkBitmapFilter {

View File

@ -11,7 +11,6 @@
#include "SkImageInfo.h"
#include "SkPixmap.h"
#include "SkRect.h"
#include "SkScalar.h"
#include "SkTArray.h"
// SkResizeFilter ----------------------------------------------------------------
@ -74,7 +73,7 @@ SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
fBitmapFilter = new SkTriangleFilter;
break;
case SkBitmapScaler::RESIZE_MITCHELL:
fBitmapFilter = new SkMitchellFilter(1.f / 3.f, 1.f / 3.f);
fBitmapFilter = new SkMitchellFilter;
break;
case SkBitmapScaler::RESIZE_HAMMING:
fBitmapFilter = new SkHammingFilter;
@ -130,45 +129,65 @@ void SkResizeFilter::computeFilters(int srcSize,
// to support the filtering function.
float srcSupport = fBitmapFilter->width() / clampedScale;
// Speed up the divisions below by turning them into multiplies.
float invScale = 1.0f / scale;
SkTArray<float> filterValues(64);
SkTArray<short> fixedFilterValues(64);
SkSTArray<64, float, true> filterValuesArray;
SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray;
// Loop over all pixels in the output range. We will generate one set of
// filter values for each one. Those values will tell us how to blend the
// source pixels to compute the destination pixel.
// This is the pixel in the source directly under the pixel in the dest.
// Note that we base computations on the "center" of the pixels. To see
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
// downscale should "cover" the pixels around the pixel with *its center*
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
int destLimit = SkScalarTruncToInt(SkScalarCeilToScalar(destSubsetHi)
- SkScalarFloorToScalar(destSubsetLo));
#else
destSubsetLo = SkScalarFloorToScalar(destSubsetLo);
destSubsetHi = SkScalarCeilToScalar(destSubsetHi);
float srcPixel = (destSubsetLo + 0.5f) * invScale;
int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo);
#endif
output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2));
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
for (int destSubsetI = SkScalarFloorToInt(destSubsetLo); destSubsetI < SkScalarCeilToInt(destSubsetHi);
destSubsetI++) {
// Reset the arrays. We don't declare them inside so they can re-use the
// same malloc-ed buffer.
filterValues.reset();
fixedFilterValues.reset();
// This is the pixel in the source directly under the pixel in the dest.
// Note that we base computations on the "center" of the pixels. To see
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
// downscale should "cover" the pixels around the pixel with *its center*
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
destSubsetI++)
#else
for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++)
#endif
{
// Compute the (inclusive) range of source pixels the filter covers.
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
int srcBegin = SkTMax(0, SkScalarFloorToInt(srcPixel - srcSupport));
int srcEnd = SkTMin(srcSize - 1, SkScalarCeilToInt(srcPixel + srcSupport));
#else
float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport));
float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport));
#endif
// Compute the unnormalized filter value at each location of the source
// it covers.
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float filterSum = 0.0f; // Sub of the filter values for normalizing.
int filterCount = srcEnd - srcBegin + 1;
filterValuesArray.reset(filterCount);
for (int curFilterPixel = srcBegin; curFilterPixel <= srcEnd;
curFilterPixel++) {
// Distance from the center of the filter, this is the filter coordinate
// in source space. We also need to consider the center of the pixel
// when comparing distance against 'srcPixel'. In the 5x downscale
// example used above the distance from the center of the filter to
// the pixel with coordinates (2, 2) should be 0, because its center
// is at (2.5, 2.5).
#endif
// Sum of the filter values for normalizing.
// Distance from the center of the filter, this is the filter coordinate
// in source space. We also need to consider the center of the pixel
// when comparing distance against 'srcPixel'. In the 5x downscale
// example used above the distance from the center of the filter to
// the pixel with coordinates (2, 2) should be 0, because its center
// is at (2.5, 2.5).
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float srcFilterDist =
((static_cast<float>(curFilterPixel) + 0.5f) - srcPixel);
@ -177,36 +196,56 @@ void SkResizeFilter::computeFilters(int srcSize,
// Compute the filter value at that location.
float filterValue = fBitmapFilter->evaluate(destFilterDist);
filterValues.push_back(filterValue);
filterValuesArray[curFilterPixel - srcBegin] = filterValue;
filterSum += filterValue;
}
SkASSERT(!filterValues.empty());
#else
float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1;
SkASSERT(filterCount > 0);
filterValuesArray.reset(filterCount);
float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount,
filterValuesArray.begin());
#endif
// The filter must be normalized so that we don't affect the brightness of
// the image. Convert to normalized fixed point.
short fixedSum = 0;
for (int i = 0; i < filterValues.count(); i++) {
short curFixed = output->FloatToFixed(filterValues[i] / filterSum);
int fixedSum = 0;
fixedFilterValuesArray.reset(filterCount);
const float* filterValues = filterValuesArray.begin();
SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin();
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
float invFilterSum = 1 / filterSum;
#endif
for (int fixedI = 0; fixedI < filterCount; fixedI++) {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] / filterSum);
#else
int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum);
#endif
fixedSum += curFixed;
fixedFilterValues.push_back(curFixed);
fixedFilterValues[fixedI] = SkToS16(curFixed);
}
SkASSERT(fixedSum <= 0x7FFF);
// The conversion to fixed point will leave some rounding errors, which
// we add back in to avoid affecting the brightness of the image. We
// arbitrarily add this to the center of the filter array (this won't always
// be the center of the filter function since it could get clipped on the
// edges, but it doesn't matter enough to worry about that case).
short leftovers = output->FloatToFixed(1.0f) - fixedSum;
fixedFilterValues[fixedFilterValues.count() / 2] += leftovers;
int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum;
fixedFilterValues[filterCount / 2] += leftovers;
// Now it's ready to go.
output->AddFilter(srcBegin, &fixedFilterValues[0],
static_cast<int>(fixedFilterValues.count()));
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
output->AddFilter(srcBegin, fixedFilterValues, filterCount);
#else
output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount);
#endif
}
if (convolveProcs.fApplySIMDPadding) {
convolveProcs.fApplySIMDPadding( output );
convolveProcs.fApplySIMDPadding(output);
}
}

View File

@ -3,9 +3,7 @@
// found in the LICENSE file.
#include "SkConvolver.h"
#include "SkMath.h"
#include "SkSize.h"
#include "SkTypes.h"
#include "SkTArray.h"
namespace {
@ -284,21 +282,6 @@ SkConvolutionFilter1D::SkConvolutionFilter1D()
SkConvolutionFilter1D::~SkConvolutionFilter1D() {
}
void SkConvolutionFilter1D::AddFilter(int filterOffset,
const float* filterValues,
int filterLength) {
SkASSERT(filterLength > 0);
SkTArray<ConvolutionFixed> fixedValues;
fixedValues.reset(filterLength);
for (int i = 0; i < filterLength; ++i) {
fixedValues.push_back(FloatToFixed(filterValues[i]));
}
AddFilter(filterOffset, &fixedValues[0], filterLength);
}
void SkConvolutionFilter1D::AddFilter(int filterOffset,
const ConvolutionFixed* filterValues,
int filterLength) {
@ -323,9 +306,7 @@ void SkConvolutionFilter1D::AddFilter(int filterOffset,
filterLength = lastNonZero + 1 - firstNonZero;
SkASSERT(filterLength > 0);
for (int i = firstNonZero; i <= lastNonZero; i++) {
fFilterValues.push_back(filterValues[i]);
}
fFilterValues.append(filterLength, &filterValues[firstNonZero]);
} else {
// Here all the factors were zeroes.
filterLength = 0;
@ -339,7 +320,7 @@ void SkConvolutionFilter1D::AddFilter(int filterOffset,
instance.fOffset = filterOffset;
instance.fTrimmedLength = filterLength;
instance.fLength = filterSize;
fFilters.push_back(instance);
fFilters.push(instance);
fMaxFilter = SkTMax(fMaxFilter, filterLength);
}

View File

@ -6,8 +6,7 @@
#define SK_CONVOLVER_H
#include "SkSize.h"
#include "SkTypes.h"
#include "SkTArray.h"
#include "SkTDArray.h"
// avoid confusion with Mac OS X's math library (Carbon)
#if defined(__APPLE__)
@ -58,6 +57,11 @@ public:
// output image.
int numValues() const { return static_cast<int>(fFilters.count()); }
void reserveAdditional(int filterCount, int filterValueCount) {
fFilters.setReserve(fFilters.count() + filterCount);
fFilterValues.setReserve(fFilterValues.count() + filterValueCount);
}
// Appends the given list of scaling values for generating a given output
// pixel. |filterOffset| is the distance from the edge of the image to where
// the scaling factors start. The scaling factors apply to the source pixels
@ -68,13 +72,6 @@ public:
// brighness of the image.
//
// The filterLength must be > 0.
//
// This version will automatically convert your input to ConvolutionFixed point.
SK_API void AddFilter(int filterOffset,
const float* filterValues,
int filterLength);
// Same as the above version, but the input is already ConvolutionFixed point.
void AddFilter(int filterOffset,
const ConvolutionFixed* filterValues,
int filterLength);
@ -112,7 +109,7 @@ public:
// SIMD padding which happens outside of this class.
void addFilterValue( ConvolutionFixed val ) {
fFilterValues.push_back( val );
fFilterValues.push( val );
}
private:
struct FilterInstance {
@ -132,12 +129,12 @@ private:
};
// Stores the information for each filter added to this class.
SkTArray<FilterInstance> fFilters;
SkTDArray<FilterInstance> fFilters;
// We store all the filter values in this flat list, indexed by
// |FilterInstance.data_location| to avoid the mallocs required for storing
// each one separately.
SkTArray<ConvolutionFixed> fFilterValues;
SkTDArray<ConvolutionFixed> fFilterValues;
// The maximum size of any filter we've added.
int fMaxFilter;