Refactor resize filter to go faster

Part of this CL improves the speed by using dynamic arrays
more effectively.

Part uses SIMD and more concise float expressions for speed.

Some unused code was deleted.

The latter changes are guarded by:

  SK_SUPPORT_LEGACY_BITMAP_FILTER

until this lands and the corresponding layout changes in
chrome can be relanded.

With the legacy flag defined, no Skia or Chrome test results
change. Without the flag defined in Skia, only 0.01% - 0.02%
of the pixels change, and then by (1,1,1) in 8888.

codereview.chromium.org/1583533002 adds the guard to Chrome.

R=reed@google.com
BUG=skia:2261
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1563183003

Review URL: https://codereview.chromium.org/1563183003
This commit is contained in:
caryclark 2016-01-12 10:44:02 -08:00 committed by Commit bot
parent 05dcb4c7a9
commit 368342ccb5
5 changed files with 181 additions and 75 deletions

View File

@ -20,7 +20,7 @@ SK_CONF_DECLARE(const char *, c_bitmapFilter, "bitmap.filter", "mitchell", "Whic
SkBitmapFilter *SkBitmapFilter::Allocate() { SkBitmapFilter *SkBitmapFilter::Allocate() {
if (!strcmp(c_bitmapFilter, "mitchell")) { if (!strcmp(c_bitmapFilter, "mitchell")) {
return new SkMitchellFilter(1.f / 3.f, 1.f / 3.f); return new SkMitchellFilter;
} else if (!strcmp(c_bitmapFilter, "lanczos")) { } else if (!strcmp(c_bitmapFilter, "lanczos")) {
return new SkLanczosFilter; return new SkLanczosFilter;
} else if (!strcmp(c_bitmapFilter, "hamming")) { } else if (!strcmp(c_bitmapFilter, "hamming")) {

View File

@ -12,6 +12,10 @@
#include "SkMath.h" #include "SkMath.h"
#include "SkScalar.h" #include "SkScalar.h"
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
#include "SkNx.h"
#endif
// size of the precomputed bitmap filter tables for high quality filtering. // size of the precomputed bitmap filter tables for high quality filtering.
// Used to precompute the shape of the filter kernel. // Used to precompute the shape of the filter kernel.
// Table size chosen from experiments to see where I could start to see a difference. // Table size chosen from experiments to see where I could start to see a difference.
@ -47,6 +51,20 @@ class SkBitmapFilter {
float width() const { return fWidth; } float width() const { return fWidth; }
float invWidth() const { return fInvWidth; } float invWidth() const { return fInvWidth; }
virtual float evaluate(float x) const = 0; virtual float evaluate(float x) const = 0;
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
virtual float evaluate_n(float val, float diff, int count, float* output) const {
float sum = 0;
for (int index = 0; index < count; index++) {
float filterValue = evaluate(val);
*output++ = filterValue;
sum += filterValue;
val += diff;
}
return sum;
}
#endif
virtual ~SkBitmapFilter() {} virtual ~SkBitmapFilter() {}
static SkBitmapFilter* Allocate(); static SkBitmapFilter* Allocate();
@ -73,27 +91,98 @@ class SkBitmapFilter {
} }
}; };
class SkMitchellFilter: public SkBitmapFilter { class SkMitchellFilter : public SkBitmapFilter {
public: public:
SkMitchellFilter(float b, float c, float width=2.0f) #ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
: SkBitmapFilter(width), B(b), C(c) { SkMitchellFilter()
: INHERITED(2), B(1.f / 3), C(1.f / 3) {
} }
#else
SkMitchellFilter()
: INHERITED(2)
, fB(1.f / 3.f)
, fC(1.f / 3.f)
, fA1(-fB - 6*fC)
, fB1(6*fB + 30*fC)
, fC1(-12*fB - 48*fC)
, fD1(8*fB + 24*fC)
, fA2(12 - 9*fB - 6*fC)
, fB2(-18 + 12*fB + 6*fC)
, fD2(6 - 2*fB) {
}
#endif
float evaluate(float x) const override { float evaluate(float x) const override {
x = fabsf(x); x = fabsf(x);
if (x > 2.f) { if (x > 2.f) {
return 0; return 0;
} else if (x > 1.f) { } else if (x > 1.f) {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
return ((-B - 6*C) * x*x*x + (6*B + 30*C) * x*x + return ((-B - 6*C) * x*x*x + (6*B + 30*C) * x*x +
(-12*B - 48*C) * x + (8*B + 24*C)) * (1.f/6.f); (-12*B - 48*C) * x + (8*B + 24*C)) * (1.f/6.f);
#else
return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f);
#endif
} else { } else {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
return ((12 - 9*B - 6*C) * x*x*x + return ((12 - 9*B - 6*C) * x*x*x +
(-18 + 12*B + 6*C) * x*x + (-18 + 12*B + 6*C) * x*x +
(6 - 2*B)) * (1.f/6.f); (6 - 2*B)) * (1.f/6.f);
#else
return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f);
#endif
} }
} }
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
// TODO : native Sk4f abs
static Sk4f abs(const Sk4f& x) {
Sk4f neg = x < Sk4f(0);
return neg.thenElse(Sk4f(0) - x, x);
}
Sk4f evalcore_n(const Sk4f& val) const {
Sk4f x = abs(val);
Sk4f over2 = x > Sk4f(2);
Sk4f over1 = x > Sk4f(1);
Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1))
* Sk4f(1.f/6.f);
Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f);
return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0));
}
float evaluate_n(float val, float diff, int count, float* output) const override {
Sk4f sum(0);
while (count >= 4) {
float v0 = val;
float v1 = val += diff;
float v2 = val += diff;
float v3 = val += diff;
val += diff;
Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3));
filterValue.store(output);
output += 4;
sum = sum + filterValue;
count -= 4;
}
float sums[4];
sum.store(sums);
float result = sums[0] + sums[1] + sums[2] + sums[3];
result += INHERITED::evaluate_n(val, diff, count, output);
return result;
}
#endif
protected: protected:
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float B, C; float B, C;
#else
float fB, fC;
float fA1, fB1, fC1, fD1;
float fA2, fB2, fD2;
#endif
private:
typedef SkBitmapFilter INHERITED;
}; };
class SkGaussianFilter: public SkBitmapFilter { class SkGaussianFilter: public SkBitmapFilter {

View File

@ -11,7 +11,6 @@
#include "SkImageInfo.h" #include "SkImageInfo.h"
#include "SkPixmap.h" #include "SkPixmap.h"
#include "SkRect.h" #include "SkRect.h"
#include "SkScalar.h"
#include "SkTArray.h" #include "SkTArray.h"
// SkResizeFilter ---------------------------------------------------------------- // SkResizeFilter ----------------------------------------------------------------
@ -74,7 +73,7 @@ SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
fBitmapFilter = new SkTriangleFilter; fBitmapFilter = new SkTriangleFilter;
break; break;
case SkBitmapScaler::RESIZE_MITCHELL: case SkBitmapScaler::RESIZE_MITCHELL:
fBitmapFilter = new SkMitchellFilter(1.f / 3.f, 1.f / 3.f); fBitmapFilter = new SkMitchellFilter;
break; break;
case SkBitmapScaler::RESIZE_HAMMING: case SkBitmapScaler::RESIZE_HAMMING:
fBitmapFilter = new SkHammingFilter; fBitmapFilter = new SkHammingFilter;
@ -130,45 +129,65 @@ void SkResizeFilter::computeFilters(int srcSize,
// to support the filtering function. // to support the filtering function.
float srcSupport = fBitmapFilter->width() / clampedScale; float srcSupport = fBitmapFilter->width() / clampedScale;
// Speed up the divisions below by turning them into multiplies.
float invScale = 1.0f / scale; float invScale = 1.0f / scale;
SkTArray<float> filterValues(64); SkSTArray<64, float, true> filterValuesArray;
SkTArray<short> fixedFilterValues(64); SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray;
// Loop over all pixels in the output range. We will generate one set of // Loop over all pixels in the output range. We will generate one set of
// filter values for each one. Those values will tell us how to blend the // filter values for each one. Those values will tell us how to blend the
// source pixels to compute the destination pixel. // source pixels to compute the destination pixel.
// This is the pixel in the source directly under the pixel in the dest.
// Note that we base computations on the "center" of the pixels. To see
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
// downscale should "cover" the pixels around the pixel with *its center*
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
int destLimit = SkScalarTruncToInt(SkScalarCeilToScalar(destSubsetHi)
- SkScalarFloorToScalar(destSubsetLo));
#else
destSubsetLo = SkScalarFloorToScalar(destSubsetLo);
destSubsetHi = SkScalarCeilToScalar(destSubsetHi);
float srcPixel = (destSubsetLo + 0.5f) * invScale;
int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo);
#endif
output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2));
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
for (int destSubsetI = SkScalarFloorToInt(destSubsetLo); destSubsetI < SkScalarCeilToInt(destSubsetHi); for (int destSubsetI = SkScalarFloorToInt(destSubsetLo); destSubsetI < SkScalarCeilToInt(destSubsetHi);
destSubsetI++) { destSubsetI++)
// Reset the arrays. We don't declare them inside so they can re-use the #else
// same malloc-ed buffer. for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++)
filterValues.reset(); #endif
fixedFilterValues.reset(); {
// This is the pixel in the source directly under the pixel in the dest.
// Note that we base computations on the "center" of the pixels. To see
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
// downscale should "cover" the pixels around the pixel with *its center*
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
// Compute the (inclusive) range of source pixels the filter covers. // Compute the (inclusive) range of source pixels the filter covers.
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
int srcBegin = SkTMax(0, SkScalarFloorToInt(srcPixel - srcSupport)); int srcBegin = SkTMax(0, SkScalarFloorToInt(srcPixel - srcSupport));
int srcEnd = SkTMin(srcSize - 1, SkScalarCeilToInt(srcPixel + srcSupport)); int srcEnd = SkTMin(srcSize - 1, SkScalarCeilToInt(srcPixel + srcSupport));
#else
float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport));
float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport));
#endif
// Compute the unnormalized filter value at each location of the source // Compute the unnormalized filter value at each location of the source
// it covers. // it covers.
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float filterSum = 0.0f; // Sub of the filter values for normalizing. float filterSum = 0.0f; // Sub of the filter values for normalizing.
int filterCount = srcEnd - srcBegin + 1;
filterValuesArray.reset(filterCount);
for (int curFilterPixel = srcBegin; curFilterPixel <= srcEnd; for (int curFilterPixel = srcBegin; curFilterPixel <= srcEnd;
curFilterPixel++) { curFilterPixel++) {
// Distance from the center of the filter, this is the filter coordinate #endif
// in source space. We also need to consider the center of the pixel // Sum of the filter values for normalizing.
// when comparing distance against 'srcPixel'. In the 5x downscale // Distance from the center of the filter, this is the filter coordinate
// example used above the distance from the center of the filter to // in source space. We also need to consider the center of the pixel
// the pixel with coordinates (2, 2) should be 0, because its center // when comparing distance against 'srcPixel'. In the 5x downscale
// is at (2.5, 2.5). // example used above the distance from the center of the filter to
// the pixel with coordinates (2, 2) should be 0, because its center
// is at (2.5, 2.5).
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
float srcFilterDist = float srcFilterDist =
((static_cast<float>(curFilterPixel) + 0.5f) - srcPixel); ((static_cast<float>(curFilterPixel) + 0.5f) - srcPixel);
@ -177,36 +196,56 @@ void SkResizeFilter::computeFilters(int srcSize,
// Compute the filter value at that location. // Compute the filter value at that location.
float filterValue = fBitmapFilter->evaluate(destFilterDist); float filterValue = fBitmapFilter->evaluate(destFilterDist);
filterValues.push_back(filterValue); filterValuesArray[curFilterPixel - srcBegin] = filterValue;
filterSum += filterValue; filterSum += filterValue;
} }
SkASSERT(!filterValues.empty()); #else
float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1;
SkASSERT(filterCount > 0);
filterValuesArray.reset(filterCount);
float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount,
filterValuesArray.begin());
#endif
// The filter must be normalized so that we don't affect the brightness of // The filter must be normalized so that we don't affect the brightness of
// the image. Convert to normalized fixed point. // the image. Convert to normalized fixed point.
short fixedSum = 0; int fixedSum = 0;
for (int i = 0; i < filterValues.count(); i++) { fixedFilterValuesArray.reset(filterCount);
short curFixed = output->FloatToFixed(filterValues[i] / filterSum); const float* filterValues = filterValuesArray.begin();
SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin();
#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
float invFilterSum = 1 / filterSum;
#endif
for (int fixedI = 0; fixedI < filterCount; fixedI++) {
#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] / filterSum);
#else
int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum);
#endif
fixedSum += curFixed; fixedSum += curFixed;
fixedFilterValues.push_back(curFixed); fixedFilterValues[fixedI] = SkToS16(curFixed);
} }
SkASSERT(fixedSum <= 0x7FFF);
// The conversion to fixed point will leave some rounding errors, which // The conversion to fixed point will leave some rounding errors, which
// we add back in to avoid affecting the brightness of the image. We // we add back in to avoid affecting the brightness of the image. We
// arbitrarily add this to the center of the filter array (this won't always // arbitrarily add this to the center of the filter array (this won't always
// be the center of the filter function since it could get clipped on the // be the center of the filter function since it could get clipped on the
// edges, but it doesn't matter enough to worry about that case). // edges, but it doesn't matter enough to worry about that case).
short leftovers = output->FloatToFixed(1.0f) - fixedSum; int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum;
fixedFilterValues[fixedFilterValues.count() / 2] += leftovers; fixedFilterValues[filterCount / 2] += leftovers;
// Now it's ready to go. // Now it's ready to go.
output->AddFilter(srcBegin, &fixedFilterValues[0], #ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
static_cast<int>(fixedFilterValues.count())); output->AddFilter(srcBegin, fixedFilterValues, filterCount);
#else
output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount);
#endif
} }
if (convolveProcs.fApplySIMDPadding) { if (convolveProcs.fApplySIMDPadding) {
convolveProcs.fApplySIMDPadding( output ); convolveProcs.fApplySIMDPadding(output);
} }
} }

View File

@ -3,9 +3,7 @@
// found in the LICENSE file. // found in the LICENSE file.
#include "SkConvolver.h" #include "SkConvolver.h"
#include "SkMath.h" #include "SkTArray.h"
#include "SkSize.h"
#include "SkTypes.h"
namespace { namespace {
@ -284,21 +282,6 @@ SkConvolutionFilter1D::SkConvolutionFilter1D()
SkConvolutionFilter1D::~SkConvolutionFilter1D() { SkConvolutionFilter1D::~SkConvolutionFilter1D() {
} }
void SkConvolutionFilter1D::AddFilter(int filterOffset,
const float* filterValues,
int filterLength) {
SkASSERT(filterLength > 0);
SkTArray<ConvolutionFixed> fixedValues;
fixedValues.reset(filterLength);
for (int i = 0; i < filterLength; ++i) {
fixedValues.push_back(FloatToFixed(filterValues[i]));
}
AddFilter(filterOffset, &fixedValues[0], filterLength);
}
void SkConvolutionFilter1D::AddFilter(int filterOffset, void SkConvolutionFilter1D::AddFilter(int filterOffset,
const ConvolutionFixed* filterValues, const ConvolutionFixed* filterValues,
int filterLength) { int filterLength) {
@ -323,9 +306,7 @@ void SkConvolutionFilter1D::AddFilter(int filterOffset,
filterLength = lastNonZero + 1 - firstNonZero; filterLength = lastNonZero + 1 - firstNonZero;
SkASSERT(filterLength > 0); SkASSERT(filterLength > 0);
for (int i = firstNonZero; i <= lastNonZero; i++) { fFilterValues.append(filterLength, &filterValues[firstNonZero]);
fFilterValues.push_back(filterValues[i]);
}
} else { } else {
// Here all the factors were zeroes. // Here all the factors were zeroes.
filterLength = 0; filterLength = 0;
@ -339,7 +320,7 @@ void SkConvolutionFilter1D::AddFilter(int filterOffset,
instance.fOffset = filterOffset; instance.fOffset = filterOffset;
instance.fTrimmedLength = filterLength; instance.fTrimmedLength = filterLength;
instance.fLength = filterSize; instance.fLength = filterSize;
fFilters.push_back(instance); fFilters.push(instance);
fMaxFilter = SkTMax(fMaxFilter, filterLength); fMaxFilter = SkTMax(fMaxFilter, filterLength);
} }

View File

@ -6,8 +6,7 @@
#define SK_CONVOLVER_H #define SK_CONVOLVER_H
#include "SkSize.h" #include "SkSize.h"
#include "SkTypes.h" #include "SkTDArray.h"
#include "SkTArray.h"
// avoid confusion with Mac OS X's math library (Carbon) // avoid confusion with Mac OS X's math library (Carbon)
#if defined(__APPLE__) #if defined(__APPLE__)
@ -58,6 +57,11 @@ public:
// output image. // output image.
int numValues() const { return static_cast<int>(fFilters.count()); } int numValues() const { return static_cast<int>(fFilters.count()); }
void reserveAdditional(int filterCount, int filterValueCount) {
fFilters.setReserve(fFilters.count() + filterCount);
fFilterValues.setReserve(fFilterValues.count() + filterValueCount);
}
// Appends the given list of scaling values for generating a given output // Appends the given list of scaling values for generating a given output
// pixel. |filterOffset| is the distance from the edge of the image to where // pixel. |filterOffset| is the distance from the edge of the image to where
// the scaling factors start. The scaling factors apply to the source pixels // the scaling factors start. The scaling factors apply to the source pixels
@ -68,13 +72,6 @@ public:
// brighness of the image. // brighness of the image.
// //
// The filterLength must be > 0. // The filterLength must be > 0.
//
// This version will automatically convert your input to ConvolutionFixed point.
SK_API void AddFilter(int filterOffset,
const float* filterValues,
int filterLength);
// Same as the above version, but the input is already ConvolutionFixed point.
void AddFilter(int filterOffset, void AddFilter(int filterOffset,
const ConvolutionFixed* filterValues, const ConvolutionFixed* filterValues,
int filterLength); int filterLength);
@ -112,7 +109,7 @@ public:
// SIMD padding which happens outside of this class. // SIMD padding which happens outside of this class.
void addFilterValue( ConvolutionFixed val ) { void addFilterValue( ConvolutionFixed val ) {
fFilterValues.push_back( val ); fFilterValues.push( val );
} }
private: private:
struct FilterInstance { struct FilterInstance {
@ -132,12 +129,12 @@ private:
}; };
// Stores the information for each filter added to this class. // Stores the information for each filter added to this class.
SkTArray<FilterInstance> fFilters; SkTDArray<FilterInstance> fFilters;
// We store all the filter values in this flat list, indexed by // We store all the filter values in this flat list, indexed by
// |FilterInstance.data_location| to avoid the mallocs required for storing // |FilterInstance.data_location| to avoid the mallocs required for storing
// each one separately. // each one separately.
SkTArray<ConvolutionFixed> fFilterValues; SkTDArray<ConvolutionFixed> fFilterValues;
// The maximum size of any filter we've added. // The maximum size of any filter we've added.
int fMaxFilter; int fMaxFilter;