Refactor resize filter to go faster

Part of this CL improves the speed by using dynamic arrays more effectively. Part uses SIMD and more concise float expressions for speed. Some unused code was deleted. The latter changes are guarded by: SK_SUPPORT_LEGACY_BITMAP_FILTER until this lands and the corresponding layout changes in chrome can be relanded. With the legacy flag defined, no Skia or Chrome test results change. Without the flag defined in Skia, only 0.01% - 0.02% of the pixels change, and then by (1,1,1) in 8888. codereview.chromium.org/1583533002 adds the guard to Chrome. R=reed@google.com BUG=skia:2261 GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1563183003 Review URL: https://codereview.chromium.org/1563183003
2016-01-12 10:44:02 -08:00 · 2016-01-12 10:44:02 -08:00 · 368342ccb5
commit 368342ccb5
parent 05dcb4c7a9
5 changed files with 181 additions and 75 deletions
--- a/src/core/SkBitmapFilter.cpp
+++ b/src/core/SkBitmapFilter.cpp
@ -20,7 +20,7 @@ SK_CONF_DECLARE(const char *, c_bitmapFilter, "bitmap.filter", "mitchell", "Whic

 SkBitmapFilter *SkBitmapFilter::Allocate() {
    if (!strcmp(c_bitmapFilter, "mitchell")) {
-        return new SkMitchellFilter(1.f / 3.f, 1.f / 3.f);
+        return new SkMitchellFilter;
    } else if (!strcmp(c_bitmapFilter, "lanczos")) {
        return new SkLanczosFilter;
    } else if (!strcmp(c_bitmapFilter, "hamming")) {
--- a/src/core/SkBitmapFilter.h
+++ b/src/core/SkBitmapFilter.h
@ -12,6 +12,10 @@
 #include "SkMath.h"
 #include "SkScalar.h"

+#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
+#include "SkNx.h"
+#endif
+
 // size of the precomputed bitmap filter tables for high quality filtering.
 // Used to precompute the shape of the filter kernel.
 // Table size chosen from experiments to see where I could start to see a difference.
@ -47,6 +51,20 @@ class SkBitmapFilter {
      float width() const { return fWidth; }
      float invWidth() const { return fInvWidth; }
      virtual float evaluate(float x) const = 0;
+
+#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
+      virtual float evaluate_n(float val, float diff, int count, float* output) const {
+          float sum = 0;
+          for (int index = 0; index < count; index++) {
+              float filterValue = evaluate(val);
+              *output++ = filterValue;
+              sum += filterValue;
+              val += diff;
+          }
+          return sum;
+      }
+#endif
+
      virtual ~SkBitmapFilter() {}

      static SkBitmapFilter* Allocate();
@ -73,27 +91,98 @@ class SkBitmapFilter {
      }
 };

-class SkMitchellFilter: public SkBitmapFilter {
+class SkMitchellFilter : public SkBitmapFilter {
  public:
-      SkMitchellFilter(float b, float c, float width=2.0f)
-      : SkBitmapFilter(width), B(b), C(c) {
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
+      SkMitchellFilter()
+      : INHERITED(2), B(1.f / 3), C(1.f / 3) {
      }
+#else
+      SkMitchellFilter()
+          : INHERITED(2)
+          , fB(1.f / 3.f)
+          , fC(1.f / 3.f)
+          , fA1(-fB - 6*fC)
+          , fB1(6*fB + 30*fC)
+          , fC1(-12*fB - 48*fC)
+          , fD1(8*fB + 24*fC)
+          , fA2(12 - 9*fB - 6*fC)
+          , fB2(-18 + 12*fB + 6*fC)
+          , fD2(6 - 2*fB) {
+      }
+#endif

      float evaluate(float x) const override {
          x = fabsf(x);
          if (x > 2.f) {
              return 0;
          } else if (x > 1.f) {
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
              return ((-B - 6*C) * x*x*x + (6*B + 30*C) * x*x +
                      (-12*B - 48*C) * x + (8*B + 24*C)) * (1.f/6.f);
+#else
+              return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f);
+#endif
          } else {
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
              return ((12 - 9*B - 6*C) * x*x*x +
                      (-18 + 12*B + 6*C) * x*x +
                      (6 - 2*B)) * (1.f/6.f);
+#else
+              return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f);
+#endif
          }
      }
+
+#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
+      // TODO : native Sk4f abs
+      static Sk4f abs(const Sk4f& x) {
+          Sk4f neg = x < Sk4f(0);
+          return neg.thenElse(Sk4f(0) - x, x);
+      }
+
+      Sk4f evalcore_n(const Sk4f& val) const {
+          Sk4f x = abs(val);
+          Sk4f over2 = x > Sk4f(2);
+          Sk4f over1 = x > Sk4f(1);
+          Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1))
+                * Sk4f(1.f/6.f);
+          Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f);
+          return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0));
+      }
+
+      float evaluate_n(float val, float diff, int count, float* output) const override {
+          Sk4f sum(0);
+          while (count >= 4) {
+              float v0 = val;
+              float v1 = val += diff;
+              float v2 = val += diff;
+              float v3 = val += diff;
+              val += diff;
+              Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3));
+              filterValue.store(output);
+              output += 4;
+              sum = sum + filterValue;
+              count -= 4;
+          }
+          float sums[4];
+          sum.store(sums);
+          float result = sums[0] + sums[1] + sums[2] + sums[3];
+          result += INHERITED::evaluate_n(val, diff, count, output);
+          return result;
+      }
+#endif
+
  protected:
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
      float B, C;
+#else
+      float fB, fC;
+      float fA1, fB1, fC1, fD1;
+      float fA2, fB2, fD2;
+#endif
+  private:
+      typedef SkBitmapFilter INHERITED;
 };

 class SkGaussianFilter: public SkBitmapFilter {
--- a/src/core/SkBitmapScaler.cpp
+++ b/src/core/SkBitmapScaler.cpp
@ -11,7 +11,6 @@
 #include "SkImageInfo.h"
 #include "SkPixmap.h"
 #include "SkRect.h"
-#include "SkScalar.h"
 #include "SkTArray.h"

 // SkResizeFilter ----------------------------------------------------------------
@ -74,7 +73,7 @@ SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
            fBitmapFilter = new SkTriangleFilter;
            break;
        case SkBitmapScaler::RESIZE_MITCHELL:
-            fBitmapFilter = new SkMitchellFilter(1.f / 3.f, 1.f / 3.f);
+            fBitmapFilter = new SkMitchellFilter;
            break;
        case SkBitmapScaler::RESIZE_HAMMING:
            fBitmapFilter = new SkHammingFilter;
@ -130,45 +129,65 @@ void SkResizeFilter::computeFilters(int srcSize,
  // to support the filtering function.
  float srcSupport = fBitmapFilter->width() / clampedScale;

-  // Speed up the divisions below by turning them into multiplies.
  float invScale = 1.0f / scale;

-  SkTArray<float> filterValues(64);
-  SkTArray<short> fixedFilterValues(64);
+  SkSTArray<64, float, true> filterValuesArray;
+  SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray;

  // Loop over all pixels in the output range. We will generate one set of
  // filter values for each one. Those values will tell us how to blend the
  // source pixels to compute the destination pixel.
+
+  // This is the pixel in the source directly under the pixel in the dest.
+  // Note that we base computations on the "center" of the pixels. To see
+  // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
+  // downscale should "cover" the pixels around the pixel with *its center*
+  // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
+  // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
+  int destLimit = SkScalarTruncToInt(SkScalarCeilToScalar(destSubsetHi)
+        - SkScalarFloorToScalar(destSubsetLo));
+#else
+  destSubsetLo = SkScalarFloorToScalar(destSubsetLo);
+  destSubsetHi = SkScalarCeilToScalar(destSubsetHi);
+  float srcPixel = (destSubsetLo + 0.5f) * invScale;
+  int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo);
+#endif
+  output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2));
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
  for (int destSubsetI = SkScalarFloorToInt(destSubsetLo); destSubsetI < SkScalarCeilToInt(destSubsetHi);
-       destSubsetI++) {
-    // Reset the arrays. We don't declare them inside so they can re-use the
-    // same malloc-ed buffer.
-    filterValues.reset();
-    fixedFilterValues.reset();
-
-    // This is the pixel in the source directly under the pixel in the dest.
-    // Note that we base computations on the "center" of the pixels. To see
-    // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
-    // downscale should "cover" the pixels around the pixel with *its center*
-    // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
-    // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
-    float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
-
+       destSubsetI++)
+#else
+  for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++)
+#endif
+  {
    // Compute the (inclusive) range of source pixels the filter covers.
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
+    float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
    int srcBegin = SkTMax(0, SkScalarFloorToInt(srcPixel - srcSupport));
    int srcEnd = SkTMin(srcSize - 1, SkScalarCeilToInt(srcPixel + srcSupport));
+#else
+    float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport));
+    float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport));
+#endif

    // Compute the unnormalized filter value at each location of the source
    // it covers.
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
    float filterSum = 0.0f;  // Sub of the filter values for normalizing.
+    int filterCount = srcEnd - srcBegin + 1;
+    filterValuesArray.reset(filterCount);
    for (int curFilterPixel = srcBegin; curFilterPixel <= srcEnd;
         curFilterPixel++) {
-      // Distance from the center of the filter, this is the filter coordinate
-      // in source space. We also need to consider the center of the pixel
-      // when comparing distance against 'srcPixel'. In the 5x downscale
-      // example used above the distance from the center of the filter to
-      // the pixel with coordinates (2, 2) should be 0, because its center
-      // is at (2.5, 2.5).
+#endif
+    // Sum of the filter values for normalizing.
+    // Distance from the center of the filter, this is the filter coordinate
+    // in source space. We also need to consider the center of the pixel
+    // when comparing distance against 'srcPixel'. In the 5x downscale
+    // example used above the distance from the center of the filter to
+    // the pixel with coordinates (2, 2) should be 0, because its center
+    // is at (2.5, 2.5).
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
      float srcFilterDist =
          ((static_cast<float>(curFilterPixel) + 0.5f) - srcPixel);

@ -177,36 +196,56 @@ void SkResizeFilter::computeFilters(int srcSize,

      // Compute the filter value at that location.
      float filterValue = fBitmapFilter->evaluate(destFilterDist);
-      filterValues.push_back(filterValue);
+      filterValuesArray[curFilterPixel - srcBegin] = filterValue;

      filterSum += filterValue;
    }
-    SkASSERT(!filterValues.empty());
-
+#else
+    float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
+    int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1;
+    SkASSERT(filterCount > 0);
+    filterValuesArray.reset(filterCount);
+    float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount,
+                                                filterValuesArray.begin());
+#endif
    // The filter must be normalized so that we don't affect the brightness of
    // the image. Convert to normalized fixed point.
-    short fixedSum = 0;
-    for (int i = 0; i < filterValues.count(); i++) {
-      short curFixed = output->FloatToFixed(filterValues[i] / filterSum);
+    int fixedSum = 0;
+    fixedFilterValuesArray.reset(filterCount);
+    const float* filterValues = filterValuesArray.begin();
+    SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin();
+#ifndef SK_SUPPORT_LEGACY_BITMAP_FILTER
+    float invFilterSum = 1 / filterSum;
+#endif
+    for (int fixedI = 0; fixedI < filterCount; fixedI++) {
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
+      int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] / filterSum);
+#else
+      int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum);
+#endif
      fixedSum += curFixed;
-      fixedFilterValues.push_back(curFixed);
+      fixedFilterValues[fixedI] = SkToS16(curFixed);
    }
+    SkASSERT(fixedSum <= 0x7FFF);

    // The conversion to fixed point will leave some rounding errors, which
    // we add back in to avoid affecting the brightness of the image. We
    // arbitrarily add this to the center of the filter array (this won't always
    // be the center of the filter function since it could get clipped on the
    // edges, but it doesn't matter enough to worry about that case).
-    short leftovers = output->FloatToFixed(1.0f) - fixedSum;
-    fixedFilterValues[fixedFilterValues.count() / 2] += leftovers;
+    int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum;
+    fixedFilterValues[filterCount / 2] += leftovers;

    // Now it's ready to go.
-    output->AddFilter(srcBegin, &fixedFilterValues[0],
-                      static_cast<int>(fixedFilterValues.count()));
+#ifdef SK_SUPPORT_LEGACY_BITMAP_FILTER
+    output->AddFilter(srcBegin, fixedFilterValues, filterCount);
+#else
+    output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount);
+#endif
  }

  if (convolveProcs.fApplySIMDPadding) {
-      convolveProcs.fApplySIMDPadding( output );
+      convolveProcs.fApplySIMDPadding(output);
  }
 }

--- a/src/core/SkConvolver.cpp
+++ b/src/core/SkConvolver.cpp
@ -3,9 +3,7 @@
 // found in the LICENSE file.

 #include "SkConvolver.h"
-#include "SkMath.h"
-#include "SkSize.h"
-#include "SkTypes.h"
+#include "SkTArray.h"

 namespace {

@ -284,21 +282,6 @@ SkConvolutionFilter1D::SkConvolutionFilter1D()
 SkConvolutionFilter1D::~SkConvolutionFilter1D() {
 }

-void SkConvolutionFilter1D::AddFilter(int filterOffset,
-                                      const float* filterValues,
-                                      int filterLength) {
-    SkASSERT(filterLength > 0);
-
-    SkTArray<ConvolutionFixed> fixedValues;
-    fixedValues.reset(filterLength);
-
-    for (int i = 0; i < filterLength; ++i) {
-        fixedValues.push_back(FloatToFixed(filterValues[i]));
-    }
-
-    AddFilter(filterOffset, &fixedValues[0], filterLength);
-}
-
 void SkConvolutionFilter1D::AddFilter(int filterOffset,
                                      const ConvolutionFixed* filterValues,
                                      int filterLength) {
@ -323,9 +306,7 @@ void SkConvolutionFilter1D::AddFilter(int filterOffset,
        filterLength = lastNonZero + 1 - firstNonZero;
        SkASSERT(filterLength > 0);

-        for (int i = firstNonZero; i <= lastNonZero; i++) {
-            fFilterValues.push_back(filterValues[i]);
-        }
+        fFilterValues.append(filterLength, &filterValues[firstNonZero]);
    } else {
        // Here all the factors were zeroes.
        filterLength = 0;
@ -339,7 +320,7 @@ void SkConvolutionFilter1D::AddFilter(int filterOffset,
    instance.fOffset = filterOffset;
    instance.fTrimmedLength = filterLength;
    instance.fLength = filterSize;
-    fFilters.push_back(instance);
+    fFilters.push(instance);

    fMaxFilter = SkTMax(fMaxFilter, filterLength);
 }
--- a/src/core/SkConvolver.h
+++ b/src/core/SkConvolver.h
@ -6,8 +6,7 @@
 #define SK_CONVOLVER_H

 #include "SkSize.h"
-#include "SkTypes.h"
-#include "SkTArray.h"
+#include "SkTDArray.h"

 // avoid confusion with Mac OS X's math library (Carbon)
 #if defined(__APPLE__)
@ -58,6 +57,11 @@ public:
    // output image.
    int numValues() const { return static_cast<int>(fFilters.count()); }

+    void reserveAdditional(int filterCount, int filterValueCount) {
+        fFilters.setReserve(fFilters.count() + filterCount);
+        fFilterValues.setReserve(fFilterValues.count() + filterValueCount);
+    }
+
    // Appends the given list of scaling values for generating a given output
    // pixel. |filterOffset| is the distance from the edge of the image to where
    // the scaling factors start. The scaling factors apply to the source pixels
@ -68,13 +72,6 @@ public:
    // brighness of the image.
    //
    // The filterLength must be > 0.
-    //
-    // This version will automatically convert your input to ConvolutionFixed point.
-    SK_API void AddFilter(int filterOffset,
-                          const float* filterValues,
-                          int filterLength);
-
-    // Same as the above version, but the input is already ConvolutionFixed point.
    void AddFilter(int filterOffset,
                   const ConvolutionFixed* filterValues,
                   int filterLength);
@ -112,7 +109,7 @@ public:
    // SIMD padding which happens outside of this class.

    void addFilterValue( ConvolutionFixed val ) {
-        fFilterValues.push_back( val );
+        fFilterValues.push( val );
    }
 private:
    struct FilterInstance {
@ -132,12 +129,12 @@ private:
    };

    // Stores the information for each filter added to this class.
-    SkTArray<FilterInstance> fFilters;
+    SkTDArray<FilterInstance> fFilters;

    // We store all the filter values in this flat list, indexed by
    // |FilterInstance.data_location| to avoid the mallocs required for storing
    // each one separately.
-    SkTArray<ConvolutionFixed> fFilterValues;
+    SkTDArray<ConvolutionFixed> fFilterValues;

    // The maximum size of any filter we've added.
    int fMaxFilter;