Stay in floats as much as possible in SkTileGrid, particularly in insert.

SkTileGrid::insert() is about 15% of recording time before this CL, which reduces it to ~10%. Next steps are looking into some of the TODOs I've left myself, and vectorizing the math. Most of the win here comes from converting integer divisions into float multiplies. BUG=skia:1021 Review URL: https://codereview.chromium.org/615853007
2014-10-07 11:46:39 -07:00 · 2014-10-07 11:46:39 -07:00 · 65be97d1a1
commit 65be97d1a1
parent d99741cbbe
2 changed files with 64 additions and 60 deletions
--- a/src/core/SkTileGrid.cpp
+++ b/src/core/SkTileGrid.cpp
@ -10,92 +10,90 @@
 SkTileGrid::SkTileGrid(int xTiles, int yTiles, const SkTileGridFactory::TileGridInfo& info)
    : fXTiles(xTiles)
    , fYTiles(yTiles)
-    , fInfo(info)
-    , fTiles(SkNEW_ARRAY(SkTDArray<unsigned>, xTiles * yTiles)) {
-    // Margin is offset by 1 as a provision for AA and
-    // to cancel-out the outset applied by getClipDeviceBounds.
-    fInfo.fMargin.fHeight++;
-    fInfo.fMargin.fWidth++;
-}
+    , fInvWidth( SkScalarInvert(info.fTileInterval.width()))
+    , fInvHeight(SkScalarInvert(info.fTileInterval.height()))
+    , fMarginWidth (info.fMargin.fWidth +1)  // Margin is offset by 1 as a provision for AA and
+    , fMarginHeight(info.fMargin.fHeight+1)  // to cancel the outset applied by getClipDeviceBounds.
+    , fOffset(SkPoint::Make(info.fOffset.fX, info.fOffset.fY))
+    , fGridBounds(SkRect::MakeWH(xTiles * info.fTileInterval.width(),
+                                 yTiles * info.fTileInterval.height()))
+    , fTiles(SkNEW_ARRAY(SkTDArray<unsigned>, xTiles * yTiles)) {}

 SkTileGrid::~SkTileGrid() {
    SkDELETE_ARRAY(fTiles);
 }

-void SkTileGrid::insert(unsigned opIndex, const SkRect& fbounds, bool) {
-    SkASSERT(!fbounds.isEmpty());
+// Adjustments to user-provided bounds common to both insert() and search().
+// Call this after making insert- or search- specific adjustments.
+void SkTileGrid::commonAdjust(SkRect* rect) const {
+    // Apply our offset.
+    rect->offset(fOffset);

-    SkIRect dilatedBounds;
-    fbounds.roundOut(&dilatedBounds);
-    dilatedBounds.outset(fInfo.fMargin.width(), fInfo.fMargin.height());
-    dilatedBounds.offset(fInfo.fOffset);
+    // Scrunch the bounds in just a little to make the right and bottom edges
+    // exclusive.  We want bounds of exactly one tile to hit exactly one tile.
+    rect->fRight  -= SK_ScalarNearlyZero;
+    rect->fBottom -= SK_ScalarNearlyZero;
+}

-    const SkIRect gridBounds =
-        { 0, 0, fInfo.fTileInterval.width() * fXTiles, fInfo.fTileInterval.height() * fYTiles };
-    if (!SkIRect::Intersects(dilatedBounds, gridBounds)) {
+// Convert user-space bounds to grid tiles they cover (LT inclusive, RB exclusive).
+void SkTileGrid::userToGrid(const SkRect& user, SkIRect* grid) const {
+    grid->fLeft   = SkPin32(user.left()   * fInvWidth , 0, fXTiles - 1);
+    grid->fTop    = SkPin32(user.top()    * fInvHeight, 0, fYTiles - 1);
+    grid->fRight  = SkPin32(user.right()  * fInvWidth , 0, fXTiles - 1) + 1;
+    grid->fBottom = SkPin32(user.bottom() * fInvHeight, 0, fYTiles - 1) + 1;
+}
+
+void SkTileGrid::insert(unsigned opIndex, const SkRect& originalBounds, bool) {
+    SkRect bounds = originalBounds;
+    bounds.outset(fMarginWidth, fMarginHeight);
+    this->commonAdjust(&bounds);
+
+    // TODO(mtklein): can we assert this instead to save an intersection in Release mode,
+    // or just allow out-of-bound insertions to insert anyway (clamped to nearest tile)?
+    if (!SkRect::Intersects(bounds, fGridBounds)) {
        return;
    }

-    // Note: SkIRects are non-inclusive of the right() column and bottom() row,
-    // hence the "-1"s in the computations of maxX and maxY.
-    int minX = SkMax32(0, SkMin32(dilatedBounds.left() / fInfo.fTileInterval.width(), fXTiles - 1));
-    int minY = SkMax32(0, SkMin32(dilatedBounds.top() / fInfo.fTileInterval.height(), fYTiles - 1));
-    int maxX = SkMax32(0, SkMin32((dilatedBounds.right()  - 1) / fInfo.fTileInterval.width(),
-                                  fXTiles - 1));
-    int maxY = SkMax32(0, SkMin32((dilatedBounds.bottom() - 1) / fInfo.fTileInterval.height(),
-                                  fYTiles - 1));
+    SkIRect grid;
+    this->userToGrid(bounds, &grid);

-    for (int y = minY; y <= maxY; y++) {
-        for (int x = minX; x <= maxX; x++) {
+    for (int y = grid.fTop; y < grid.fBottom; y++) {
+        for (int x = grid.fLeft; x < grid.fRight; x++) {
            fTiles[y * fXTiles + x].push(opIndex);
        }
    }
 }

-static int divide_ceil(int x, int y) {
-    return (x + y - 1) / y;
-}
-
 // Number of tiles for which data is allocated on the stack in
 // SkTileGrid::search. If malloc becomes a bottleneck, we may consider
 // increasing this number. Typical large web page, say 2k x 16k, would
 // require 512 tiles of size 256 x 256 pixels.
 static const int kStackAllocationTileCount = 1024;

-void SkTileGrid::search(const SkRect& query, SkTDArray<unsigned>* results) const {
-    SkIRect adjusted;
-    query.roundOut(&adjusted);
+void SkTileGrid::search(const SkRect& originalQuery, SkTDArray<unsigned>* results) const {
+    // The inset counteracts the outset that applied in 'insert', which optimizes
+    // for lookups of size 'tileInterval + 2 * margin' (aligned with the tile grid).
+    SkRect query = originalQuery;
+    query.inset(fMarginWidth, fMarginHeight);
+    this->commonAdjust(&query);

-    // The inset is to counteract the outset that was applied in 'insert'
-    // The outset/inset is to optimize for lookups of size
-    // 'tileInterval + 2 * margin' that are aligned with the tile grid.
-    adjusted.inset(fInfo.fMargin.width(), fInfo.fMargin.height());
-    adjusted.offset(fInfo.fOffset);
-    adjusted.sort();  // in case the inset inverted the rectangle
+    // The inset may have inverted the rectangle, so sort().
+    // TODO(mtklein): It looks like we only end up with inverted bounds in unit tests
+    // that make explicitly inverted queries, not from insetting.  If we can drop support for
+    // unsorted bounds (i.e. we don't see them outside unit tests), I think we can drop this.
+    query.sort();

-    // Convert the query rectangle from device coordinates to tile coordinates
-    // by rounding outwards to the nearest tile boundary so that the resulting tile
-    // region includes the query rectangle.
-    int startX = adjusted.left() / fInfo.fTileInterval.width(),
-        startY = adjusted.top()  / fInfo.fTileInterval.height();
-    int endX = divide_ceil(adjusted.right(),  fInfo.fTileInterval.width()),
-        endY = divide_ceil(adjusted.bottom(), fInfo.fTileInterval.height());
+    // No intersection check.  We optimize for queries that are in bounds.
+    // We're safe anyway: userToGrid() will clamp out-of-bounds queries to nearest tile.
+    SkIRect grid;
+    this->userToGrid(query, &grid);

-    // Logically, we could pin endX to [startX, fXTiles], but we force it
-    // up to (startX, fXTiles] to make sure we hit at least one tile.
-    // This snaps just-out-of-bounds queries to the neighboring border tile.
-    // I don't know if this is an important feature outside of unit tests.
-    startX = SkPin32(startX, 0, fXTiles - 1);
-    startY = SkPin32(startY, 0, fYTiles - 1);
-    endX   = SkPin32(endX, startX + 1, fXTiles);
-    endY   = SkPin32(endY, startY + 1, fYTiles);
-
-    const int tilesHit = (endX - startX) * (endY - startY);
+    const int tilesHit = (grid.fRight - grid.fLeft) * (grid.fBottom - grid.fTop);
    SkASSERT(tilesHit > 0);

    if (tilesHit == 1) {
        // A performance shortcut.  The merging code below would work fine here too.
-        *results = fTiles[startY * fXTiles + startX];
+        *results = fTiles[grid.fTop * fXTiles + grid.fLeft];
        return;
    }

@ -105,8 +103,8 @@ void SkTileGrid::search(const SkRect& query, SkTDArray<unsigned>* results) const
    // Gather pointers to the starts and ends of the tiles to merge.
    SkAutoSTArray<kStackAllocationTileCount, const unsigned*> starts(tilesHit), ends(tilesHit);
    int i = 0;
-    for (int x = startX; x < endX; x++) {
-        for (int y = startY; y < endY; y++) {
+    for (int y = grid.fTop; y < grid.fBottom; y++) {
+        for (int x = grid.fLeft; x < grid.fRight; x++) {
            starts[i] = fTiles[y * fXTiles + x].begin();
            ends[i]   = fTiles[y * fXTiles + x].end();
            i++;
--- a/src/core/SkTileGrid.h
+++ b/src/core/SkTileGrid.h
@ -42,8 +42,14 @@ public:
    int tileCount(int x, int y) { return fTiles[y * fXTiles + x].count(); }

 private:
+    void commonAdjust(SkRect*) const;
+    void userToGrid(const SkRect&, SkIRect* grid) const;
+
    const int fXTiles, fYTiles;
-    SkTileGridFactory::TileGridInfo fInfo;
+    const SkScalar fInvWidth, fInvHeight;
+    const SkScalar fMarginWidth, fMarginHeight;
+    const SkPoint fOffset;
+    const SkRect  fGridBounds;

    // (fXTiles * fYTiles) SkTDArrays, each listing ops overlapping that tile in order.
    SkTDArray<unsigned>* fTiles;