Use SkSTArenaAlloc to manage SkCoverageDeltaMask's stack memory
This way, we can have more stack memory on Google3: if each of the two branches has its own 12K of stack memory, Google3 believes the function needs 24K of stack; by sharing one SkSTArenaAlloc, we can use the same 12K of stack memory for both branches. Bug: skia: Change-Id: Ie9234226cd4ba93b5be2ebeb95ab771031354f97 Reviewed-on: https://skia-review.googlesource.com/42101 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Yuqian Li <liyuqian@google.com>
This commit is contained in:
parent
b7da7239f7
commit
bd40a5bf5b
@ -98,12 +98,13 @@ void SkBlitter::blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& c
|
||||
continue;
|
||||
}
|
||||
|
||||
// If there are too many deltas, sorting will be slow. Using a mask will be much faster.
|
||||
// If there are too many deltas, sorting will be slow. Using a mask is much faster.
|
||||
// This is such an important optimization that will bring ~2x speedup for benches like
|
||||
// path_fill_small_long_line and path_stroke_small_sawtooth.
|
||||
if (canUseMask && !deltas->sorted(y) && deltas->count(y) << 3 >= clip.width()) {
|
||||
SkIRect rowIR = SkIRect::MakeLTRB(clip.fLeft, y, clip.fRight, y + 1);
|
||||
SkCoverageDeltaMask mask(rowIR);
|
||||
SkSTArenaAlloc<SkCoverageDeltaMask::MAX_SIZE> alloc;
|
||||
SkCoverageDeltaMask mask(&alloc, rowIR);
|
||||
for(int i = 0; i < deltas->count(y); ++i) {
|
||||
const SkCoverageDelta& delta = deltas->getDelta(y, i);
|
||||
mask.addDelta(delta.fX, y, delta.fDelta);
|
||||
|
@ -54,20 +54,26 @@ bool SkCoverageDeltaMask::Suitable(const SkIRect& bounds) {
|
||||
return bounds.width() <= SUITABLE_WIDTH && CanHandle(bounds);
|
||||
}
|
||||
|
||||
SkCoverageDeltaMask::SkCoverageDeltaMask(const SkIRect& bounds) : fBounds(bounds) {
|
||||
SkCoverageDeltaMask::SkCoverageDeltaMask(SkArenaAlloc* alloc, const SkIRect& bounds) {
|
||||
SkASSERT(CanHandle(bounds));
|
||||
|
||||
fBounds = bounds;
|
||||
|
||||
// Init the anti-rect to be empty
|
||||
fAntiRect.fY = fBounds.fBottom;
|
||||
fAntiRect.fHeight = 0;
|
||||
|
||||
fExpandedWidth = ExpandWidth(fBounds.width());
|
||||
|
||||
int size = fExpandedWidth * bounds.height() + PADDING * 2;
|
||||
fDeltaStorage = alloc->makeArrayDefault<SkFixed>(size);
|
||||
fMask = alloc->makeArrayDefault<SkAlpha>(size);
|
||||
|
||||
// Add PADDING columns so we may access fDeltas[index(-PADDING, 0)]
|
||||
// Minus index(fBounds.fLeft, fBounds.fTop) so we can directly access fDeltas[index(x, y)]
|
||||
fDeltas = fDeltaStorage + PADDING - this->index(fBounds.fLeft, fBounds.fTop);
|
||||
|
||||
memset(fDeltaStorage, 0, (fExpandedWidth * bounds.height() + PADDING * 2) * sizeof(SkFixed));
|
||||
memset(fDeltaStorage, 0, size * sizeof(SkFixed));
|
||||
}
|
||||
|
||||
// TODO As this function is so performance-critical (and we're thinking so much about SIMD), use
|
||||
|
@ -117,13 +117,14 @@ public:
|
||||
#else
|
||||
static constexpr int MAX_MASK_SIZE = 2048;
|
||||
#endif
|
||||
static constexpr int MAX_SIZE = MAX_MASK_SIZE * (sizeof(SkFixed) + sizeof(SkAlpha));
|
||||
|
||||
// Expand PADDING on both sides, and make it a multiple of SIMD_WIDTH
|
||||
static int ExpandWidth(int width);
|
||||
static bool CanHandle(const SkIRect& bounds); // whether bounds fits into MAX_MASK_SIZE
|
||||
static bool Suitable(const SkIRect& bounds); // CanHandle(bounds) && width <= SUITABLE_WIDTH
|
||||
|
||||
SkCoverageDeltaMask(const SkIRect& bounds);
|
||||
SkCoverageDeltaMask(SkArenaAlloc* alloc, const SkIRect& bounds);
|
||||
|
||||
int top() const { return fBounds.fTop; }
|
||||
int bottom() const { return fBounds.fBottom; }
|
||||
@ -155,9 +156,9 @@ public:
|
||||
|
||||
private:
|
||||
SkIRect fBounds;
|
||||
SkFixed fDeltaStorage[MAX_MASK_SIZE];
|
||||
SkFixed* fDeltaStorage;
|
||||
SkFixed* fDeltas;
|
||||
SkAlpha fMask[MAX_MASK_SIZE];
|
||||
SkAlpha* fMask;
|
||||
int fExpandedWidth;
|
||||
SkAntiRect fAntiRect;
|
||||
|
||||
|
@ -338,20 +338,21 @@ void SkScan::DAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef GOOGLE3
|
||||
constexpr int STACK_SIZE = 12 << 10; // 12K stack size alloc; Google3 has 16K limit.
|
||||
#else
|
||||
constexpr int STACK_SIZE = 64 << 10; // 64k stack size to avoid heap allocation
|
||||
#endif
|
||||
SkSTArenaAlloc<STACK_SIZE> alloc; // avoid heap allocation with SkSTArenaAlloc
|
||||
|
||||
// Only blitter->blitXXX need to be done in order in the threaded backend.
|
||||
// Everything before can be done out of order in the threaded backend.
|
||||
if (!forceRLE && !isInverse && SkCoverageDeltaMask::Suitable(clippedIR)) {
|
||||
SkCoverageDeltaMask deltaMask(clippedIR);
|
||||
SkCoverageDeltaMask deltaMask(&alloc, clippedIR);
|
||||
gen_alpha_deltas(path, *clipRgn, deltaMask, blitter, skipRect, clipRect == nullptr);
|
||||
deltaMask.convertCoverageToAlpha(isEvenOdd, isInverse, isConvex);
|
||||
blitter->blitMask(deltaMask.prepareSkMask(), clippedIR);
|
||||
} else {
|
||||
#ifdef GOOGLE3
|
||||
constexpr int STACK_SIZE = 8 << 10; // 8K stack size alloc; Google3 has 16K limit.
|
||||
#else
|
||||
constexpr int STACK_SIZE = 64 << 10; // 64k stack size to avoid heap allocation
|
||||
#endif
|
||||
SkSTArenaAlloc<STACK_SIZE> alloc; // avoid heap allocation with SkSTArenaAlloc
|
||||
SkCoverageDeltaList deltaList(&alloc, clippedIR.fTop, clippedIR.fBottom, forceRLE);
|
||||
gen_alpha_deltas(path, *clipRgn, deltaList, blitter, skipRect, clipRect == nullptr);
|
||||
blitter->blitCoverageDeltas(&deltaList, clipBounds, isEvenOdd, isInverse, isConvex);
|
||||
|
Loading…
Reference in New Issue
Block a user