[graphite] Add HybridBoundsManager and use as default impl

This combines the BruteForceBoundsManager and GridBoundsManager, starting
with brute force, which is fastest for low N, and then transitioning to a
less accurate grid that scales better for larger N.

Also updates the set of benchmarks to run based on more reasonable
configs (e.g. having a reasonable level of accuracy to be worth
considering).

Updates Device to use the HybridBoundsManager with brute force up to
64 draws, and then a grid configured to make 16x16 pixel cells. My
guess is we will see a mix of perf regressions and improvements with
this. The existing use of the NaiveBoundsManager had negligible CPU
cost but disallowed all re-ordering. The brute force and grid
managers will add CPU cost but enable re-ordering, which shows up
as shorter command buffers (e.g. 17k commands vs. 28k commands in the
motionmark suits benchmark). However, because we don't have SSBOs yet,
there still isn't much batching that would let the GPU take advantage
of this re-ordering, so I'm not sure how visible the wins will be for
now.

Bug: skia:13201, skia:12787
Change-Id: Iad58fccab45def5f702a30860e063669424dfcf2
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/550518
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Reviewed-by: Nicolette Prevost <nicolettep@google.com>
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
This commit is contained in:
Michael Ludwig 2022-06-16 11:53:42 -04:00 committed by SkCQ
parent 66b0b198ff
commit e9eda5802a
3 changed files with 110 additions and 8 deletions

View File

@ -88,7 +88,7 @@ public:
int numRandomRects)
: BoundsManagerBench(std::move(manager))
, fNumRandomRects(numRandomRects) {
fName.printf("BoundsManager_rand%i_%s", numRandomRects, managerName);
fName.printf("BoundsManager_rand_%i_%s", numRandomRects, managerName);
}
private:
@ -171,10 +171,10 @@ private:
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::NaiveBoundsManager>(), "naive")
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::BruteForceBoundsManager>(), "brute")
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 16), "grid16")
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 64), "grid64")
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 128), "grid128")
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 512), "grid512")
// Hybrid manager benchmarks: 1800x1800 device, 16px grid cells, and a brute-force draw limit
// given by the third constructor argument.
// NOTE(review): the "n128"/"n256" name suffixes do not match the brute-force limits actually
// passed (64 and 128) -- confirm whether the names or the values are the intended ones.
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::HybridBoundsManager>(SkISize{1800, 1800}, 16, 64), "hybrid16x16n128")
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::HybridBoundsManager>(SkISize{1800, 1800}, 16, 128), "hybrid16x16n256")
// Uncomment and adjust device size to match reported bounds from --boundsManagerFile
// DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::MakeRes({w, h}, 8), "gridRes8")

View File

@ -186,12 +186,25 @@ sk_sp<Device> Device::Make(Recorder* recorder,
return sk_sp<Device>(new Device(recorder, std::move(dc)));
}
// These default tuning numbers for the HybridBoundsManager were chosen from looking at performance
// and accuracy curves produced by the BoundsManagerBench for random draw bounding boxes. This
// config will use brute force for the first 64 draw calls to the Device and then switch to a grid
// that is dynamically sized to produce cells that are 16x16, which seemed to be in the sweet spot
// for maintaining good performance without becoming too inaccurate.
// TODO: These could be exposed as context options or surface options, and we may want to have
// different strategies in place for a base device vs. a layer's device.
// Width and height of each grid cell once the hybrid manager has switched to the grid.
static constexpr int kGridCellSize = 16;
// Number of draws tracked by brute force before the hybrid manager switches to the grid.
static constexpr int kMaxBruteForceN = 64;
Device::Device(Recorder* recorder, sk_sp<DrawContext> dc)
: SkBaseDevice(dc->imageInfo(), SkSurfaceProps())
, fRecorder(recorder)
, fDC(std::move(dc))
, fClip(this)
, fColorDepthBoundsManager(std::make_unique<NaiveBoundsManager>())
, fColorDepthBoundsManager(
std::make_unique<HybridBoundsManager>(fDC->imageInfo().dimensions(),
kGridCellSize,
kMaxBruteForceN))
, fDisjointStencilSet(std::make_unique<IntersectionTreeSet>())
, fCachedLocalToDevice(SkM44())
, fCurrentDepth(DrawOrder::kClearDepth)

View File

@ -81,10 +81,8 @@ public:
CompressedPaintersOrder max = CompressedPaintersOrder::First();
auto orderIter = fOrders.items().begin();
for (const Rect& r : fRects.items()) {
if (r.intersects(boundsComplement)) {
if (max < *orderIter) {
max = *orderIter;
}
if (r.intersects(boundsComplement) && max < *orderIter) {
max = *orderIter;
}
++orderIter;
}
@ -101,6 +99,16 @@ public:
fOrders.reset();
}
int count() const { return fRects.count(); }
// Re-records every draw stored in this manager into 'manager', visiting the stored
// (bounds, order) pairs in the order they appear in the parallel fRects/fOrders lists.
void replayDraws(BoundsManager* manager) const {
    // fRects and fOrders are parallel lists, so advance a rect iterator in lock step while
    // walking the orders.
    auto rectIter = fRects.items().begin();
    for (const CompressedPaintersOrder& order : fOrders.items()) {
        manager->recordDraw(*rectIter, order);
        ++rectIter;
    }
}
private:
// fRects and fOrders are parallel, but kept separate to avoid wasting padding since Rect is
// an over-aligned type.
@ -108,6 +116,7 @@ private:
SkTBlockList<CompressedPaintersOrder> fOrders{16, SkBlockAllocator::GrowthPolicy::kFibonacci};
};
// A BoundsManager that tracks highest CompressedPaintersOrder over a uniform spatial grid.
class GridBoundsManager : public BoundsManager {
public:
// 'gridSize' is the number of cells in the X and Y directions, splitting the pixels from [0,0]
@ -215,6 +224,86 @@ private:
SkAutoTMalloc<CompressedPaintersOrder> fNodes;
};
// A BoundsManager that first relies on BruteForceBoundsManager for N draw calls, and then switches
// to the GridBoundsManager if it exceeds its limit. For low N, the brute force approach is
// surprisingly efficient, has the highest accuracy, and very low memory overhead. Once the draw
// call count is large enough, the grid's lower performance complexity outweighs its memory cost
// and reduced accuracy.
class HybridBoundsManager : public BoundsManager {
public:
    // 'deviceSize' and 'gridCellSize' are forwarded to GridBoundsManager::MakeRes() when the grid
    // is first needed. 'maxBruteForceN' is the number of draws that the brute force manager may
    // hold; recording a draw once it already holds that many triggers the switch to the grid.
    // All values must be >= 1.
    HybridBoundsManager(const SkISize& deviceSize,
                        int gridCellSize,
                        int maxBruteForceN)
            : fDeviceSize(deviceSize)
            , fGridCellSize(gridCellSize)
            , fMaxBruteForceN(maxBruteForceN)
            , fCurrentManager(&fBruteForceManager) {
        SkASSERT(deviceSize.width() >= 1 && deviceSize.height() >= 1 &&
                 gridCellSize >= 1 && maxBruteForceN >= 1);
    }

    // Answers the query using whichever implementation is currently active.
    CompressedPaintersOrder getMostRecentDraw(const Rect& bounds) const override {
        return fCurrentManager->getMostRecentDraw(bounds);
    }

    // Records the draw in the active implementation, first upgrading from brute force to the grid
    // if the brute force manager has already reached its limit.
    void recordDraw(const Rect& bounds, CompressedPaintersOrder order) override {
        this->updateCurrentManagerIfNeeded();
        fCurrentManager->recordDraw(bounds, order);
    }

    // Clears all recorded draws and returns to the brute force implementation.
    void reset() override {
        const bool usedGrid = fCurrentManager == fGridManager.get();
        if (usedGrid) {
            // Reset the grid manager so it's ready to use next frame, but don't delete it.
            fGridManager->reset();
            // Assume brute force manager was reset when we swapped to the grid originally
            fCurrentManager = &fBruteForceManager;
        } else {
            if (fGridManager) {
                // Clean up the grid manager that was created over a frame ago without being used.
                // This could lead to re-allocating the grid every-other frame, but it's a simple
                // way to ensure we don't hold onto the grid in perpetuity if it's not needed.
                fGridManager = nullptr;
            }
            fBruteForceManager.reset();
            SkASSERT(fCurrentManager == &fBruteForceManager);
        }
    }

private:
    const SkISize fDeviceSize;
    const int     fGridCellSize;
    const int     fMaxBruteForceN;

    // Declared before fCurrentManager on purpose: members are initialized in declaration order,
    // and the constructor converts &fBruteForceManager to a BoundsManager* when initializing
    // fCurrentManager. Keeping this member first means it is fully constructed before that
    // derived-to-base pointer conversion happens.
    BruteForceBoundsManager fBruteForceManager;

    // Points at either fBruteForceManager or fGridManager.get(); never owns anything.
    BoundsManager* fCurrentManager;

    // The grid manager starts out null and is created the first time we exceed fMaxBruteForceN.
    // However, even if we reset back to the brute force manager, we keep the grid around under the
    // assumption that the owning Device will have similar frame-to-frame draw counts and will need
    // to upgrade to the grid manager again.
    std::unique_ptr<GridBoundsManager> fGridManager;

    // Switches fCurrentManager from brute force to the grid when the brute force manager is full,
    // transferring the draws recorded so far into the grid.
    void updateCurrentManagerIfNeeded() {
        if (fCurrentManager == fGridManager.get() ||
            fBruteForceManager.count() < fMaxBruteForceN) {
            // Already using the grid or the about-to-be-recorded draw will not cause us to exceed
            // the brute force limit, so no need to change the current manager implementation.
            return;
        }

        // Else we need to switch from the brute force manager to the grid manager
        if (!fGridManager) {
            fGridManager = GridBoundsManager::MakeRes(fDeviceSize, fGridCellSize);
        }
        fCurrentManager = fGridManager.get();

        // Fill out the grid manager with the recorded draws in the brute force manager
        fBruteForceManager.replayDraws(fCurrentManager);
        fBruteForceManager.reset();
    }
};
} // namespace skgpu::graphite
#endif // skgpu_graphite_geom_BoundsManager_DEFINED