[graphite] Add HybridBoundsManager and use as default impl
This combines the BruteForceBoundsManager and GridBoundsManager, starting with brute force when it's fastest for low N and then transitioning to a less accurate grid that scales for larger N. Also updates the set of benchmarks to run based on more reasonable configs (e.g. having a reasonable level of accuracy to be worth considering). Updates Device to use the HybridBoundsManager with brute force up to 64 draws, and then a grid configured to make 16x16 pixel cells. My guess is we will see a mix of perf regressions and improvements with this. The existing use of the NaiveBoundsManager had negligible CPU cost but disallowed all re-ordering. The brute force and grid managers will add CPU cost but enable re-ordering, which shows up as shorter command buffers (e.g. 17k commands vs. 28k commands in the MotionMark Suits benchmark). However, because we don't have SSBOs there still isn't as much batching that would let the GPU take advantage of this re-ordering so I'm not sure how visible the wins will be yet. Bug: skia:13201, skia:12787 Change-Id: Iad58fccab45def5f702a30860e063669424dfcf2 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/550518 Reviewed-by: Jim Van Verth <jvanverth@google.com> Reviewed-by: Nicolette Prevost <nicolettep@google.com> Commit-Queue: Michael Ludwig <michaelludwig@google.com>
This commit is contained in:
parent
66b0b198ff
commit
e9eda5802a
@ -88,7 +88,7 @@ public:
|
|||||||
int numRandomRects)
|
int numRandomRects)
|
||||||
: BoundsManagerBench(std::move(manager))
|
: BoundsManagerBench(std::move(manager))
|
||||||
, fNumRandomRects(numRandomRects) {
|
, fNumRandomRects(numRandomRects) {
|
||||||
fName.printf("BoundsManager_rand%i_%s", numRandomRects, managerName);
|
fName.printf("BoundsManager_rand_%i_%s", numRandomRects, managerName);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -171,10 +171,10 @@ private:
|
|||||||
|
|
||||||
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::NaiveBoundsManager>(), "naive")
|
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::NaiveBoundsManager>(), "naive")
|
||||||
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::BruteForceBoundsManager>(), "brute")
|
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::BruteForceBoundsManager>(), "brute")
|
||||||
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 16), "grid16")
|
|
||||||
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 64), "grid64")
|
|
||||||
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 128), "grid128")
|
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 128), "grid128")
|
||||||
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 512), "grid512")
|
DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::Make({1800, 1800}, 512), "grid512")
|
||||||
|
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::HybridBoundsManager>(SkISize{1800, 1800}, 16, 64), "hybrid16x16n64")  // label: 16x16 grid cells, n = maxBruteForceN argument (64)
|
||||||
|
DEF_BOUNDS_MANAGER_BENCH_SET(std::make_unique<skgpu::graphite::HybridBoundsManager>(SkISize{1800, 1800}, 16, 128), "hybrid16x16n128")  // label: 16x16 grid cells, n = maxBruteForceN argument (128)
|
||||||
// Uncomment and adjust device size to match reported bounds from --boundsManagerFile
|
// Uncomment and adjust device size to match reported bounds from --boundsManagerFile
|
||||||
// DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::MakeRes({w, h}, 8), "gridRes8")
|
// DEF_BOUNDS_MANAGER_BENCH_SET(skgpu::graphite::GridBoundsManager::MakeRes({w, h}, 8), "gridRes8")
|
||||||
|
|
||||||
|
@ -186,12 +186,25 @@ sk_sp<Device> Device::Make(Recorder* recorder,
|
|||||||
return sk_sp<Device>(new Device(recorder, std::move(dc)));
|
return sk_sp<Device>(new Device(recorder, std::move(dc)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// These default tuning numbers for the HybridBoundsManager were chosen from looking at performance
|
||||||
|
// and accuracy curves produced by the BoundsManagerBench for random draw bounding boxes. This
|
||||||
|
// config will use brute force for the first 64 draw calls to the Device and then switch to a grid
|
||||||
|
// that is dynamically sized to produce cells that are 16x16, which seemed to be in the sweet spot
|
||||||
|
// for maintaining good performance without becoming too inaccurate.
|
||||||
|
// TODO: These could be exposed as context options or surface options, and we may want to have
|
||||||
|
// different strategies in place for a base device vs. a layer's device.
|
||||||
|
static constexpr int kGridCellSize = 16;
|
||||||
|
static constexpr int kMaxBruteForceN = 64;
|
||||||
|
|
||||||
Device::Device(Recorder* recorder, sk_sp<DrawContext> dc)
|
Device::Device(Recorder* recorder, sk_sp<DrawContext> dc)
|
||||||
: SkBaseDevice(dc->imageInfo(), SkSurfaceProps())
|
: SkBaseDevice(dc->imageInfo(), SkSurfaceProps())
|
||||||
, fRecorder(recorder)
|
, fRecorder(recorder)
|
||||||
, fDC(std::move(dc))
|
, fDC(std::move(dc))
|
||||||
, fClip(this)
|
, fClip(this)
|
||||||
, fColorDepthBoundsManager(std::make_unique<NaiveBoundsManager>())
|
, fColorDepthBoundsManager(
|
||||||
|
std::make_unique<HybridBoundsManager>(fDC->imageInfo().dimensions(),
|
||||||
|
kGridCellSize,
|
||||||
|
kMaxBruteForceN))
|
||||||
, fDisjointStencilSet(std::make_unique<IntersectionTreeSet>())
|
, fDisjointStencilSet(std::make_unique<IntersectionTreeSet>())
|
||||||
, fCachedLocalToDevice(SkM44())
|
, fCachedLocalToDevice(SkM44())
|
||||||
, fCurrentDepth(DrawOrder::kClearDepth)
|
, fCurrentDepth(DrawOrder::kClearDepth)
|
||||||
|
@ -81,10 +81,8 @@ public:
|
|||||||
CompressedPaintersOrder max = CompressedPaintersOrder::First();
|
CompressedPaintersOrder max = CompressedPaintersOrder::First();
|
||||||
auto orderIter = fOrders.items().begin();
|
auto orderIter = fOrders.items().begin();
|
||||||
for (const Rect& r : fRects.items()) {
|
for (const Rect& r : fRects.items()) {
|
||||||
if (r.intersects(boundsComplement)) {
|
if (r.intersects(boundsComplement) && max < *orderIter) {
|
||||||
if (max < *orderIter) {
|
max = *orderIter;
|
||||||
max = *orderIter;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
++orderIter;
|
++orderIter;
|
||||||
}
|
}
|
||||||
@ -101,6 +99,16 @@ public:
|
|||||||
fOrders.reset();
|
fOrders.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int count() const { return fRects.count(); }
|
||||||
|
|
||||||
|
void replayDraws(BoundsManager* manager) const {
|
||||||
|
auto orderIter = fOrders.items().begin();
|
||||||
|
for (const Rect& r : fRects.items()) {
|
||||||
|
manager->recordDraw(r, *orderIter);
|
||||||
|
++orderIter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// fRects and fOrders are parallel, but kept separate to avoid wasting padding since Rect is
|
// fRects and fOrders are parallel, but kept separate to avoid wasting padding since Rect is
|
||||||
// an over-aligned type.
|
// an over-aligned type.
|
||||||
@ -108,6 +116,7 @@ private:
|
|||||||
SkTBlockList<CompressedPaintersOrder> fOrders{16, SkBlockAllocator::GrowthPolicy::kFibonacci};
|
SkTBlockList<CompressedPaintersOrder> fOrders{16, SkBlockAllocator::GrowthPolicy::kFibonacci};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A BoundsManager that tracks highest CompressedPaintersOrder over a uniform spatial grid.
|
||||||
class GridBoundsManager : public BoundsManager {
|
class GridBoundsManager : public BoundsManager {
|
||||||
public:
|
public:
|
||||||
// 'gridSize' is the number of cells in the X and Y directions, splitting the pixels from [0,0]
|
// 'gridSize' is the number of cells in the X and Y directions, splitting the pixels from [0,0]
|
||||||
@ -215,6 +224,86 @@ private:
|
|||||||
SkAutoTMalloc<CompressedPaintersOrder> fNodes;
|
SkAutoTMalloc<CompressedPaintersOrder> fNodes;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A BoundsManager that first relies on BruteForceBoundsManager for N draw calls, and then switches
|
||||||
|
// to the GridBoundsManager if it exceeds its limit. For low N, the brute force approach is
|
||||||
|
// surprisingly efficient, has the highest accuracy, and very low memory overhead. Once the draw
|
||||||
|
// call count is large enough, the grid's lower performance complexity outweighs its memory cost and
|
||||||
|
// reduced accuracy.
|
||||||
|
class HybridBoundsManager : public BoundsManager {
|
||||||
|
public:
|
||||||
|
HybridBoundsManager(const SkISize& deviceSize,
|
||||||
|
int gridCellSize,
|
||||||
|
int maxBruteForceN)
|
||||||
|
: fDeviceSize(deviceSize)
|
||||||
|
, fGridCellSize(gridCellSize)
|
||||||
|
, fMaxBruteForceN(maxBruteForceN)
|
||||||
|
, fCurrentManager(&fBruteForceManager) {
|
||||||
|
SkASSERT(deviceSize.width() >= 1 && deviceSize.height() >= 1 &&
|
||||||
|
gridCellSize >= 1 && maxBruteForceN >= 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
CompressedPaintersOrder getMostRecentDraw(const Rect& bounds) const override {
|
||||||
|
return fCurrentManager->getMostRecentDraw(bounds);
|
||||||
|
}
|
||||||
|
|
||||||
|
void recordDraw(const Rect& bounds, CompressedPaintersOrder order) override {
|
||||||
|
this->updateCurrentManagerIfNeeded();
|
||||||
|
fCurrentManager->recordDraw(bounds, order);
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset() override {
|
||||||
|
const bool usedGrid = fCurrentManager == fGridManager.get();
|
||||||
|
if (usedGrid) {
|
||||||
|
// Reset the grid manager so it's ready to use next frame, but don't delete it.
|
||||||
|
fGridManager->reset();
|
||||||
|
// Assume brute force manager was reset when we swapped to the grid originally
|
||||||
|
fCurrentManager = &fBruteForceManager;
|
||||||
|
} else {
|
||||||
|
if (fGridManager) {
|
||||||
|
// Clean up the grid manager that was created over a frame ago without being used.
|
||||||
|
// This could lead to re-allocating the grid every other frame, but it's a simple
|
||||||
|
// way to ensure we don't hold onto the grid in perpetuity if it's not needed.
|
||||||
|
fGridManager = nullptr;
|
||||||
|
}
|
||||||
|
fBruteForceManager.reset();
|
||||||
|
SkASSERT(fCurrentManager == &fBruteForceManager);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const SkISize fDeviceSize;
|
||||||
|
const int fGridCellSize;
|
||||||
|
const int fMaxBruteForceN;
|
||||||
|
|
||||||
|
BoundsManager* fCurrentManager;
|
||||||
|
|
||||||
|
BruteForceBoundsManager fBruteForceManager;
|
||||||
|
|
||||||
|
// The grid manager starts out null and is created the first time we exceed fMaxBruteForceN.
|
||||||
|
// However, even if we reset back to the brute force manager, we keep the grid around under the
|
||||||
|
// assumption that the owning Device will have similar frame-to-frame draw counts and will need
|
||||||
|
// to upgrade to the grid manager again. reset() frees it if it went unused since the prior reset.
|
||||||
|
std::unique_ptr<GridBoundsManager> fGridManager;
|
||||||
|
|
||||||
|
void updateCurrentManagerIfNeeded() {
|
||||||
|
if (fCurrentManager == fGridManager.get() ||
|
||||||
|
fBruteForceManager.count() < fMaxBruteForceN) {
|
||||||
|
// Already using the grid or the about-to-be-recorded draw will not cause us to exceed
|
||||||
|
// the brute force limit, so no need to change the current manager implementation.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Else we need to switch from the brute force manager to the grid manager
|
||||||
|
if (!fGridManager) {
|
||||||
|
fGridManager = GridBoundsManager::MakeRes(fDeviceSize, fGridCellSize);
|
||||||
|
}
|
||||||
|
fCurrentManager = fGridManager.get();
|
||||||
|
|
||||||
|
// Fill out the grid manager with the recorded draws in the brute force manager
|
||||||
|
fBruteForceManager.replayDraws(fCurrentManager);
|
||||||
|
fBruteForceManager.reset();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace skgpu::graphite
|
} // namespace skgpu::graphite
|
||||||
|
|
||||||
#endif // skgpu_graphite_geom_BoundsManager_DEFINED
|
#endif // skgpu_graphite_geom_BoundsManager_DEFINED
|
||||||
|
Loading…
Reference in New Issue
Block a user