Merge adjacent GrOpsTasks with same target together

This allows the ops tasks to make one render pass instead of multiple.
The only case where this merging is needed is as a result of
reordering (reduceOpsTaskSplitting).

Bug: skia:10877
Change-Id: Ia967ead6efc43f7d2c1da58f770d3987da690cda
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/353656
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Adlai Holler <adlai@google.com>
Author: Adlai Holler <adlai@google.com>
Date: 2021-01-22 14:56:36 -05:00
Committed by: Skia Commit-Bot
Parent: eeeaae9a02
Commit: 3b5b7d1178
3 changed files with 84 additions and 8 deletions
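
For readers skimming the change, here is the shape of the optimization as a toy sketch (hypothetical ToyTask/mergeAdjacent, plain std::vector; nothing from the Skia API): after reordering, tasks that render to the same target can end up adjacent, and collapsing each such run into the first task of the run lets the backend record one render pass for the run instead of one per task.

    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>

    struct ToyTask {
        int targetId;                     // stand-in for an ops task's render target
        std::vector<std::string> ops;     // stand-in for the task's op chains
    };

    // Collapse each run of adjacent tasks sharing a target into the first task of the run.
    static std::vector<ToyTask> mergeAdjacent(std::vector<ToyTask> tasks) {
        std::vector<ToyTask> merged;
        for (ToyTask& t : tasks) {
            if (!merged.empty() && merged.back().targetId == t.targetId) {
                // Same target as the previous surviving task: append its ops instead of
                // keeping it, so the whole run becomes a single task downstream.
                std::vector<std::string>& dst = merged.back().ops;
                dst.insert(dst.end(),
                           std::make_move_iterator(t.ops.begin()),
                           std::make_move_iterator(t.ops.end()));
            } else {
                merged.push_back(std::move(t));
            }
        }
        return merged;
    }

In the actual change the run is found and absorbed by GrOpsTask::mergeFromLList() and driven from GrDrawingManager::reorderTasks(), as the diffs below show.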

src/gpu/GrDrawingManager.cpp

@@ -424,17 +424,20 @@ void GrDrawingManager::sortTasks() {
 }
 
 // Reorder the array to match the llist without reffing & unreffing sk_sp's.
-// Both args must contain the same objects.
+// The llist must contain a subset of the entries in the array.
 // This is basically a shim because clustering uses LList but the rest of drawmgr uses array.
+// Pointers in the array are not dereferenced.
 template <typename T>
 static void reorder_array_by_llist(const SkTInternalLList<T>& llist, SkTArray<sk_sp<T>>* array) {
+    for (sk_sp<T>& t : *array) {
+        [[maybe_unused]] T* old = t.release();
+    }
     int i = 0;
     for (T* t : llist) {
-        // Release the pointer that used to live here so it doesn't get unreffed.
-        [[maybe_unused]] T* old = array->at(i).release();
         array->at(i++).reset(t);
     }
-    SkASSERT(i == array->count());
+    SkASSERT(i <= array->count());
+    array->resize_back(i);
 }
 
 void GrDrawingManager::reorderTasks() {
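
A rough analogue of the updated shim above, for readers without sk_sp/SkTArray in their head. This is a sketch of the pattern only, with std::list<T*> standing in for SkTInternalLList<T>, std::vector<std::unique_ptr<T>> for SkTArray<sk_sp<T>>, and unique_ptr release()/reset() for the sk_sp calls: ownership is dropped up front so refcounts are never touched, surviving entries are re-adopted in list order, and the leftover slots are trimmed at the end.

    #include <cassert>
    #include <list>
    #include <memory>
    #include <vector>

    template <typename T>
    static void reorder_by_list(const std::list<T*>& order,
                                std::vector<std::unique_ptr<T>>* array) {
        // Give up ownership of every slot without destroying anything. Slots whose
        // objects survive are re-adopted below; slots whose objects were merged away
        // were already destroyed by the caller (mirroring the Skia contract), so they
        // are dangling and must only be released, never dereferenced.
        for (auto& slot : *array) {
            (void)slot.release();
        }
        size_t i = 0;
        for (T* t : order) {
            (*array)[i++].reset(t);   // re-adopt ownership in the list's order
        }
        assert(i <= array->size());
        array->resize(i);             // drop the now-empty trailing slots
    }
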
@@ -446,6 +449,30 @@ void GrDrawingManager::reorderTasks() {
     }
 
     // TODO: Handle case where proposed order would blow our memory budget.
     // Such cases are currently pathological, so we could just return here and keep current order.
+
+    // Merge adjacent ops tasks. Note: We remove (future) tasks from the list during iteration.
+    // This works out, however, because when we access the next element in llist it will be valid.
+    for (auto task : llist) {
+        auto opsTask = task->asOpsTask();
+        if (!opsTask) {
+            continue;
+        }
+        int removedCount = opsTask->mergeFromLList();
+        auto removedTask = opsTask->fNext;
+        for (int i = 0; i < removedCount; i++) {
+            auto next = removedTask->fNext;
+            llist.remove(removedTask);
+            // After this unref, there will be a dangling sk_sp to this task in fDAG somewhere.
+            // That dangling pointer will be removed in reorder_array_by_llist.
+            removedTask->disown(this);
+            removedTask->unref();
+            removedTask = next;
+        }
+    }
+
     reorder_array_by_llist(llist, &fDAG);
 }
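
The "remove (future) tasks during iteration" comment is the subtle part of this loop. A minimal standalone illustration of why it is safe, with std::list standing in for the intrusive SkTInternalLList: erasing nodes ahead of the current position only invalidates the erased nodes, never the one the loop is standing on, so advancing afterwards is well defined.

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
        std::list<int> tasks{1, 1, 2, 3, 3, 3};
        for (auto it = tasks.begin(); it != tasks.end(); ++it) {
            // Remove every immediately-following element equal to *it (the "merged" tasks).
            auto next = std::next(it);
            while (next != tasks.end() && *next == *it) {
                next = tasks.erase(next);   // only the erased node's iterator is invalidated
            }
        }
        assert((tasks == std::list<int>{1, 2, 3}));
        return 0;
    }
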

src/gpu/GrOpsTask.cpp

@@ -429,7 +429,7 @@ void GrOpsTask::addDrawOp(GrDrawingManager* drawingMgr, GrOp::Owner op,
 void GrOpsTask::endFlush(GrDrawingManager* drawingMgr) {
     fLastClipStackGenID = SK_InvalidUniqueID;
     this->deleteOps();
-    fClipAllocator.reset();
+    fClipAllocators.reset();
     fDeferredProxies.reset();
     fSampledProxies.reset();
@@ -677,6 +677,50 @@ void GrOpsTask::setColorLoadOp(GrLoadOp op, std::array<float, 4> color) {
     }
 }
 
+int GrOpsTask::mergeFromLList() {
+    GrOpsTask* last = this;
+    int addlProxyCount = 0;
+    int addlOpChainCount = 0;
+    int mergedCount = 0;
+    for (GrRenderTask* task = fNext; task; task = task->fNext) {
+        auto opsTask = task->asOpsTask();
+        if (!opsTask || opsTask->target(0) != this->target(0)) {
+            break;
+        }
+        SkASSERT(fTargetSwizzle == opsTask->fTargetSwizzle);
+        SkASSERT(fTargetOrigin == opsTask->fTargetOrigin);
+        mergedCount += 1;
+        addlProxyCount += opsTask->fSampledProxies.count();
+        addlOpChainCount += opsTask->fOpChains.count();
+        fClippedContentBounds.join(opsTask->fClippedContentBounds);
+        fTotalBounds.join(opsTask->fTotalBounds);
+        fRenderPassXferBarriers |= opsTask->fRenderPassXferBarriers;
+        SkDEBUGCODE(fNumClips += opsTask->fNumClips);
+        last = opsTask;
+    }
+    if (last == this) {
+        return 0;
+    }
+    fLastClipStackGenID = SK_InvalidUniqueID;
+    fSampledProxies.reserve_back(addlProxyCount);
+    fOpChains.reserve_back(addlOpChainCount);
+    fClipAllocators.reserve_back(mergedCount);
+    for (auto task = fNext; task != last->fNext; task = task->fNext) {
+        auto opsTask = reinterpret_cast<GrOpsTask*>(task);
+        fSampledProxies.move_back_n(opsTask->fSampledProxies.count(),
+                                    opsTask->fSampledProxies.data());
+        fOpChains.move_back_n(opsTask->fOpChains.count(),
+                              opsTask->fOpChains.data());
+        SkASSERT(1 == opsTask->fClipAllocators.count());
+        fClipAllocators.push_back(std::move(opsTask->fClipAllocators[0]));
+        opsTask->fClipAllocators.reset();
+        opsTask->fSampledProxies.reset();
+        opsTask->fOpChains.reset();
+    }
+    fMustPreserveStencil = last->fMustPreserveStencil;
+    return mergedCount;
+}
+
 bool GrOpsTask::resetForFullscreenClear(CanDiscardPreviousOps canDiscardPreviousOps) {
     if (CanDiscardPreviousOps::kYes == canDiscardPreviousOps || this->isEmpty()) {
         this->deleteOps();
@@ -918,7 +962,7 @@ void GrOpsTask::recordOp(
         GrOP_INFO("\t\tBackward: FirstOp\n");
     }
     if (clip) {
-        clip = fClipAllocator.make<GrAppliedClip>(std::move(*clip));
+        clip = fClipAllocators[0].make<GrAppliedClip>(std::move(*clip));
         SkDEBUGCODE(fNumClips++;)
     }
     fOpChains.emplace_back(std::move(op), processorAnalysis, clip, dstProxyView);
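
Condensed, mergeFromLList() above is a two-pass scan: measure the run of compatible neighbours first so the receiving arrays can reserve once, then move the neighbours' contents over and leave them empty. The sketch below mirrors that structure with toy types (hypothetical ToyOpsTask/mergeRun, plain std::vector); it is not the Skia implementation.

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <string>
    #include <vector>

    struct ToyOpsTask {
        int targetId;
        std::vector<std::string> opChains;
        std::vector<int> sampledProxies;
    };

    // Merge tasks[first+1..] into tasks[first] while they share a target.
    // Returns how many tasks were absorbed (and emptied), mirroring mergeFromLList().
    static int mergeRun(std::vector<ToyOpsTask>& tasks, std::size_t first) {
        ToyOpsTask& dst = tasks[first];
        std::size_t end = first + 1;
        std::size_t addlOps = 0, addlProxies = 0;
        while (end < tasks.size() && tasks[end].targetId == dst.targetId) {
            addlOps += tasks[end].opChains.size();            // pass 1: measure the run
            addlProxies += tasks[end].sampledProxies.size();
            ++end;
        }
        dst.opChains.reserve(dst.opChains.size() + addlOps);
        dst.sampledProxies.reserve(dst.sampledProxies.size() + addlProxies);
        for (std::size_t i = first + 1; i < end; ++i) {       // pass 2: move the contents
            ToyOpsTask& src = tasks[i];
            std::move(src.opChains.begin(), src.opChains.end(),
                      std::back_inserter(dst.opChains));
            dst.sampledProxies.insert(dst.sampledProxies.end(),
                                      src.sampledProxies.begin(), src.sampledProxies.end());
            src.opChains.clear();
            src.sampledProxies.clear();
        }
        return static_cast<int>(end - first - 1);
    }
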

src/gpu/GrOpsTask.h

@@ -54,7 +54,7 @@ public:
     void onPrePrepare(GrRecordingContext*) override;
     /**
      * Together these two functions flush all queued up draws to GrCommandBuffer. The return value
-     * of executeOps() indicates whether any commands were actually issued to the GPU.
+     * of onExecute() indicates whether any commands were actually issued to the GPU.
      */
     void onPrepare(GrOpFlushState* flushState) override;
     bool onExecute(GrOpFlushState* flushState) override;
@@ -89,6 +89,10 @@ public:
     // Must only be called if native color buffer clearing is enabled.
     void setColorLoadOp(GrLoadOp op, std::array<float, 4> color = {0, 0, 0, 0});
 
+    // Merge opsTasks after 'this' starting from 'fNext' into 'this'. They should all be
+    // renderPass compatible. Return the number of tasks merged into 'this'.
+    int mergeFromLList();
+
 #ifdef SK_DEBUG
     int numClips() const override { return fNumClips; }
     void visitProxies_debugOnly(const GrOp::VisitProxyFunc&) const override;
@@ -270,7 +274,8 @@ private:
 
     // MDB TODO: 4096 for the first allocation of the clip space will be huge overkill.
     // Gather statistics to determine the correct size.
-    SkArenaAllocWithReset fClipAllocator{4096};
+    // TODO: Move the clips onto the recordTimeAllocator after CCPR is removed.
+    SkSTArray<1, SkArenaAlloc> fClipAllocators = { SkArenaAlloc(4096) };
     SkDEBUGCODE(int fNumClips;)
 
     // TODO: We could look into this being a set if we find we're adding a lot of duplicates that is
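
Why fClipAllocator becomes a small array: each task's applied clips are allocated out of its own arena, and after a merge those allocations must stay alive, so the surviving task takes ownership of the donor's arena object rather than copying clips out of it. A toy illustration of that ownership transfer (hypothetical Arena/TaskLike types, not SkArenaAlloc or GrOpsTask):

    #include <memory>
    #include <utility>
    #include <vector>

    struct Arena {
        std::vector<std::unique_ptr<int>> storage;   // stand-in for bump allocations
        int* make(int v) {
            storage.push_back(std::make_unique<int>(v));
            return storage.back().get();
        }
    };

    struct TaskLike {
        std::vector<std::unique_ptr<Arena>> clipArenas;   // analogous to fClipAllocators
        std::vector<int*> clips;                          // point into one of clipArenas

        TaskLike() { clipArenas.push_back(std::make_unique<Arena>()); }

        void absorbClips(TaskLike& donor) {
            // The donor's clip pointers stay valid because the Arena object backing
            // them is moved wholesale, never reallocated or copied.
            clips.insert(clips.end(), donor.clips.begin(), donor.clips.end());
            clipArenas.push_back(std::move(donor.clipArenas.front()));
            donor.clipArenas.clear();
            donor.clips.clear();
        }
    };
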