[graphite] Use finished callbacks in nanobench to manage frames.

In nanobench we want to simulate a GPU's buffer swapping and keep the CPU
from getting too far ahead of the GPU. To do this we use finished callbacks
to detect when we are more than 3 frames ahead of the GPU. This CL adds
support for doing this in Graphite.

Bug: skia:12974
Change-Id: I8be505c5769399dcc0f5954f9f999f4448633647
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/525186
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Jim Van Verth <jvanverth@google.com>
This commit is contained in:
Greg Daniel 2022-03-28 15:27:44 -04:00 committed by SkCQ
parent b2af4f4e8d
commit 5d67b1797a
8 changed files with 105 additions and 29 deletions

View File

@ -256,7 +256,7 @@ struct GPUTarget : public Target {
this->contextInfo.testContext()->flushAndWaitOnSync(contextInfo.directContext()); this->contextInfo.testContext()->flushAndWaitOnSync(contextInfo.directContext());
} }
} }
void fence() override { this->contextInfo.testContext()->finish(); } void syncCPU() override { this->contextInfo.testContext()->finish(); }
bool needsFrameTiming(int* maxFrameLag) const override { bool needsFrameTiming(int* maxFrameLag) const override {
if (!this->contextInfo.testContext()->getMaxGpuFrameLag(maxFrameLag)) { if (!this->contextInfo.testContext()->getMaxGpuFrameLag(maxFrameLag)) {
@ -297,20 +297,16 @@ struct GPUTarget : public Target {
#ifdef SK_GRAPHITE_ENABLED #ifdef SK_GRAPHITE_ENABLED
struct GraphiteTarget : public Target { struct GraphiteTarget : public Target {
explicit GraphiteTarget(const Config& c) : Target(c) {} explicit GraphiteTarget(const Config& c) : Target(c) {}
using ContextInfo = skiatest::graphite::ContextFactory::ContextInfo; using TestContext = skiatest::graphite::GraphiteTestContext;
using ContextFactory = skiatest::graphite::ContextFactory; using ContextFactory = skiatest::graphite::ContextFactory;
std::unique_ptr<ContextFactory> factory; std::unique_ptr<ContextFactory> factory;
TestContext* testContext;
skgpu::Context* context; skgpu::Context* context;
std::unique_ptr<skgpu::Recorder> recorder; std::unique_ptr<skgpu::Recorder> recorder;
~GraphiteTarget() override { ~GraphiteTarget() override {}
// TODO: We need to get the ref counting correct for MtlPipeline and MTLDepthStencilState
// since right now they live on the Recorder. Until then make sure the Context has finished
// all its work.
this->fence();
}
void setup() override {} void setup() override {}
@ -318,17 +314,15 @@ struct GraphiteTarget : public Target {
if (context && recorder) { if (context && recorder) {
std::unique_ptr<skgpu::Recording> recording = this->recorder->snap(); std::unique_ptr<skgpu::Recording> recording = this->recorder->snap();
if (recording) { if (recording) {
skgpu::InsertRecordingInfo info; this->testContext->submitRecordingAndWaitOnSync(this->context, recording.get());
info.fRecording = recording.get();
this->context->insertRecording(info);
} }
context->submit(skgpu::SyncToCpu::kNo);
} }
} }
void fence() override { void syncCPU() override {
if (context && recorder) { if (context && recorder) {
// TODO: have a way to sync work with out submitting a Recording which is currently // TODO: have a way to sync work with out submitting a Recording which is currently
// required. // required. Probably need to get to the point where the backend command buffers are
// stored on the Context and not Recordings before this is feasible.
std::unique_ptr<skgpu::Recording> recording = this->recorder->snap(); std::unique_ptr<skgpu::Recording> recording = this->recorder->snap();
if (recording) { if (recording) {
skgpu::InsertRecordingInfo info; skgpu::InsertRecordingInfo info;
@ -340,14 +334,7 @@ struct GraphiteTarget : public Target {
} }
bool needsFrameTiming(int* maxFrameLag) const override { bool needsFrameTiming(int* maxFrameLag) const override {
// TODO SkAssertResult(this->testContext->getMaxGpuFrameLag(maxFrameLag));
#if 0
if (!this->contextInfo.testContext()->getMaxGpuFrameLag(maxFrameLag)) {
// Frame lag is unknown.
*maxFrameLag = FLAGS_gpuFrameLag;
}
#endif
*maxFrameLag = FLAGS_gpuFrameLag;
return true; return true;
} }
bool init(SkImageInfo info, Benchmark* bench) override { bool init(SkImageInfo info, Benchmark* bench) override {
@ -357,11 +344,12 @@ struct GraphiteTarget : public Target {
// context options when we make the factory here. // context options when we make the factory here.
this->factory = std::make_unique<ContextFactory>(); this->factory = std::make_unique<ContextFactory>();
auto [testContext, ctx] = this->factory->getContextInfo(this->config.graphiteCtxType); auto [testCtx, ctx] = this->factory->getContextInfo(this->config.graphiteCtxType);
if (!ctx) { if (!ctx) {
return false; return false;
} }
context = ctx; this->testContext = testCtx;
this->context = ctx;
this->recorder = this->context->makeRecorder(); this->recorder = this->context->makeRecorder();
if (!this->recorder) { if (!this->recorder) {
@ -539,7 +527,7 @@ static int setup_gpu_bench(Target* target, Benchmark* bench, int maxGpuFrameLag)
loops = clamp_loops(loops); loops = clamp_loops(loops);
// Make sure we're not still timing our calibration. // Make sure we're not still timing our calibration.
target->fence(); target->syncCPU();
} else { } else {
loops = detect_forever_loops(loops); loops = detect_forever_loops(loops);
} }

View File

@ -54,7 +54,7 @@ struct Target {
/** Called between benchmarks (or between calibration and measured /** Called between benchmarks (or between calibration and measured
runs) to make sure all pending work in drivers / threads is runs) to make sure all pending work in drivers / threads is
complete. */ complete. */
virtual void fence() { } virtual void syncCPU() { }
/** CPU-like targets can just be timed, but GPU-like /** CPU-like targets can just be timed, but GPU-like
targets need to pay attention to frame boundaries targets need to pay attention to frame boundaries

View File

@ -74,6 +74,11 @@ public:
void insertRecording(const InsertRecordingInfo&); void insertRecording(const InsertRecordingInfo&);
void submit(SyncToCpu = SyncToCpu::kNo); void submit(SyncToCpu = SyncToCpu::kNo);
/**
* Checks whether any asynchronous work is complete and if so calls related callbacks.
*/
void checkAsyncWorkCompletion();
void preCompile(const PaintCombo&); void preCompile(const PaintCombo&);
/** /**

View File

@ -80,6 +80,10 @@ void Context::submit(SyncToCpu syncToCpu) {
fGpu->checkForFinishedWork(syncToCpu); fGpu->checkForFinishedWork(syncToCpu);
} }
// Asks the Gpu object to check for finished work, passing SyncToCpu::kNo so that no CPU sync
// is requested (in contrast to submit(), which forwards the caller's SyncToCpu choice).
// NOTE(review): presumably this is what triggers pending finished callbacks -- confirm against
// Gpu::checkForFinishedWork.
void Context::checkAsyncWorkCompletion() {
fGpu->checkForFinishedWork(SyncToCpu::kNo);
}
void Context::preCompile(const PaintCombo& paintCombo) { void Context::preCompile(const PaintCombo& paintCombo) {
static const Renderer* kRenderers[] = { static const Renderer* kRenderers[] = {
&Renderer::StencilTessellatedCurvesAndTris(SkPathFillType::kWinding), &Renderer::StencilTessellatedCurvesAndTris(SkPathFillType::kWinding),

View File

@ -10,6 +10,10 @@
#include "include/gpu/GrDirectContext.h" #include "include/gpu/GrDirectContext.h"
#include "src/core/SkTraceEvent.h" #include "src/core/SkTraceEvent.h"
#ifdef SK_GRAPHITE_ENABLED
#include "experimental/graphite/include/Context.h"
#endif
#include <chrono> #include <chrono>
namespace sk_gpu_test { namespace sk_gpu_test {
@ -19,7 +23,16 @@ void FlushFinishTracker::waitTillFinished() {
auto begin = std::chrono::steady_clock::now(); auto begin = std::chrono::steady_clock::now();
auto end = begin; auto end = begin;
while (!fIsFinished && (end - begin) < std::chrono::seconds(2)) { while (!fIsFinished && (end - begin) < std::chrono::seconds(2)) {
fContext->checkAsyncWorkCompletion(); if (fContext) {
fContext->checkAsyncWorkCompletion();
} else {
#ifdef SK_GRAPHITE_ENABLED
SkASSERT(fGraphiteContext);
fGraphiteContext->checkAsyncWorkCompletion();
#else
SkDEBUGFAIL("No valid context");
#endif
}
end = std::chrono::steady_clock::now(); end = std::chrono::steady_clock::now();
} }
if (!fIsFinished) { if (!fIsFinished) {

View File

@ -12,6 +12,10 @@
class GrDirectContext; class GrDirectContext;
#ifdef SK_GRAPHITE_ENABLED
namespace skgpu { class Context; }
#endif
namespace sk_gpu_test { namespace sk_gpu_test {
class FlushFinishTracker : public SkRefCnt { class FlushFinishTracker : public SkRefCnt {
@ -23,13 +27,19 @@ public:
} }
FlushFinishTracker(GrDirectContext* context) : fContext(context) {} FlushFinishTracker(GrDirectContext* context) : fContext(context) {}
#ifdef SK_GRAPHITE_ENABLED
FlushFinishTracker(skgpu::Context* context) : fGraphiteContext(context) {}
#endif
void setFinished() { fIsFinished = true; } void setFinished() { fIsFinished = true; }
void waitTillFinished(); void waitTillFinished();
private: private:
GrDirectContext* fContext; GrDirectContext* fContext = nullptr;
#ifdef SK_GRAPHITE_ENABLED
skgpu::Context* fGraphiteContext = nullptr;
#endif
// Currently we don't have the this bool be atomic cause all current uses of this class happen // Currently we don't have the this bool be atomic cause all current uses of this class happen
// on a single thread. In other words we call flush, checkAsyncWorkCompletion, and // on a single thread. In other words we call flush, checkAsyncWorkCompletion, and

View File

@ -7,10 +7,43 @@
#include "tools/graphite/GraphiteTestContext.h" #include "tools/graphite/GraphiteTestContext.h"
#include "experimental/graphite/include/Context.h"
#include "experimental/graphite/include/GraphiteTypes.h"
#include "experimental/graphite/include/Recording.h"
#include "src/core/SkTraceEvent.h"
#include "tools/gpu/FlushFinishTracker.h"
namespace skiatest::graphite { namespace skiatest::graphite {
GraphiteTestContext::GraphiteTestContext() {} GraphiteTestContext::GraphiteTestContext() {}
GraphiteTestContext::~GraphiteTestContext() {} GraphiteTestContext::~GraphiteTestContext() {}
// Inserts `recording` into `context` and submits it without syncing the CPU, throttling the
// caller through the ring of finish trackers: the tracker slot about to be reused is waited on
// first, then replaced with a fresh tracker that the finished callback of this insert will signal.
// Both `context` and `recording` must be non-null.
void GraphiteTestContext::submitRecordingAndWaitOnSync(skgpu::Context* context,
                                                       skgpu::Recording* recording) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    SkASSERT(context);
    SkASSERT(recording);

    // The slot we are about to reuse holds the oldest tracked submission (if any).
    auto& slot = fFinishTrackers[fCurrentFlushIdx];
    if (slot) {
        slot->waitTillFinished();
    }

    slot.reset(new sk_gpu_test::FlushFinishTracker(context));
    // Take one extra ref on the new tracker. That ref belongs to the finished callback
    // registered below, which is expected to unref the tracker when it is invoked.
    slot->ref();

    skgpu::InsertRecordingInfo info;
    info.fRecording = recording;
    info.fFinishedContext = slot.get();
    info.fFinishedProc = sk_gpu_test::FlushFinishTracker::FlushFinished;

    context->insertRecording(info);
    context->submit(skgpu::SyncToCpu::kNo);

    // Advance to the next slot, wrapping around the end of the tracker array.
    fCurrentFlushIdx = (fCurrentFlushIdx + 1) % SK_ARRAY_COUNT(fFinishTrackers);
}
} // namespace skiatest::graphite } // namespace skiatest::graphite

View File

@ -11,7 +11,12 @@
#include "experimental/graphite/include/GraphiteTypes.h" #include "experimental/graphite/include/GraphiteTypes.h"
#include "include/core/SkRefCnt.h" #include "include/core/SkRefCnt.h"
namespace skgpu { class Context; } namespace skgpu {
class Context;
class Recording;
}
namespace sk_gpu_test { class FlushFinishTracker; }
namespace skiatest::graphite { namespace skiatest::graphite {
@ -30,7 +35,25 @@ public:
virtual std::unique_ptr<skgpu::Context> makeContext() = 0; virtual std::unique_ptr<skgpu::Context> makeContext() = 0;
/**
* Reports the frame lag used by this test context. Always writes kMaxFrameLag to *maxFrameLag
* and returns true. maxFrameLag is dereferenced unconditionally, so it must not be null.
*/
bool getMaxGpuFrameLag(int *maxFrameLag) const {
*maxFrameLag = kMaxFrameLag;
return true;
}
/**
* This will insert a Recording and submit work to the GPU without syncing the CPU.
* Additionally, we add a finished callback to our insert recording call so that we can tell
* when each submission has completed. Submissions are tracked in kMaxFrameLag - 1 slots, so we
* allow ourselves at most kMaxFrameLag - 1 unfinished submissions at a time: if the slot we are
* about to reuse still holds an outstanding submission, we first wait on the CPU for it to
* finish. Neither the Context nor the Recording may be null.
*/
void submitRecordingAndWaitOnSync(skgpu::Context*, skgpu::Recording*);
protected: protected:
static constexpr int kMaxFrameLag = 3;
sk_sp<sk_gpu_test::FlushFinishTracker> fFinishTrackers[kMaxFrameLag - 1];
int fCurrentFlushIdx = 0;
GraphiteTestContext(); GraphiteTestContext();
}; };