[graphite] Use finished callbacks in nanobench to manage frames.
In nanobench we want to simulate a GPU's buffer swapping and not get too far ahead on the CPU. Thus we use finished callbacks to know if we get more than 3 frames ahead of the GPU. This CL adds support for Graphite to do this. Bug: skia:12974 Change-Id: I8be505c5769399dcc0f5954f9f999f4448633647 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/525186 Reviewed-by: Michael Ludwig <michaelludwig@google.com> Commit-Queue: Greg Daniel <egdaniel@google.com> Reviewed-by: Jim Van Verth <jvanverth@google.com>
This commit is contained in:
parent
b2af4f4e8d
commit
5d67b1797a
@ -256,7 +256,7 @@ struct GPUTarget : public Target {
|
||||
this->contextInfo.testContext()->flushAndWaitOnSync(contextInfo.directContext());
|
||||
}
|
||||
}
|
||||
void fence() override { this->contextInfo.testContext()->finish(); }
|
||||
void syncCPU() override { this->contextInfo.testContext()->finish(); }
|
||||
|
||||
bool needsFrameTiming(int* maxFrameLag) const override {
|
||||
if (!this->contextInfo.testContext()->getMaxGpuFrameLag(maxFrameLag)) {
|
||||
@ -297,20 +297,16 @@ struct GPUTarget : public Target {
|
||||
#ifdef SK_GRAPHITE_ENABLED
|
||||
struct GraphiteTarget : public Target {
|
||||
explicit GraphiteTarget(const Config& c) : Target(c) {}
|
||||
using ContextInfo = skiatest::graphite::ContextFactory::ContextInfo;
|
||||
using TestContext = skiatest::graphite::GraphiteTestContext;
|
||||
using ContextFactory = skiatest::graphite::ContextFactory;
|
||||
|
||||
std::unique_ptr<ContextFactory> factory;
|
||||
|
||||
TestContext* testContext;
|
||||
skgpu::Context* context;
|
||||
std::unique_ptr<skgpu::Recorder> recorder;
|
||||
|
||||
~GraphiteTarget() override {
|
||||
// TODO: We need to get the ref counting correct for MtlPipeline and MTLDepthStencilState
|
||||
// since right now they live on the Recorder. Until then make sure the Context has finished
|
||||
// all its work.
|
||||
this->fence();
|
||||
}
|
||||
~GraphiteTarget() override {}
|
||||
|
||||
void setup() override {}
|
||||
|
||||
@ -318,17 +314,15 @@ struct GraphiteTarget : public Target {
|
||||
if (context && recorder) {
|
||||
std::unique_ptr<skgpu::Recording> recording = this->recorder->snap();
|
||||
if (recording) {
|
||||
skgpu::InsertRecordingInfo info;
|
||||
info.fRecording = recording.get();
|
||||
this->context->insertRecording(info);
|
||||
this->testContext->submitRecordingAndWaitOnSync(this->context, recording.get());
|
||||
}
|
||||
context->submit(skgpu::SyncToCpu::kNo);
|
||||
}
|
||||
}
|
||||
void fence() override {
|
||||
void syncCPU() override {
|
||||
if (context && recorder) {
|
||||
// TODO: have a way to sync work without submitting a Recording, which is currently
|
||||
// required.
|
||||
// required. Probably need to get to the point where the backend command buffers are
|
||||
// stored on the Context and not Recordings before this is feasible.
|
||||
std::unique_ptr<skgpu::Recording> recording = this->recorder->snap();
|
||||
if (recording) {
|
||||
skgpu::InsertRecordingInfo info;
|
||||
@ -340,14 +334,7 @@ struct GraphiteTarget : public Target {
|
||||
}
|
||||
|
||||
bool needsFrameTiming(int* maxFrameLag) const override {
|
||||
// TODO
|
||||
#if 0
|
||||
if (!this->contextInfo.testContext()->getMaxGpuFrameLag(maxFrameLag)) {
|
||||
// Frame lag is unknown.
|
||||
*maxFrameLag = FLAGS_gpuFrameLag;
|
||||
}
|
||||
#endif
|
||||
*maxFrameLag = FLAGS_gpuFrameLag;
|
||||
SkAssertResult(this->testContext->getMaxGpuFrameLag(maxFrameLag));
|
||||
return true;
|
||||
}
|
||||
bool init(SkImageInfo info, Benchmark* bench) override {
|
||||
@ -357,11 +344,12 @@ struct GraphiteTarget : public Target {
|
||||
// context options when we make the factory here.
|
||||
this->factory = std::make_unique<ContextFactory>();
|
||||
|
||||
auto [testContext, ctx] = this->factory->getContextInfo(this->config.graphiteCtxType);
|
||||
auto [testCtx, ctx] = this->factory->getContextInfo(this->config.graphiteCtxType);
|
||||
if (!ctx) {
|
||||
return false;
|
||||
}
|
||||
context = ctx;
|
||||
this->testContext = testCtx;
|
||||
this->context = ctx;
|
||||
|
||||
this->recorder = this->context->makeRecorder();
|
||||
if (!this->recorder) {
|
||||
@ -539,7 +527,7 @@ static int setup_gpu_bench(Target* target, Benchmark* bench, int maxGpuFrameLag)
|
||||
loops = clamp_loops(loops);
|
||||
|
||||
// Make sure we're not still timing our calibration.
|
||||
target->fence();
|
||||
target->syncCPU();
|
||||
} else {
|
||||
loops = detect_forever_loops(loops);
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ struct Target {
|
||||
/** Called between benchmarks (or between calibration and measured
|
||||
runs) to make sure all pending work in drivers / threads is
|
||||
complete. */
|
||||
virtual void fence() { }
|
||||
virtual void syncCPU() { }
|
||||
|
||||
/** CPU-like targets can just be timed, but GPU-like
|
||||
targets need to pay attention to frame boundaries
|
||||
|
@ -74,6 +74,11 @@ public:
|
||||
void insertRecording(const InsertRecordingInfo&);
|
||||
void submit(SyncToCpu = SyncToCpu::kNo);
|
||||
|
||||
/**
|
||||
* Checks whether any asynchronous work is complete and if so calls related callbacks.
|
||||
*/
|
||||
void checkAsyncWorkCompletion();
|
||||
|
||||
void preCompile(const PaintCombo&);
|
||||
|
||||
/**
|
||||
|
@ -80,6 +80,10 @@ void Context::submit(SyncToCpu syncToCpu) {
|
||||
fGpu->checkForFinishedWork(syncToCpu);
|
||||
}
|
||||
|
||||
// Checks for completed asynchronous work by forwarding to
// fGpu->checkForFinishedWork() with SyncToCpu::kNo.
void Context::checkAsyncWorkCompletion() {
|
||||
fGpu->checkForFinishedWork(SyncToCpu::kNo);
|
||||
}
|
||||
|
||||
void Context::preCompile(const PaintCombo& paintCombo) {
|
||||
static const Renderer* kRenderers[] = {
|
||||
&Renderer::StencilTessellatedCurvesAndTris(SkPathFillType::kWinding),
|
||||
|
@ -10,6 +10,10 @@
|
||||
#include "include/gpu/GrDirectContext.h"
|
||||
#include "src/core/SkTraceEvent.h"
|
||||
|
||||
#ifdef SK_GRAPHITE_ENABLED
|
||||
#include "experimental/graphite/include/Context.h"
|
||||
#endif
|
||||
|
||||
#include <chrono>
|
||||
|
||||
namespace sk_gpu_test {
|
||||
@ -19,7 +23,16 @@ void FlushFinishTracker::waitTillFinished() {
|
||||
auto begin = std::chrono::steady_clock::now();
|
||||
auto end = begin;
|
||||
while (!fIsFinished && (end - begin) < std::chrono::seconds(2)) {
|
||||
fContext->checkAsyncWorkCompletion();
|
||||
if (fContext) {
|
||||
fContext->checkAsyncWorkCompletion();
|
||||
} else {
|
||||
#ifdef SK_GRAPHITE_ENABLED
|
||||
SkASSERT(fGraphiteContext);
|
||||
fGraphiteContext->checkAsyncWorkCompletion();
|
||||
#else
|
||||
SkDEBUGFAIL("No valid context");
|
||||
#endif
|
||||
}
|
||||
end = std::chrono::steady_clock::now();
|
||||
}
|
||||
if (!fIsFinished) {
|
||||
|
@ -12,6 +12,10 @@
|
||||
|
||||
class GrDirectContext;
|
||||
|
||||
#ifdef SK_GRAPHITE_ENABLED
|
||||
namespace skgpu { class Context; }
|
||||
#endif
|
||||
|
||||
namespace sk_gpu_test {
|
||||
|
||||
class FlushFinishTracker : public SkRefCnt {
|
||||
@ -23,13 +27,19 @@ public:
|
||||
}
|
||||
|
||||
FlushFinishTracker(GrDirectContext* context) : fContext(context) {}
|
||||
#ifdef SK_GRAPHITE_ENABLED
|
||||
FlushFinishTracker(skgpu::Context* context) : fGraphiteContext(context) {}
|
||||
#endif
|
||||
|
||||
void setFinished() { fIsFinished = true; }
|
||||
|
||||
void waitTillFinished();
|
||||
|
||||
private:
|
||||
GrDirectContext* fContext;
|
||||
GrDirectContext* fContext = nullptr;
|
||||
#ifdef SK_GRAPHITE_ENABLED
|
||||
skgpu::Context* fGraphiteContext = nullptr;
|
||||
#endif
|
||||
|
||||
// Currently this bool is not atomic because all current uses of this class happen
|
||||
// on a single thread. In other words we call flush, checkAsyncWorkCompletion, and
|
||||
|
@ -7,10 +7,43 @@
|
||||
|
||||
#include "tools/graphite/GraphiteTestContext.h"
|
||||
|
||||
#include "experimental/graphite/include/Context.h"
|
||||
#include "experimental/graphite/include/GraphiteTypes.h"
|
||||
#include "experimental/graphite/include/Recording.h"
|
||||
#include "src/core/SkTraceEvent.h"
|
||||
#include "tools/gpu/FlushFinishTracker.h"
|
||||
|
||||
namespace skiatest::graphite {
|
||||
|
||||
// Default constructor; performs no work of its own.
GraphiteTestContext::GraphiteTestContext() {}
|
||||
|
||||
// Destructor; performs no work of its own.
GraphiteTestContext::~GraphiteTestContext() {}
|
||||
|
||||
// Inserts `recording` into `context` with a finished callback attached and then
// submits with SyncToCpu::kNo.
//
// fFinishTrackers is used as a ring of slots indexed by fCurrentFlushIdx. If the
// current slot still holds a tracker from an earlier call, that tracker is waited
// on (waitTillFinished) before the slot is replaced with a fresh tracker for this
// submission. Both `context` and `recording` must be non-null (asserted).
void GraphiteTestContext::submitRecordingAndWaitOnSync(skgpu::Context* context,
|
||||
skgpu::Recording* recording) {
|
||||
TRACE_EVENT0("skia.gpu", TRACE_FUNC);
|
||||
SkASSERT(context);
|
||||
SkASSERT(recording);
|
||||
|
||||
// The slot we are about to reuse may still track an earlier submission; wait on
// it before overwriting the slot.
if (fFinishTrackers[fCurrentFlushIdx]) {
|
||||
fFinishTrackers[fCurrentFlushIdx]->waitTillFinished();
|
||||
}
|
||||
|
||||
// Replace the slot's tracker with a new one bound to this context.
fFinishTrackers[fCurrentFlushIdx].reset(new sk_gpu_test::FlushFinishTracker(context));
|
||||
|
||||
// We add an additional ref to the current flush tracker here. This ref is owned by the finish
|
||||
// callback on the flush call. The finish callback will unref the tracker when called.
|
||||
fFinishTrackers[fCurrentFlushIdx]->ref();
|
||||
|
||||
// Hand the tracker to the insert call as the finished-callback context, with
// FlushFinishTracker::FlushFinished as the callback.
skgpu::InsertRecordingInfo info;
|
||||
info.fRecording = recording;
|
||||
info.fFinishedContext = fFinishTrackers[fCurrentFlushIdx].get();
|
||||
info.fFinishedProc = sk_gpu_test::FlushFinishTracker::FlushFinished;
|
||||
context->insertRecording(info);
|
||||
|
||||
// Submit with SyncToCpu::kNo; the tracker wait at the top of this function is the
// only wait performed here.
context->submit(skgpu::SyncToCpu::kNo);
|
||||
|
||||
// Advance to the next slot, wrapping around the tracker array.
fCurrentFlushIdx = (fCurrentFlushIdx + 1) % SK_ARRAY_COUNT(fFinishTrackers);
|
||||
}
|
||||
|
||||
} // namespace skiatest::graphite
|
||||
|
@ -11,7 +11,12 @@
|
||||
#include "experimental/graphite/include/GraphiteTypes.h"
|
||||
#include "include/core/SkRefCnt.h"
|
||||
|
||||
namespace skgpu { class Context; }
|
||||
namespace skgpu {
|
||||
class Context;
|
||||
class Recording;
|
||||
}
|
||||
|
||||
namespace sk_gpu_test { class FlushFinishTracker; }
|
||||
|
||||
namespace skiatest::graphite {
|
||||
|
||||
@ -30,7 +35,25 @@ public:
|
||||
|
||||
virtual std::unique_ptr<skgpu::Context> makeContext() = 0;
|
||||
|
||||
// Writes kMaxFrameLag to *maxFrameLag and always returns true.
bool getMaxGpuFrameLag(int *maxFrameLag) const {
|
||||
*maxFrameLag = kMaxFrameLag;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This will insert a Recording and submit work to the GPU. Additionally, we will add a finished
|
||||
* callback to our insert recording call. We allow ourselves to have kMaxFrameLag number of
|
||||
* unfinished flushes active on the GPU at a time. If we have 2 outstanding flushes then we will
|
||||
* wait on the CPU until one has finished.
|
||||
*/
|
||||
void submitRecordingAndWaitOnSync(skgpu::Context*, skgpu::Recording*);
|
||||
|
||||
protected:
|
||||
static constexpr int kMaxFrameLag = 3;
|
||||
|
||||
sk_sp<sk_gpu_test::FlushFinishTracker> fFinishTrackers[kMaxFrameLag - 1];
|
||||
int fCurrentFlushIdx = 0;
|
||||
|
||||
GraphiteTestContext();
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user