Add new gpu sink for DDLs

Having this as a sink rather than a Via allows us to do more aggressive things with threads and shared contexts.

Change-Id: I3ca1076686fa4f53387c12a9506e01910c1bc3e4
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/272016
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Robert Phillips <robertphillips@google.com>
This commit is contained in:
Robert Phillips 2020-02-19 14:14:47 -05:00 committed by Skia Commit-Bot
parent 9d4e31d6cd
commit 291f3405f3
5 changed files with 183 additions and 0 deletions

View File

@ -944,6 +944,8 @@ static Sink* create_sink(const GrContextOptions& grCtxOptions, const SkCommandLi
return new GPUPersistentCacheTestingSink(gpuConfig, grCtxOptions);
} else if (gpuConfig->getTestPrecompile()) {
return new GPUPrecompileTestingSink(gpuConfig, grCtxOptions);
} else if (gpuConfig->getUseDDLSink()) {
return new GPUDDLSink(gpuConfig, grCtxOptions);
} else {
return new GPUSink(gpuConfig, grCtxOptions);
}

View File

@ -79,6 +79,7 @@ static DEFINE_bool(RAW_threading, true, "Allow RAW decodes to run on multiple th
DECLARE_int(gpuThreads);
using sk_gpu_test::GrContextFactory;
using sk_gpu_test::ContextInfo;
namespace DM {
@ -1615,6 +1616,148 @@ Result GPUPrecompileTestingSink::draw(const Src& src, SkBitmap* dst, SkWStream*
return compare_bitmaps(reference, *dst);
}
// Sizes of the two FIFO thread pools owned by every GPUDDLSink.
static constexpr int kDDLRecordingThreadCount = 2;
static constexpr int kDDLGpuThreadCount = 1;

// Forwards the config and context options to GPUSink and creates the sink's two executors:
// one pool for the recording task group and a single-threaded one for the GPU task group.
GPUDDLSink::GPUDDLSink(const SkCommandLineConfigGpu* config, const GrContextOptions& grCtxOptions)
    : INHERITED(config, grCtxOptions)
    , fRecordingThreadPool(SkExecutor::MakeFIFOThreadPool(kDDLRecordingThreadCount))
    , fGPUThread(SkExecutor::MakeFIFOThreadPool(kDDLGpuThreadCount)) {}
// Renders 'src' into 'dstSurface' by way of tiled DDLs.
//
// The source is first captured as an SkPicture on the calling thread. That picture is then
// deflated through a DDLPromiseImageHelper, its images are uploaded via 'gpuTaskGroup', and a
// DDLTileHelper splits the destination into tiles whose work is spread across
// 'recordingTaskGroup' and 'gpuTaskGroup'. The call blocks until both task groups have drained.
//
// Returns the failing Result from src.draw() if recording fails, a fatal Result if the picture
// cannot be deflated, and Result::Ok() otherwise.
Result GPUDDLSink::ddlDraw(const Src& src,
                           sk_sp<SkSurface> dstSurface,
                           SkTaskGroup* recordingTaskGroup,
                           SkTaskGroup* gpuTaskGroup,
                           GrContext* gpuThreadCtx) const {
    const auto dims = src.size();

    // Step 1: capture the source's drawing commands in a picture.
    SkPictureRecorder recorder;
    auto* recordingCanvas = recorder.beginRecording(SkIntToScalar(dims.width()),
                                                    SkIntToScalar(dims.height()));
    Result recordResult = src.draw(recordingCanvas);
    if (!recordResult.isOk()) {
        return recordResult;
    }
    sk_sp<SkPicture> recorded(recorder.finishRecordingAsPicture());

    // The full area that will ultimately be drawn.
    SkIRect fullRect = SkIRect::MakeWH(dims.fWidth, dims.fHeight);

    // Step 2: deflate the picture and get its images onto the GPU.
    DDLPromiseImageHelper promiseImages;
    sk_sp<SkData> deflatedSKP = promiseImages.deflateSKP(recorded.get());
    if (!deflatedSKP) {
        return Result::Fatal("GPUDDLSink: Couldn't deflate SkPicture");
    }

    promiseImages.createCallbackContexts(gpuThreadCtx);

    // TODO: move the image upload to the utility thread
    promiseImages.uploadAllToGPU(gpuTaskGroup, gpuThreadCtx);

    // Step 3: tile the destination and hand the per-tile work to the task groups.
    constexpr int kNumDivisions = 3;
    DDLTileHelper tileHelper(dstSurface, fullRect, kNumDivisions);

    // Each tile reinflates its own copy of the compressed picture.
    tileHelper.createSKPPerTile(deflatedSKP.get(), promiseImages);

    tileHelper.kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, gpuThreadCtx);

    // This is intended to be the one explicit flush of the whole DDL draw.
    gpuTaskGroup->add([gpuThreadCtx]() { gpuThreadCtx->flush(); });

    // Everything has been scheduled; all that is left is to wait for it to finish.
    recordingTaskGroup->wait();  // expected to be a no-op by this point
    gpuTaskGroup->wait();

    return Result::Ok();
}
// Draws 'src' through the DDL pipeline and reads the rendered pixels back into 'dst'.
//
// The main GPU context is created on the calling thread and used there to build the
// destination surface. It is then handed to the single GPU thread for the duration of
// ddlDraw(), and finally taken back by the calling thread for stats, readback and cleanup.
//
// 'log' receives cache/GPU stats when FLAGS_gpuStats is set. 'stream' is not used.
//
// Returns a fatal Result if the context or surface cannot be created or the readback fails,
// the Result of ddlDraw() if that is not Ok, and Result::Ok() otherwise.
Result GPUDDLSink::draw(const Src& src, SkBitmap* dst, SkWStream* stream, SkString* log) const {
    GrContextOptions contextOptions = this->baseContextOptions();
    src.modifyGrContextOptions(&contextOptions);
    contextOptions.fPersistentCache = nullptr;
    contextOptions.fExecutor = nullptr;

    GrContextFactory factory(contextOptions);

    // This captures the context destined to be the main gpu context
    ContextInfo mainCtxInfo = factory.getContextInfo(this->contextType(), this->contextOverrides());
    sk_gpu_test::TestContext* mainTestCtx = mainCtxInfo.testContext();
    GrContext* mainCtx = mainCtxInfo.grContext();
    if (!mainCtx) {
        return Result::Fatal("Could not create context.");
    }

    SkASSERT(mainCtx->priv().getGpu());

    // TODO: make use of 'otherCtx' for uploads & compilation
#if 0
    // This captures the context destined to be the utility context. It is in a share group
    // with the main context
    ContextInfo otherCtxInfo = factory.getSharedContextInfo(mainCtx);
    sk_gpu_test::TestContext* otherTestCtx = otherCtxInfo.testContext();
    GrContext* otherCtx = otherCtxInfo.grContext();
    if (!otherCtx) {
        return Result::Fatal("Could not create shared context.");
    }

    SkASSERT(otherCtx->priv().getGpu());
#endif

    SkTaskGroup recordingTaskGroup(*fRecordingThreadPool);
    SkTaskGroup gpuTaskGroup(*fGPUThread);

    // Make sure 'mainCtx' is current
    mainTestCtx->makeCurrent();

    GrBackendTexture backendTexture;
    GrBackendRenderTarget backendRT;
    sk_sp<SkSurface> surface = this->createDstSurface(mainCtx, src.size(),
                                                      &backendTexture, &backendRT);
    if (!surface) {
        return Result::Fatal("Could not create a surface.");
    }

    // 'mainCtx' is being shifted to the gpuThread. Leave the main thread w/o
    // a context.
    mainTestCtx->makeNotCurrent();

    // Job one for the GPU thread is to make 'mainCtx' current!
    gpuTaskGroup.add([mainTestCtx] { mainTestCtx->makeCurrent(); });

    Result result = this->ddlDraw(src, surface, &recordingTaskGroup, &gpuTaskGroup, mainCtx);

    // Ask the GPU thread to give 'mainCtx' back and block until it has actually done so.
    // Without the wait, the makeCurrent() below could run while the context is still current
    // on the GPU thread. This also covers ddlDraw's early-out paths, which return before
    // waiting on the GPU task group.
    gpuTaskGroup.add([mainTestCtx] { mainTestCtx->makeNotCurrent(); });
    gpuTaskGroup.wait();

    // The GPU thread has released 'mainCtx', so it is safe to make it current here.
    mainTestCtx->makeCurrent();

    bool readBackSucceeded = false;
    if (result.isOk()) {
        if (FLAGS_gpuStats) {
            mainCtx->priv().dumpCacheStats(log);
            mainCtx->priv().dumpGpuStats(log);
#if 0
            otherCtx->priv().dumpCacheStats(log);
            otherCtx->priv().dumpGpuStats(log);
#endif
        }

        readBackSucceeded = this->readBack(surface.get(), dst);
    }

    // Release the destination surface and whatever backend object backs it on every path,
    // including the failure paths, so a failed draw or readback doesn't leak them.
    surface.reset();
    if (backendTexture.isValid()) {
        mainCtx->deleteBackendTexture(backendTexture);
    }
    if (backendRT.isValid()) {
        mainCtx->priv().getGpu()->deleteTestingOnlyBackendRenderTarget(backendRT);
    }

    if (!result.isOk()) {
        return result;
    }
    if (!readBackSucceeded) {
        return Result::Fatal("Could not readback from surface.");
    }

    return Result::Ok();
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
static Result draw_skdocument(const Src& src, SkDocument* doc, SkWStream* dst) {
if (src.size().isEmpty()) {

View File

@ -450,6 +450,33 @@ private:
typedef GPUSink INHERITED;
};
// A GPU sink that tries to mimic how Chrome uses DDLs more closely than the plain GPUSink.
// Today it:
//    records the DDLs on dedicated recording threads
//    runs all of the GPU work on one dedicated GPU thread
// Planned extensions:
//    do uploads on a utility thread that has access to a shared context
//    compile programs on that utility thread
//    schedule gpu tasks at a fine grain, based on their image and program prerequisites
//    build a single "compositing" DDL that is replayed last
class GPUDDLSink : public GPUSink {
public:
    GPUDDLSink(const SkCommandLineConfigGpu* config, const GrContextOptions& grCtxOptions);

    Result draw(const Src& src, SkBitmap* dst, SkWStream* stream, SkString* log) const override;

private:
    // Draws 'src' into 'dstSurface', scheduling work on the two task groups.
    // 'gpuThreadCtx' is the context used by the tasks placed on 'gpuTaskGroup'.
    Result ddlDraw(const Src& src,
                   sk_sp<SkSurface> dstSurface,
                   SkTaskGroup* recordingTaskGroup,
                   SkTaskGroup* gpuTaskGroup,
                   GrContext* gpuThreadCtx) const;

    // Executor backing the recording task group.
    std::unique_ptr<SkExecutor> fRecordingThreadPool;
    // Executor backing the GPU task group.
    std::unique_ptr<SkExecutor> fGPUThread;

    typedef GPUSink INHERITED;
};
class PDFSink : public Sink {
public:
PDFSink(bool pdfa, SkScalar rasterDpi) : fPDFA(pdfa), fRasterDpi(rasterDpi) {}

View File

@ -71,6 +71,7 @@ static const struct {
{ "gltestglslcache", "gpu", "api=gl,testPersistentCache=2" },
{ "gltestprecompile", "gpu", "api=gl,testPrecompile=true" },
{ "glestestprecompile", "gpu", "api=gles,testPrecompile=true" },
{ "glddl", "gpu", "api=gl,useDDLSink=true" },
{ "angle_d3d11_es2", "gpu", "api=angle_d3d11_es2" },
{ "angle_d3d11_es3", "gpu", "api=angle_d3d11_es3" },
{ "angle_d3d9_es2", "gpu", "api=angle_d3d9_es2" },
@ -101,12 +102,14 @@ static const struct {
{ "vkbetex", "gpu", "api=vulkan,surf=betex" },
{ "vkbert", "gpu", "api=vulkan,surf=bert" },
{ "vktestpersistentcache", "gpu", "api=vulkan,testPersistentCache=1" },
{ "vkddl", "gpu", "api=vulkan,useDDLSink=true" },
#endif
#ifdef SK_METAL
{ "mtl", "gpu", "api=metal" },
{ "mtl1010102", "gpu", "api=metal,color=1010102" },
{ "mtlmsaa4", "gpu", "api=metal,samples=4" },
{ "mtlmsaa8", "gpu", "api=metal,samples=8" },
{ "mtlddl", "gpu", "api=metal,useDDLSink=true" },
#endif
#ifdef SK_DIRECT3D
{ "d3d", "gpu", "api=direct3d" },
@ -455,6 +458,7 @@ SkCommandLineConfigGpu::SkCommandLineConfigGpu(const SkString& tag,
bool testThreading,
int testPersistentCache,
bool testPrecompile,
bool useDDLSink,
SurfType surfType)
: SkCommandLineConfig(tag, SkString("gpu"), viaParts)
, fContextType(contextType)
@ -467,6 +471,7 @@ SkCommandLineConfigGpu::SkCommandLineConfigGpu(const SkString& tag,
, fTestThreading(testThreading)
, fTestPersistentCache(testPersistentCache)
, fTestPrecompile(testPrecompile)
, fUseDDLSink(useDDLSink)
, fSurfType(surfType) {
if (!useStencilBuffers) {
fContextOverrides |= ContextOverrides::kAvoidStencilBuffers;
@ -487,6 +492,7 @@ SkCommandLineConfigGpu* parse_command_line_config_gpu(const SkString&
bool testThreading = false;
int testPersistentCache = 0;
bool testPrecompile = false;
bool useDDLs = false;
SkCommandLineConfigGpu::SurfType surfType = SkCommandLineConfigGpu::SurfType::kDefault;
bool parseSucceeded = false;
@ -504,6 +510,7 @@ SkCommandLineConfigGpu* parse_command_line_config_gpu(const SkString&
extendedOptions.get_option_bool("testThreading", &testThreading) &&
extendedOptions.get_option_int("testPersistentCache", &testPersistentCache) &&
extendedOptions.get_option_bool("testPrecompile", &testPrecompile) &&
// The key must match the "useDDLSink=true" spelling used by the predefined *ddl configs.
extendedOptions.get_option_bool("useDDLSink", &useDDLs) &&
extendedOptions.get_option_gpu_surf_type("surf", &surfType);
// testing threading and the persistent cache are mutually exclusive.
@ -523,6 +530,7 @@ SkCommandLineConfigGpu* parse_command_line_config_gpu(const SkString&
testThreading,
testPersistentCache,
testPrecompile,
useDDLs,
surfType);
}

View File

@ -63,6 +63,7 @@ public:
bool testThreading,
int testPersistentCache,
bool testPrecompile,
bool useDDLSink,
SurfType);
const SkCommandLineConfigGpu* asConfigGpu() const override { return this; }
@ -76,6 +77,7 @@ public:
bool getTestThreading() const { return fTestThreading; }
int getTestPersistentCache() const { return fTestPersistentCache; }
bool getTestPrecompile() const { return fTestPrecompile; }
bool getUseDDLSink() const { return fUseDDLSink; }
SurfType getSurfType() const { return fSurfType; }
private:
@ -89,6 +91,7 @@ private:
bool fTestThreading;
int fTestPersistentCache;
bool fTestPrecompile;
bool fUseDDLSink;
SurfType fSurfType;
};