Make use of shareable promise images in tools

The main change here is just fusing the following three methods into DDLPromiseImageHelper::recreateSKP:
   DDLPromiseImageHelper::deflateSKP
   DDLPromiseImageHelper::createCallbackContexts
   DDLTileHelper::createSKP

All the remaining changes are just API fallout from that.

Bug: skia:11728
Change-Id: Iae2ce65983ad56c8288bdc830e248394a0055bfb
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/389925
Reviewed-by: Adlai Holler <adlai@google.com>
Commit-Queue: Robert Phillips <robertphillips@google.com>
This commit is contained in:
Robert Phillips 2021-03-29 13:29:40 -04:00 committed by Skia Commit-Bot
parent 3079266f92
commit 0d8722c8b5
7 changed files with 87 additions and 111 deletions

View File

@ -1813,13 +1813,11 @@ Result GPUDDLSink::ddlDraw(const Src& src,
SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADataTypes(*dContext);
DDLPromiseImageHelper promiseImageHelper(supportedYUVADataTypes);
sk_sp<SkData> compressedPictureData = promiseImageHelper.deflateSKP(inputPicture.get());
if (!compressedPictureData) {
return Result::Fatal("GPUDDLSink: Couldn't deflate SkPicture");
sk_sp<SkPicture> newSKP = promiseImageHelper.recreateSKP(dContext, inputPicture.get());
if (!newSKP) {
return Result::Fatal("GPUDDLSink: Couldn't recreate the SKP");
}
promiseImageHelper.createCallbackContexts(dContext);
// 'gpuTestCtx/gpuThreadCtx' is being shifted to the gpuThread. Leave the main (this)
// thread w/o a context.
gpuTestCtx->makeNotCurrent();
@ -1840,10 +1838,7 @@ Result GPUDDLSink::ddlDraw(const Src& src,
tiles.createBackendTextures(gpuTaskGroup, dContext);
// Reinflate the compressed picture.
tiles.createSKP(dContext->threadSafeProxy(), compressedPictureData.get(), promiseImageHelper);
tiles.kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext);
tiles.kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext, newSKP.get());
// We have to wait for the recording threads to schedule all their work on the gpu thread
// before we can schedule the composition draw and the flush. Note that the gpu thread

View File

@ -285,14 +285,7 @@ static sk_sp<SkPicture> create_shared_skp(const char* src,
exitf("failed to parse file %s", srcfile.c_str());
}
sk_sp<SkData> compressedPictureData = promiseImageHelper->deflateSKP(skp.get());
if (!compressedPictureData) {
exitf("skp deflation failed %s", srcfile.c_str());
}
// TODO: use the new shared promise images to just create one skp here
return skp;
return promiseImageHelper->recreateSKP(dContext, skp.get());
}
static void check_params(GrDirectContext* dContext,
@ -383,8 +376,6 @@ int main(int argc, char** argv) {
check_params(mainContext->fDirectContext, width, height, ct, at, FLAGS_numSamples);
promiseImageHelper.createCallbackContexts(mainContext->fDirectContext);
// TODO: do this later on a utility thread!
promiseImageHelper.uploadAllToGPU(nullptr, mainContext->fDirectContext);

View File

@ -94,7 +94,8 @@ void PromiseImageCallbackContext::destroyBackendTexture() {
///////////////////////////////////////////////////////////////////////////////////////////////////
sk_sp<SkData> DDLPromiseImageHelper::deflateSKP(const SkPicture* inputPicture) {
sk_sp<SkPicture> DDLPromiseImageHelper::recreateSKP(GrDirectContext* dContext,
SkPicture* inputPicture) {
SkSerialProcs procs;
procs.fImageCtx = this;
@ -107,7 +108,14 @@ sk_sp<SkData> DDLPromiseImageHelper::deflateSKP(const SkPicture* inputPicture) {
return SkData::MakeWithCopy(&id, sizeof(id));
};
return inputPicture->serialize(&procs);
sk_sp<SkData> compressedPictureData = inputPicture->serialize(&procs);
if (!compressedPictureData) {
return nullptr;
}
this->createCallbackContexts(dContext);
return this->reinflateSKP(dContext->threadSafeProxy(), compressedPictureData.get());
}
static GrBackendTexture create_yuva_texture(GrDirectContext* direct,
@ -279,9 +287,8 @@ void DDLPromiseImageHelper::deleteAllFromGPU(SkTaskGroup* taskGroup, GrDirectCon
sk_sp<SkPicture> DDLPromiseImageHelper::reinflateSKP(
sk_sp<GrContextThreadSafeProxy> threadSafeProxy,
SkData* compressedPictureData,
SkTArray<sk_sp<SkImage>>* promiseImages) const {
DeserialImageProcContext procContext { std::move(threadSafeProxy), this, promiseImages };
SkData* compressedPictureData) {
DeserialImageProcContext procContext { std::move(threadSafeProxy), this };
SkDeserialProcs procs;
procs.fImageCtx = (void*) &procContext;
@ -290,13 +297,12 @@ sk_sp<SkPicture> DDLPromiseImageHelper::reinflateSKP(
return SkPicture::MakeFromData(compressedPictureData, &procs);
}
// This generates promise images to replace the indices in the compressed picture. This
// reconstitution is performed separately in each thread so we end up with multiple
// promise images referring to the same GrBackendTexture.
// This generates promise images to replace the indices in the compressed picture.
sk_sp<SkImage> DDLPromiseImageHelper::CreatePromiseImages(const void* rawData,
size_t length, void* ctxIn) {
size_t length,
void* ctxIn) {
DeserialImageProcContext* procContext = static_cast<DeserialImageProcContext*>(ctxIn);
const DDLPromiseImageHelper* helper = procContext->fHelper;
DDLPromiseImageHelper* helper = procContext->fHelper;
SkASSERT(length == sizeof(int));
@ -308,7 +314,7 @@ sk_sp<SkImage> DDLPromiseImageHelper::CreatePromiseImages(const void* rawData,
const DDLPromiseImageHelper::PromiseImageInfo& curImage = helper->getInfo(*indexPtr);
// If there is no callback context that means 'createCallbackContexts' determined the
// texture wouldn't fit on the GPU. Create a separate bitmap-backed image for each thread.
// texture wouldn't fit on the GPU. Create a bitmap-backed image.
if (!curImage.isYUV() && !curImage.callbackContext(0)) {
SkASSERT(curImage.baseLevel().isImmutable());
return curImage.baseLevel().asImage();
@ -361,7 +367,7 @@ sk_sp<SkImage> DDLPromiseImageHelper::CreatePromiseImages(const void* rawData,
(void*)curImage.refCallbackContext(0).release());
curImage.callbackContext(0)->wasAddedToImage();
}
procContext->fPromiseImages->push_back(image);
helper->fPromiseImages.push_back(image);
SkASSERT(image);
return image;
}

View File

@ -87,14 +87,13 @@ private:
//
// The way this works is:
// the original skp is converted to SkData and all its image info is extracted into this
// class and only indices into this class are left in the SkData (via deflateSKP)
// class and only indices into this class are left in the SkData
// the PromiseImageCallbackContexts are created for each image
// the SkData is then reinflated into an SkPicture with promise images replacing all the indices
// (all in recreateSKP)
//
// Prior to replaying in threads, all the images stored in this class are uploaded to the
// gpu and PromiseImageCallbackContexts are created for them (via uploadAllToGPU)
//
// Each thread reinflates the SkData into an SkPicture replacing all the indices w/
// promise images (all using the same GrBackendTexture and getting a ref to the
// appropriate PromiseImageCallbackContext) (via reinflateSKP).
// Prior to replaying in threads, all the images are uploaded to the gpu
// (in uploadAllToGPU)
//
// This class is then reset - dropping all of its refs on the PromiseImageCallbackContexts
//
@ -110,23 +109,24 @@ public:
: fSupportedYUVADataTypes(supportedYUVADataTypes) {}
~DDLPromiseImageHelper() = default;
// Convert the SkPicture into SkData replacing all the SkImages with an index.
sk_sp<SkData> deflateSKP(const SkPicture* inputPicture);
void createCallbackContexts(GrDirectContext*);
// Convert the input SkPicture into a new one which has promise images rather than live
// images.
sk_sp<SkPicture> recreateSKP(GrDirectContext*, SkPicture*);
void uploadAllToGPU(SkTaskGroup*, GrDirectContext*);
void deleteAllFromGPU(SkTaskGroup*, GrDirectContext*);
// reinflate a deflated SKP, replacing all the indices with promise images.
sk_sp<SkPicture> reinflateSKP(sk_sp<GrContextThreadSafeProxy>,
SkData* compressedPicture,
SkTArray<sk_sp<SkImage>>* promiseImages) const;
// Remove this class' refs on the PromiseImageCallbackContexts
void reset() { fImageInfo.reset(); }
// Remove this class' refs on the promise images and the PromiseImageCallbackContexts
void reset() {
fImageInfo.reset();
fPromiseImages.reset();
}
private:
void createCallbackContexts(GrDirectContext*);
// reinflate a deflated SKP, replacing all the indices with promise images.
sk_sp<SkPicture> reinflateSKP(sk_sp<GrContextThreadSafeProxy>, SkData* deflatedSKP);
// This is the information extracted into this class from the parsing of the skp file.
// Once it has all been uploaded to the GPU and distributed to the promise images, it
// is all dropped via "reset".
@ -213,8 +213,7 @@ private:
struct DeserialImageProcContext {
sk_sp<GrContextThreadSafeProxy> fThreadSafeProxy;
const DDLPromiseImageHelper* fHelper;
SkTArray<sk_sp<SkImage>>* fPromiseImages;
DDLPromiseImageHelper* fHelper;
};
static void CreateBETexturesForPromiseImage(GrDirectContext*, PromiseImageInfo*);
@ -236,7 +235,11 @@ private:
int findOrDefineImage(SkImage* image);
SkYUVAPixmapInfo::SupportedDataTypes fSupportedYUVADataTypes;
SkTArray<PromiseImageInfo> fImageInfo;
SkTArray<PromiseImageInfo> fImageInfo;
// TODO: review the use of 'fPromiseImages' - it doesn't seem useful/necessary
SkTArray<sk_sp<SkImage>> fPromiseImages; // All the promise images in the
// reconstituted picture
};
#endif

View File

@ -259,26 +259,17 @@ DDLTileHelper::DDLTileHelper(GrDirectContext* direct,
}
}
void DDLTileHelper::createSKP(sk_sp<GrContextThreadSafeProxy> threadSafeProxy,
SkData* compressedPictureData,
const DDLPromiseImageHelper& helper) {
SkASSERT(!fReconstitutedPicture);
fReconstitutedPicture = helper.reinflateSKP(std::move(threadSafeProxy), compressedPictureData,
&fPromiseImages);
}
void DDLTileHelper::createDDLsInParallel() {
void DDLTileHelper::createDDLsInParallel(SkPicture* picture) {
#if 1
SkTaskGroup().batch(this->numTiles(), [&](int i) {
fTiles[i].createDDL(fReconstitutedPicture.get());
fTiles[i].createDDL(picture);
});
SkTaskGroup().add([this]{ this->createComposeDDL(); });
SkTaskGroup().wait();
#else
// Use this code path to debug w/o threads
for (int i = 0; i < this->numTiles(); ++i) {
fTiles[i].createDDL(fReconstitutedPicture.get());
fTiles[i].createDDL(picture);
}
this->createComposeDDL();
#endif
@ -301,7 +292,8 @@ static void do_gpu_stuff(GrDirectContext* direct, DDLTileHelper::TileData* tile)
// We expect to have more than one recording thread but just one gpu thread
void DDLTileHelper::kickOffThreadedWork(SkTaskGroup* recordingTaskGroup,
SkTaskGroup* gpuTaskGroup,
GrDirectContext* dContext) {
GrDirectContext* dContext,
SkPicture* picture) {
SkASSERT(recordingTaskGroup && gpuTaskGroup && dContext);
for (int i = 0; i < this->numTiles(); ++i) {
@ -315,8 +307,8 @@ void DDLTileHelper::kickOffThreadedWork(SkTaskGroup* recordingTaskGroup,
// schedule gpu-thread processing of the DDL
// Note: a finer-grained approach would be to add a scheduling task which would evaluate
// which DDLs were ready to be rendered based on their prerequisites
recordingTaskGroup->add([this, tile, gpuTaskGroup, dContext]() {
tile->createDDL(fReconstitutedPicture.get());
recordingTaskGroup->add([tile, gpuTaskGroup, dContext, picture]() {
tile->createDDL(picture);
gpuTaskGroup->add([dContext, tile]() {
do_gpu_stuff(dContext, tile);
@ -328,17 +320,17 @@ void DDLTileHelper::kickOffThreadedWork(SkTaskGroup* recordingTaskGroup,
}
// Only called from skpbench
void DDLTileHelper::interleaveDDLCreationAndDraw(GrDirectContext* direct) {
void DDLTileHelper::interleaveDDLCreationAndDraw(GrDirectContext* dContext, SkPicture* picture) {
for (int i = 0; i < this->numTiles(); ++i) {
fTiles[i].createDDL(fReconstitutedPicture.get());
fTiles[i].draw(direct);
fTiles[i].createDDL(picture);
fTiles[i].draw(dContext);
}
}
// Only called from skpbench
void DDLTileHelper::drawAllTilesDirectly(GrDirectContext* context) {
void DDLTileHelper::drawAllTilesDirectly(GrDirectContext* dContext, SkPicture* picture) {
for (int i = 0; i < this->numTiles(); ++i) {
fTiles[i].drawSKPDirectly(context, fReconstitutedPicture.get());
fTiles[i].drawSKPDirectly(dContext, picture);
}
}

View File

@ -104,17 +104,12 @@ public:
int numXDivisions, int numYDivisions,
bool addRandomPaddingToDst);
// TODO: Move this to PromiseImageHelper and have one method that does all the work and
// returns the shared SkP.
void createSKP(sk_sp<GrContextThreadSafeProxy>,
SkData* compressedPictureData,
const DDLPromiseImageHelper&);
void kickOffThreadedWork(SkTaskGroup* recordingTaskGroup,
SkTaskGroup* gpuTaskGroup,
GrDirectContext*);
GrDirectContext*,
SkPicture*);
void createDDLsInParallel();
void createDDLsInParallel(SkPicture*);
// Create the DDL that will compose all the tile images into a final result.
void createComposeDDL();
@ -125,11 +120,11 @@ public:
// DDL creations and draws are interleaved to prevent starvation of the GPU.
// Note: this is somewhat of a misuse/pessimistic-use of DDLs since they are supposed to
// be created on a separate thread.
void interleaveDDLCreationAndDraw(GrDirectContext*);
void interleaveDDLCreationAndDraw(GrDirectContext*, SkPicture*);
// This draws all the per-tile SKPs directly into all of the tiles w/o converting them to
// DDLs first - all on a single thread.
void drawAllTilesDirectly(GrDirectContext*);
void drawAllTilesDirectly(GrDirectContext*, SkPicture*);
void dropCallbackContexts();
void resetAllTiles();
@ -147,9 +142,6 @@ private:
sk_sp<SkDeferredDisplayList> fComposeDDL;
const SkSurfaceCharacterization fDstCharacterization;
sk_sp<SkPicture> fReconstitutedPicture;
SkTArray<sk_sp<SkImage>> fPromiseImages; // All the promise images in the
// reconstituted picture
};
#endif

View File

@ -207,9 +207,10 @@ private:
std::vector<SkDocumentPage> fFrames;
};
static void ddl_sample(GrDirectContext* context, DDLTileHelper* tiles, GpuSync& gpuSync,
static void ddl_sample(GrDirectContext* dContext, DDLTileHelper* tiles, GpuSync& gpuSync,
Sample* sample, SkTaskGroup* recordingTaskGroup, SkTaskGroup* gpuTaskGroup,
std::chrono::high_resolution_clock::time_point* startStopTime) {
std::chrono::high_resolution_clock::time_point* startStopTime,
SkPicture* picture) {
using clock = std::chrono::high_resolution_clock;
clock::time_point start = *startStopTime;
@ -221,23 +222,23 @@ static void ddl_sample(GrDirectContext* context, DDLTileHelper* tiles, GpuSync&
// thread. The interleaving is so that we don't starve the GPU.
// One unfortunate side effect of this is that we can't delete the DDLs until after
// the GPU work is flushed.
tiles->interleaveDDLCreationAndDraw(context);
tiles->interleaveDDLCreationAndDraw(dContext, picture);
} else if (FLAGS_comparableSKP) {
// In this mode simply draw the re-inflated per-tile SKPs directly to the GPU w/o going
// through a DDL.
tiles->drawAllTilesDirectly(context);
tiles->drawAllTilesDirectly(dContext, picture);
} else {
tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, context);
tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext, picture);
recordingTaskGroup->wait();
}
if (gpuTaskGroup) {
gpuTaskGroup->add([&]{
flush_with_sync(context, gpuSync);
flush_with_sync(dContext, gpuSync);
});
gpuTaskGroup->wait();
} else {
flush_with_sync(context, gpuSync);
flush_with_sync(dContext, gpuSync);
}
*startStopTime = clock::now();
@ -248,7 +249,7 @@ static void ddl_sample(GrDirectContext* context, DDLTileHelper* tiles, GpuSync&
}
}
static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectContext *context,
static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectContext *dContext,
sk_sp<SkSurface> dstSurface, SkPicture* inputPicture,
std::vector<Sample>* samples) {
using clock = std::chrono::high_resolution_clock;
@ -260,24 +261,20 @@ static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectCon
SkIRect viewport = dstSurface->imageInfo().bounds();
SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADataTypes(*context);
SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADataTypes(*dContext);
DDLPromiseImageHelper promiseImageHelper(supportedYUVADataTypes);
sk_sp<SkData> compressedPictureData = promiseImageHelper.deflateSKP(inputPicture);
if (!compressedPictureData) {
sk_sp<SkPicture> newSKP = promiseImageHelper.recreateSKP(dContext, inputPicture);
if (!newSKP) {
exitf(ExitErr::kUnavailable, "DDL: conversion of skp failed");
}
promiseImageHelper.createCallbackContexts(context);
promiseImageHelper.uploadAllToGPU(nullptr, dContext);
promiseImageHelper.uploadAllToGPU(nullptr, context);
DDLTileHelper tiles(context, dstCharacterization, viewport,
DDLTileHelper tiles(dContext, dstCharacterization, viewport,
FLAGS_ddlTilingWidthHeight, FLAGS_ddlTilingWidthHeight,
/* addRandomPaddingToDst */ false);
tiles.createBackendTextures(nullptr, context);
tiles.createSKP(context->threadSafeProxy(), compressedPictureData.get(), promiseImageHelper);
tiles.createBackendTextures(nullptr, dContext);
// In comparable modes, there is no GPU thread. The following pointers are all null.
// Otherwise, we transfer testContext onto the GPU thread until after the bench.
@ -297,8 +294,8 @@ static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectCon
clock::time_point startStopTime = clock::now();
GpuSync gpuSync;
ddl_sample(context, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
gpuTaskGroup.get(), &startStopTime);
ddl_sample(dContext, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
gpuTaskGroup.get(), &startStopTime, newSKP.get());
clock::duration cumulativeDuration = std::chrono::milliseconds(0);
@ -308,8 +305,8 @@ static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectCon
do {
tiles.resetAllTiles();
ddl_sample(context, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
gpuTaskGroup.get(), &startStopTime);
ddl_sample(dContext, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
gpuTaskGroup.get(), &startStopTime, newSKP.get());
} while (sample.fDuration < sampleDuration);
cumulativeDuration += sample.fDuration;
@ -334,12 +331,12 @@ static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectCon
// Make sure the gpu has finished all its work before we exit this function and delete the
// fence.
context->flush();
context->submit(true);
dContext->flush();
dContext->submit(true);
promiseImageHelper.deleteAllFromGPU(nullptr, context);
promiseImageHelper.deleteAllFromGPU(nullptr, dContext);
tiles.deleteBackendTextures(nullptr, context);
tiles.deleteBackendTextures(nullptr, dContext);
}