skia2/bench/SKPBench.cpp
Michael Ludwig ce2e111e7c [graphite] Make nanobench measure the right amount of work in SKPs
SKPBench tiles the SkPicture into multiple surfaces, with tile WH
differing between GPU and CPU backends. Graphite was being incorrectly
classified as a CPU backend and ended up using smaller tile sizes, so
the SkPicture would be played back many more times relative to Ganesh.
In addition, each surface's contents are a subset of the total picture,
so batching was artificially limited compared to Ganesh.

Added a call to Device::flushPendingWorkToRecorder() in
Surface_Graphite::onFlush(). This ensures DrawPass::Make() is called
when nanobench and viewer are measuring the bulk of the work
(viewer's "flush" time was always 0 for Graphite since the
DrawPass::Make was only being counted in the total time when it
was executed for swapBuffers()). Flushing in this manner also prevents
batching across loops in nanobench, or resetting/clearing prior loops'
recorded draws when the benchmark starts with a fullscreen clear.

The SKPBench change should make all graphite benchmarks report lower
times compared to what's in perf.skia.org. The flush change should
increase their reported times for benchmarks that required multiple
loops to get an accurate time measurement (for expensive SKPs with
loops == 1, it shouldn't be affected).

Change-Id: I9256dbfc4c7c021377be8f5137b48036cc67e4a2
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/548157
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
2022-06-08 15:43:00 +00:00

187 lines
6.4 KiB
C++

/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "bench/SKPBench.h"
#include "include/core/SkSurface.h"
#include "include/gpu/GrDirectContext.h"
#include "src/gpu/ganesh/GrDirectContextPriv.h"
#include "tools/flags/CommandLineFlags.h"
// Command-line flags giving the maximum tile dimensions used when the SKP is split into
// per-tile surfaces. onPerCanvasPreDraw() picks the GPU or CPU pair depending on the canvas
// and clamps the chosen size to the bounds being drawn.
// These CPU tile sizes are not good per se, but they are similar to what Chrome uses.
static DEFINE_int(CPUbenchTileW, 256, "Tile width used for CPU SKP playback.");
static DEFINE_int(CPUbenchTileH, 256, "Tile height used for CPU SKP playback.");
static DEFINE_int(GPUbenchTileW, 1600, "Tile width used for GPU SKP playback.");
static DEFINE_int(GPUbenchTileH, 512, "Tile height used for GPU SKP playback.");
// Builds a benchmark that plays back `pic`.
//   name      - benchmark name returned by onGetName().
//   pic       - picture to play back; a reference is taken via SkRef().
//   clip      - rectangle intersected with the canvas bounds when tiling; also the reported size.
//   scale     - scale applied to each tile canvas; also baked into the unique name.
//   doLooping - when false, onDraw() asserts that it is asked for exactly one loop.
SKPBench::SKPBench(const char* name, const SkPicture* pic, const SkIRect& clip, SkScalar scale,
bool doLooping)
: fPic(SkRef(pic))
, fClip(clip)
, fScale(scale)
, fName(name)
, fDoLooping(doLooping) {
fUniqueName.printf("%s_%.2g", name, scale); // Scale makes this unique for perf.skia.org traces.
}
// Releases any tile surfaces that are still alive.
//
// fSurfaces is populated with the result of SkCanvas::makeSurface() and cleared with reset()
// elsewhere in this file, so its elements own their surfaces (presumably sk_sp<SkSurface>;
// confirm against SKPBench.h). Calling unref() on each element by hand, as this destructor
// used to, drops a reference the element still holds and leads to a second release when the
// array itself is destroyed. Letting the array release its elements is sufficient.
SKPBench::~SKPBench() {
    fSurfaces.reset();
}
// Returns the name this benchmark was constructed with.
const char* SKPBench::onGetName() {
    const char* benchName = fName.c_str();
    return benchName;
}
const char* SKPBench::onGetUniqueName() {
return fUniqueName.c_str();
}
// Splits the drawable area into a grid of tiles and creates one surface per tile.
//
// The area is the canvas' device clip bounds intersected with fClip and the picture's cull
// rect. Tile dimensions come from the GPU or CPU flags and are clamped to that area. Every
// tile surface is clipped to the area, inherits the parent canvas' matrix, and is scaled by
// fScale.
void SKPBench::onPerCanvasPreDraw(SkCanvas* canvas) {
    SkIRect bounds = canvas->getDeviceClipBounds();
    // NOTE(review): the results of intersect() are ignored here; confirm what it leaves in
    // `bounds` when the rectangles do not overlap.
    bounds.intersect(fClip);
    bounds.intersect(fPic->cullRect().roundOut());
    SkAssertResult(!bounds.isEmpty());

    // A canvas backed by a recording context (or, with Graphite, a recorder) gets GPU tiles.
    bool gpu = canvas->recordingContext() != nullptr;
#if defined(SK_GRAPHITE_ENABLED)
    gpu = gpu || canvas->recorder() != nullptr;
#endif

    const int maxTileW = gpu ? FLAGS_GPUbenchTileW : FLAGS_CPUbenchTileW;
    const int maxTileH = gpu ? FLAGS_GPUbenchTileH : FLAGS_CPUbenchTileH;
    const int tileW = std::min(maxTileW, bounds.width());
    const int tileH = std::min(maxTileH, bounds.height());

    const int xTiles = SkScalarCeilToInt(bounds.width() / SkIntToScalar(tileW));
    const int yTiles = SkScalarCeilToInt(bounds.height() / SkIntToScalar(tileH));
    const int tileCount = xTiles * yTiles;
    fSurfaces.reserve_back(tileCount);
    fTileRects.setReserve(tileCount);

    const SkImageInfo tileInfo = canvas->imageInfo().makeWH(tileW, tileH);

    for (int top = bounds.fTop; top < bounds.fBottom; top += tileH) {
        for (int left = bounds.fLeft; left < bounds.fRight; left += tileW) {
            const SkIRect tileRect = SkIRect::MakeXYWH(left, top, tileW, tileH);
            *fTileRects.append() = tileRect;

            fSurfaces.emplace_back(canvas->makeSurface(tileInfo));
            SkCanvas* tileCanvas = fSurfaces.back()->getCanvas();

            // A tile must never show anything the parent canvas would have clipped out, so
            // clip to the overall bounds expressed relative to this tile's origin.
            SkRect tileClip = SkRect::Make(bounds);
            tileClip.offset(-SkIntToScalar(tileRect.fLeft), -SkIntToScalar(tileRect.fTop));
            tileCanvas->clipRect(tileClip);

            tileCanvas->setMatrix(canvas->getLocalToDevice());
            tileCanvas->scale(fScale, fScale);
        }
    }
}
// Copies the final contents of every tile into the parent canvas and drops the tiles.
void SKPBench::onPerCanvasPostDraw(SkCanvas* canvas) {
    // The last set of tiles is composited into the main canvas in case the images are
    // being saved.
    const int tileCount = fTileRects.count();
    for (int tile = 0; tile < tileCount; ++tile) {
        const SkIRect& rect = fTileRects[tile];
        sk_sp<SkImage> snapshot = fSurfaces[tile]->makeImageSnapshot();
        canvas->drawImage(snapshot, SkIntToScalar(rect.fLeft), SkIntToScalar(rect.fTop));
    }

    fSurfaces.reset();
    fTileRects.rewind();
}
// Accepts every backend except the non-rendering one.
bool SKPBench::isSuitableFor(Backend backend) {
    if (backend == kNonRendering_Backend) {
        return false;
    }
    return true;
}
// Reports the benchmark size as the width and height of the clip rectangle.
SkIPoint SKPBench::onGetSize() {
    const auto clipW = fClip.width();
    const auto clipH = fClip.height();
    return SkIPoint::Make(clipW, clipH);
}
void SKPBench::onDraw(int loops, SkCanvas* canvas) {
SkASSERT(fDoLooping || 1 == loops);
while (1) {
this->drawPicture();
if (0 == --loops) {
break;
}
auto direct = canvas->recordingContext() ? canvas->recordingContext()->asDirectContext()
: nullptr;
// Ensure the GrContext doesn't combine ops across draw loops.
if (direct) {
direct->flushAndSubmit();
}
}
}
// Intentionally empty: this function performs no work and is only kept until it can be deleted.
void SKPBench::drawMPDPicture() {
// TODO: remove me
}
void SKPBench::drawPicture() {
for (int j = 0; j < fTileRects.count(); ++j) {
const SkMatrix trans = SkMatrix::Translate(-fTileRects[j].fLeft / fScale,
-fTileRects[j].fTop / fScale);
fSurfaces[j]->getCanvas()->drawPicture(fPic.get(), &trans, nullptr);
}
for (int j = 0; j < fTileRects.count(); ++j) {
fSurfaces[j]->flush();
}
}
#include "src/gpu/ganesh/GrGpu.h"
// Draws `picture` once into `canvas` and appends the resulting statistics to `keys`/`values`.
//
// The GPU and context stats are reset first so that the dumped numbers reflect only this one
// draw. The context is flushed after the draw and before the stats are dumped. The statement
// order below matters and should not be rearranged.
static void draw_pic_for_stats(SkCanvas* canvas,
GrDirectContext* dContext,
const SkPicture* picture,
SkTArray<SkString>* keys,
SkTArray<double>* values) {
// Start from a clean slate.
dContext->priv().resetGpuStats();
dContext->priv().resetContextStats();
canvas->drawPicture(picture);
dContext->flush();
// Append GPU, cache, and context stats, in that order.
dContext->priv().dumpGpuStatsKeyValuePairs(keys, values);
dContext->priv().dumpCacheStatsKeyValuePairs(keys, values);
dContext->priv().dumpContextStatsKeyValuePairs(keys, values);
}
// Collects GPU statistics for a single, untiled draw of the picture into `canvas`.
//
// Does nothing when the canvas has no direct context. Otherwise pending work is flushed and
// submitted, GPU resources are freed, the context and shader cache are reset, and one draw is
// performed whose stats are appended to `keys` and `values`.
void SKPBench::getGpuStats(SkCanvas* canvas, SkTArray<SkString>* keys, SkTArray<double>* values) {
    // we do a special single draw and then dump the key / value pairs
    auto* rContext = canvas->recordingContext();
    if (rContext == nullptr) {
        return;
    }
    auto* direct = rContext->asDirectContext();
    if (direct == nullptr) {
        return;
    }

    // TODO refactor this out if we want to test other subclasses of skpbench
    direct->flushAndSubmit();
    direct->freeGpuResources();
    direct->resetContext();
    direct->priv().getGpu()->resetShaderCacheForTesting();

    draw_pic_for_stats(canvas, direct, fPic.get(), keys, values);
}
// Regenerates the DMSAA stats on `rContext` from one tiled draw of the picture.
//
// Returns false without doing anything when `rContext` is null or has no direct context;
// returns true once the draw has been issued and flushed.
bool SKPBench::getDMSAAStats(GrRecordingContext* rContext) {
    auto* dContext = rContext ? rContext->asDirectContext() : nullptr;
    if (dContext == nullptr) {
        return false;
    }

    // Flush out earlier work, zero the DMSAA stats, then do a single tiled draw so the stats
    // end up holding the values specific to this SKP.
    dContext->flushAndSubmit();
    rContext->priv().dmsaaStats() = {};
    this->drawPicture();  // Draw tiled for DMSAA stats.
    dContext->flush();
    return true;
}