Adding a trace to nanobench for RP/VM comparison

Run nanobench with the --compare flag to collect data for the comparison:
./out/Release/nanobench --csv --config 8888 --skvm --compare --loops 100 --samples 1 --match $(ls skps | grep --invert-match svg ) 2>&1 | tee VM.data
./out/Release/nanobench --csv --config 8888 --forceRasterPipeline --compare --loops 100 --samples 1 --match $(ls skps | grep --invert-match svg ) 2>&1 | tee RP.data
awk 'BEGIN {OFS=","; fileNum = 0} ($2 ~ /MB/) && fileNum == 0 {vmvmcycles[$3] = $6; vmvmscan[$3] = $8; vmvmpixels[$3] = $10; vmvminterp[$3] = $11;  vmrpcycle[$3] = $14; vmrpscan[$3] = $16; vmrppixels[$3] = $18} ($2 ~ /MB/) && fileNum == 1  {print $3, vmvmcycles[$3], vmvmscan[$3], vmvmpixels[$3], vmvminterp[$3], $6, $8, $10, $11, $14, $16, $18} ENDFILE {fileNum += 1}' VM.data RP.data > compare.csv

You can see an example of the comparison table here:
https://docs.google.com/spreadsheets/d/1Q57oz6Jn8JPQkPzUbtO0fcZh2VhwPhxwsLaHnSb9uR0/edit#gid=406063636

Change-Id: I8fa35e3fb087bce00ab19355a3bc021334aa7a80
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/501337
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
This commit is contained in:
Julia Lavrova 2022-01-27 15:00:29 -05:00 committed by SkCQ
parent a7a2324f14
commit e855b1273f
9 changed files with 253 additions and 15 deletions

View File

@ -36,6 +36,7 @@
#include "src/core/SkOSFile.h"
#include "src/core/SkTaskGroup.h"
#include "src/core/SkTraceEvent.h"
#include "src/utils/SkBlitterTrace.h"
#include "src/utils/SkJSONWriter.h"
#include "src/utils/SkOSPath.h"
#include "src/utils/SkShaderUtils.h"
@ -72,6 +73,9 @@ extern bool gUseSkVMBlitter;
extern bool gSkVMAllowJIT;
extern bool gSkVMJITViaDylib;
extern SkBlitterTrace gSkVMBlitterTrace;
extern SkBlitterTrace gSkRPBlitterTrace;
#ifndef SK_BUILD_FOR_WIN
#include <unistd.h>
@ -191,6 +195,8 @@ static DEFINE_string(properties, "",
static DEFINE_bool(purgeBetweenBenches, false,
"Call SkGraphics::PurgeAllCaches() between each benchmark?");
static DEFINE_bool(compare, false, "Prepare results for Raster Pipeline/SkVM comparison.");
// Returns the SkTime::GetNSecs() reading scaled by 1e-6, i.e. expressed in milliseconds.
static double now_ms() { return SkTime::GetNSecs() * 1e-6; }
static SkString humanize(double ms) {
@ -281,6 +287,9 @@ struct GPUTarget : public Target {
}
};
static SkBlitterTrace gSkVMBlitterTraceCapture("VM");
static SkBlitterTrace gSkRPBlitterTraceCapture("RP");
static double time(int loops, Benchmark* bench, Target* target) {
SkCanvas* canvas = target->getCanvas();
if (canvas) {
@ -289,7 +298,13 @@ static double time(int loops, Benchmark* bench, Target* target) {
bench->preDraw(canvas);
double start = now_ms();
canvas = target->beginTiming(canvas);
gSkVMBlitterTrace.reset();
gSkRPBlitterTrace.reset();
bench->draw(loops, canvas);
gSkVMBlitterTraceCapture = gSkVMBlitterTrace;
gSkRPBlitterTraceCapture = gSkRPBlitterTrace;
target->endTiming();
double elapsed = now_ms() - start;
bench->postDraw(canvas);
@ -1266,6 +1281,11 @@ int main(int argc, char** argv) {
gSkVMAllowJIT = FLAGS_jit;
gSkVMJITViaDylib = FLAGS_dylib;
if (FLAGS_compare) {
gSkVMBlitterTrace.turnTrace(true);
gSkRPBlitterTrace.turnTrace(true);
}
int runs = 0;
BenchmarkStream benchStream;
log.beginObject("results");
@ -1414,11 +1434,18 @@ int main(int argc, char** argv) {
if (configs.count() == 1) {
config = ""; // Only print the config if we run the same bench on more than one.
}
SkDebugf("%4d/%-4dMB\t%s\t%s\n"
SkDebugf("%4d/%-4dMB\t%s\t%s "
, sk_tools::getCurrResidentSetSizeMB()
, sk_tools::getMaxResidentSetSizeMB()
, bench->getUniqueName()
, config);
if (FLAGS_compare) {
gSkVMBlitterTraceCapture.printCounts("Total");
SkDebugf("0 ");
gSkRPBlitterTraceCapture.printCounts("Total");
}
SkDebugf("\n");
} else if (FLAGS_quiet) {
const char* mark = " ";
const double stddev_percent =

View File

@ -32,6 +32,7 @@ skia_utils_sources = [
"$_src/utils/SkAnimCodecPlayer.cpp",
"$_src/utils/SkBase64.cpp",
"$_src/utils/SkBitSet.h",
"$_src/utils/SkBlitterTrace.h",
"$_src/utils/SkCallableTraits.h",
"$_src/utils/SkCamera.cpp",
"$_src/utils/SkCanvasStack.cpp",
@ -42,6 +43,7 @@ skia_utils_sources = [
"$_src/utils/SkClipStackUtils.cpp",
"$_src/utils/SkClipStackUtils.h",
"$_src/utils/SkCustomTypeface.cpp",
"$_src/utils/SkCycles.h",
"$_src/utils/SkDashPath.cpp",
"$_src/utils/SkDashPathPriv.h",
"$_src/utils/SkEventTracer.cpp",

View File

@ -632,7 +632,7 @@ SkBlitter* SkBlitterClipper::apply(SkBlitter* blitter, const SkRegion* clip,
bool SkBlitter::UseLegacyBlitter(const SkPixmap& device,
const SkPaint& paint,
const SkMatrix& matrix) {
if (gSkForceRasterPipelineBlitter) {
if (gSkForceRasterPipelineBlitter || gUseSkVMBlitter) {
return false;
}
#if defined(SK_FORCE_RASTER_PIPELINE_BLITTER)
@ -734,20 +734,15 @@ SkBlitter* SkBlitter::Choose(const SkPixmap& device,
paint.writable()->setDither(false);
}
if (gUseSkVMBlitter) {
if (auto blitter = SkVMBlitter::Make(device, *paint, matrixProvider,
alloc, clipShader)) {
return blitter;
}
}
// Same basic idea used a few times: try SkRP, then try SkVM, then give up with a null-blitter.
// (Setting gUseSkVMBlitter is the only way we prefer SkVM over SkRP at the moment.)
auto create_SkRP_or_SkVMBlitter = [&]() -> SkBlitter* {
if (!gUseSkVMBlitter) {
if (auto blitter = SkCreateRasterPipelineBlitter(device, *paint, matrixProvider,
alloc, clipShader)) {
return blitter;
}
}
if (auto blitter = SkVMBlitter::Make(device, *paint, matrixProvider,
alloc, clipShader)) {
return blitter;

View File

@ -17,6 +17,7 @@
#include "src/core/SkVMBlitter.h"
extern bool gUseSkVMBlitter;
extern bool gSkForceRasterPipelineBlitter;
SkSpriteBlitter::SkSpriteBlitter(const SkPixmap& source)
: fSource(source) {}
@ -199,7 +200,9 @@ SkBlitter* SkBlitter::ChooseSprite(const SkPixmap& dst, const SkPaint& paint,
SkSpriteBlitter* blitter = nullptr;
if (0 == SkColorSpaceXformSteps(source,dst).flags.mask() && !clipShader) {
if (gSkForceRasterPipelineBlitter) {
// Do not use any of these optimized memory blitters
} else if (0 == SkColorSpaceXformSteps(source,dst).flags.mask() && !clipShader) {
if (!blitter && SkSpriteBlitter_Memcpy::Supports(dst, source, paint)) {
blitter = alloc->make<SkSpriteBlitter_Memcpy>(source);
}

View File

@ -20,6 +20,9 @@
#include "src/core/SkRasterPipeline.h"
#include "src/core/SkUtils.h"
#include "src/shaders/SkShaderBase.h"
#include "src/utils/SkBlitterTrace.h"
SkBlitterTrace gSkRPBlitterTrace("RP", false);
class SkRasterPipelineBlitter final : public SkBlitter {
public:
@ -47,6 +50,7 @@ public:
void blitV (int x, int y, int height, SkAlpha alpha) override;
private:
void blitRectWithTrace(int x, int y, int w, int h, bool trace);
void append_load_dst (SkRasterPipeline*) const;
void append_store (SkRasterPipeline*) const;
@ -339,7 +343,15 @@ void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
}
// Blits the w x h rectangle at (x, y) by forwarding to blitRectWithTrace() with
// trace == true, so this entry point always asks for the blit to be recorded.
void SkRasterPipelineBlitter::blitRect(int x, int y, int w, int h) {
this->blitRectWithTrace(x, y, w, h, true);
}
void SkRasterPipelineBlitter::blitRectWithTrace(int x, int y, int w, int h, bool trace) {
if (fMemset2D) {
SkBlitterTrace::Step trace1(trace ? &gSkRPBlitterTrace : nullptr,
"blitRectByMemset",
/*scanlines=*/h,
/*pixels=*/w * h);
fMemset2D(&fDst, x,y, w,h, fMemsetColor);
return;
}
@ -373,6 +385,10 @@ void SkRasterPipelineBlitter::blitRect(int x, int y, int w, int h) {
fBlitRect = p.compile();
}
SkBlitterTrace::Step trace2(trace ? &gSkRPBlitterTrace : nullptr,
"blitRect",
/*scanlines=*/h,
/*pixels=*/w * h);
fBlitRect(x,y,w,h);
}
@ -397,10 +413,12 @@ void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const
fBlitAntiH = p.compile();
}
SkBlitterTrace::Step trace(&gSkRPBlitterTrace, "blitAntiH", /*scanlines=*/1ul, /*pixels=*/0ul);
for (int16_t run = *runs; run > 0; run = *runs) {
trace.add(/*scanlines=*/0, /*pixels=*/run);
switch (*aa) {
case 0x00: break;
case 0xff: this->blitH(x,y,run); break;
case 0xff:this->blitRectWithTrace(x,y,run, 1, false); break;
default:
fCurrentCoverage = *aa * (1/255.0f);
fBlitAntiH(x,y,run,1);
@ -554,5 +572,9 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
}
SkASSERT(blitter);
SkBlitterTrace::Step trace(&gSkRPBlitterTrace,
"blitMask",
/*scanlines=*/clip.height(),
/*pixels=*/clip.width() * clip.height());
(*blitter)(clip.left(),clip.top(), clip.width(),clip.height());
}

View File

@ -21,9 +21,12 @@
#include "src/core/SkVM.h"
#include "src/core/SkVMBlitter.h"
#include "src/shaders/SkColorFilterShader.h"
#include "src/utils/SkBlitterTrace.h"
#include <cinttypes>
SkBlitterTrace gSkVMBlitterTrace("VM", false);
namespace {
// Uniforms set by the Blitter itself,
@ -667,8 +670,10 @@ void SkVMBlitter::blitH(int x, int y, int w) {
skvm::Program* blit_h = this->buildProgram(Coverage::Full);
this->updateUniforms(x+w, y);
if (const void* sprite = this->isSprite(x,y)) {
SkBlitterTrace::Step trace(&gSkVMBlitterTrace, "blitH1", /*scanlines=*/1, /*pixels=*/w);
blit_h->eval(w, fUniforms.buf.data(), fDevice.addr(x,y), sprite);
} else {
SkBlitterTrace::Step trace(&gSkVMBlitterTrace, "blitH2", /*scanlines=*/1, /*pixels=*/w);
blit_h->eval(w, fUniforms.buf.data(), fDevice.addr(x,y));
}
}
@ -677,7 +682,9 @@ void SkVMBlitter::blitAntiH(int x, int y, const SkAlpha cov[], const int16_t run
skvm::Program* blit_anti_h = this->buildProgram(Coverage::UniformF);
skvm::Program* blit_h = this->buildProgram(Coverage::Full);
SkBlitterTrace::Step trace(&gSkVMBlitterTrace, "blitAntiH", /*scanlines=*/1ul, /*pixels=*/0ul);
for (int16_t run = *runs; run > 0; run = *runs) {
trace.add(/*scanlines=*/0, /*pixels=*/run);
const SkAlpha coverage = *cov;
if (coverage != 0x00) {
this->updateUniforms(x+run, y);
@ -727,6 +734,11 @@ void SkVMBlitter::blitMask(const SkMask& mask, const SkIRect& clip) {
SkASSERT(program);
if (program) {
SkBlitterTrace::Step trace(&gSkVMBlitterTrace,
"blitMask",
/*scanlines=*/clip.height(),
/*pixels=*/clip.width() * clip.height());
for (int y = clip.top(); y < clip.bottom(); y++) {
int x = clip.left(),
w = clip.width();

View File

@ -91,6 +91,8 @@ private:
void blitH(int x, int y, int w) override;
void blitAntiH(int x, int y, const SkAlpha cov[], const int16_t runs[]) override;
private:
void blitMask(const SkMask& mask, const SkIRect& clip) override;
SkPixmap fDevice;

140
src/utils/SkBlitterTrace.h Normal file
View File

@ -0,0 +1,140 @@
/*
* Copyright 2022 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBlitterTrace_DEFINED
#define SkBlitterTrace_DEFINED
#include <inttypes.h>
#include <unordered_map>
#include "src/utils/SkCycles.h"
/*
 * This class collects information for RasterPipeLine vs SkVM
 * performance comparison: it accumulates cycle, scanline and pixel totals
 * reported by SkBlitterTrace::Step guards placed around individual blits.
 *
 * How to get the comparison table:
 * 1. Run nanobench for SkVM:
 *      []/Release/nanobench
 *      --csv --config 8888 --skvm --compare --loops 100 --samples 1
 *      --match $(ls skps | grep --invert-match svg ) 2>&1 | tee VM.data
 * 2. Run nanobench for RasterPipeLine:
 *      []/Release/nanobench
 *      --csv --config 8888 --forceRasterPipeline --compare --loops 100
 *      --samples 1 --match $(ls skps | grep --invert-match svg )
 *      2>&1 | tee RP.data
 * 3. Extract the information side-by-side:
 *      awk 'BEGIN {OFS=","; fileNum = 0} ($2 ~ /MB/) && fileNum == 0
 *      {vmvmcycles[$3] = $6; vmvmscan[$3] = $8; vmvmpixels[$3] = $10;
 *      vmvminterp[$3] = $11; vmrpcycle[$3] = $14; vmrpscan[$3] = $16;
 *      vmrppixels[$3] = $18} ($2 ~ /MB/) && fileNum == 1 {print $3,
 *      vmvmcycles[$3], vmvmscan[$3], vmvmpixels[$3], vmvminterp[$3], $6, $8,
 *      $10, $11, $14, $16, $18} ENDFILE {fileNum += 1}'
 *      VM.data RP.data > compare.csv
 * 4. Open the compare.csv table in Google Spreadsheets.
 *    You will get columns [A:P]. Add 4 more columns with formulas:
 *      Q: =B/M-1
 *      R: =N-C
 *      S: =O-D
 *      T: =2*(S<>0)+(R<>0)
 *    Columns R, S and T are there for checking only (they are all
 *    supposed to contain zeros).
 *    Column Q shows the actual performance difference. A negative value
 *    means that SkVM wins, a positive one that RasterPipeLine wins.
 *
 * NOTE(review): the awk command in step 3 prints 12 comma-separated fields
 * per row (and never prints the vmrp* arrays it fills), which does not
 * obviously line up with columns [A:P] and the M/N/O references in step 4.
 * Confirm the column letters before relying on the formulas.
 */
class SkBlitterTrace {
public:
    /*
     * header:     label printed in front of every line this trace emits.
     * traceSteps: when true, every addTrace() call also prints its increment.
     * Timing itself stays off until turnTrace(true) is called.
     */
    SkBlitterTrace(const char* header, bool traceSteps = false)
            : fHeader(header), fTraceSteps(traceSteps) {}

    // Copy assignment is used to snapshot a live trace; the copy constructor is
    // defaulted alongside it so both copy operations are declared consistently.
    SkBlitterTrace(const SkBlitterTrace&) = default;
    SkBlitterTrace& operator=(const SkBlitterTrace&) = default;

    // Adds one step's counts to the running totals and, when per-step tracing
    // was requested at construction, prints the increment.
    void addTrace(const char* name, uint64_t cycles, uint64_t scanLines, uint64_t pixels) {
        fCycles += cycles;
        fScanlines += scanLines;
        fPixels += pixels;
        if (fTraceSteps) {
            printIncrements(name, cycles, scanLines, pixels);
        }
    }

    // Zeroes the accumulated totals; the header and both trace flags are kept.
    void reset() {
        fCycles = 0;
        fScanlines = 0;
        fPixels = 0;
    }

    // Prints each counter as "<total before this step>+<this step>".
    // Must be called after the increments have been added to the totals
    // (as addTrace() does), otherwise the "before" values are wrong.
    void printIncrements(const char* name,
                         uint64_t cycles,
                         uint64_t scanLines,
                         uint64_t pixels) const {
        SkDebugf("%s %s: cycles=%" PRIu64 "+%" PRIu64
                 " scanlines=%" PRIu64 "+%" PRIu64
                 " pixels=%" PRIu64 "+%" PRIu64 "\n",
                 fHeader, name,
                 fCycles - cycles, cycles,
                 fScanlines - scanLines, scanLines,
                 fPixels - pixels, pixels);
    }

    // Prints the accumulated totals on the current output line, followed by a
    // single space and no newline. The field layout is what the awk command in
    // the class comment parses, so it must not change.
    // `name` is currently ignored.
    void printCounts(const char* name) const {
        SkDebugf("%s cycles: %" PRIu64 "  scanlines: %" PRIu64 " pixels: %" PRIu64 " ",
                 fHeader,
                 fCycles,
                 fScanlines,
                 fPixels);
    }

    // Enables or disables timing; Step guards are no-ops while this is off.
    void turnTrace(bool value) { fTraceTime = value; }

    uint64_t getCycles() const { return fCycles; }
    uint64_t getScanlines() const { return fScanlines; }
    uint64_t getPixels() const { return fPixels; }

    /*
     * Scope guard that measures one blit: it samples the cycle counter on
     * construction and, on destruction, reports the elapsed cycles together
     * with the scanline/pixel counts to the owning trace.
     */
    class Step {
    public:
        // A null `trace`, or a trace whose timing is off, makes the guard inert
        // and skips the counter read so untraced blits pay almost nothing.
        Step(SkBlitterTrace* trace,
             const char* name,
             uint64_t scanlines,
             uint64_t pixels)
                : fTrace((trace != nullptr && trace->fTraceTime) ? trace : nullptr)
                , fName(name)
                , fScanlines(scanlines)
                , fPixels(pixels) {
            if (fTrace != nullptr) {
                fStartTime = SkCycles::Now();
            }
        }

        // A copied guard would report the same interval twice.
        Step(const Step&) = delete;
        Step& operator=(const Step&) = delete;

        // Grows the counts that will be reported when the guard is destroyed.
        void add(uint64_t scanlines, uint64_t pixels) {
            fScanlines += scanlines;
            fPixels += pixels;
        }

        ~Step() {
            // Re-check the flag: timing may have been switched off mid-step.
            if (fTrace == nullptr || !fTrace->fTraceTime) {
                return;
            }
            const uint64_t elapsed = SkCycles::Now() - fStartTime;
            fTrace->addTrace(/*name=*/fName,
                             /*cycles=*/elapsed,
                             /*scanlines=*/fScanlines,
                             /*pixels=*/fPixels);
        }

    private:
        SkBlitterTrace* fTrace = nullptr;
        const char* fName = "";
        uint64_t fStartTime = 0;
        uint64_t fScanlines = 0;
        uint64_t fPixels = 0;
    };

private:
    const char* fHeader = "";
    bool fTraceSteps = false;  // print every increment as it is recorded
    bool fTraceTime = false;   // master switch checked by Step
    uint64_t fCycles = 0;
    uint64_t fScanlines = 0;
    uint64_t fPixels = 0;
};
#endif

35
src/utils/SkCycles.h Normal file
View File

@ -0,0 +1,35 @@
/*
* Copyright 2021 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkCycles_DEFINED
#define SkCycles_DEFINED
#include <cstdint>
/*
 * Thin wrapper around a platform counter used for coarse cycle timing.
 * The platform is selected by the SK_BUILD_FOR_WIN / SK_CPU_X86 /
 * SK_CPU_ARM64 macros, which must already be defined when this header is
 * included; otherwise Now() falls through to the "unsupported" branch.
 */
class SkCycles {
public:
    // Returns the current counter reading, or 0 on Windows and on any CPU that
    // is neither x86 nor ARM64 (differences of two readings are then always 0).
    static uint64_t Now() {
#if !defined(SK_BUILD_FOR_WIN) && defined(SK_CPU_X86)
        // rdtscp also writes an auxiliary value, which is not needed here.
        uint32_t aux;
        return __builtin_ia32_rdtscp(&aux);
#elif !defined(SK_BUILD_FOR_WIN) && defined(SK_CPU_ARM64)
        // Reads the cntvct_el0 system register.
        // NOTE(review): this is presumably the generic timer's virtual count,
        // i.e. fixed-frequency ticks rather than core cycles - confirm.
        uint64_t ticks;
        asm volatile("mrs %0, cntvct_el0" : "=r"(ticks));
        return ticks;
#else
        return 0;
#endif
    }
};
#endif // SkCycles_DEFINED