Revert of skpbench: add option for gpu timing (patchset #7 id:120001 of https://codereview.chromium.org/2388433003/ )

Reason for revert:
many bots failing

Original issue's description:
> skpbench: add option for gpu timing
>
> Adds a gpu timing option with a GL implementation.
>
> BUG=skia:
> GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2388433003
>
> Committed: https://skia.googlesource.com/skia/+/c06720d06faab3b01eba1b8693e0ac791f06dc96

TBR=egdaniel@google.com,bsalomon@google.com,csmartdalton@google.com
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Review-Url: https://codereview.chromium.org/2390383002
This commit is contained in:
mtklein 2016-10-04 12:49:45 -07:00 committed by Commit bot
parent 60b0a2d85c
commit 56df2de7fb
9 changed files with 66 additions and 380 deletions

View File

@ -13,7 +13,7 @@
namespace sk_gpu_test { namespace sk_gpu_test {
using PlatformFence = intptr_t; using PlatformFence = intptr_t;
static constexpr PlatformFence kInvalidFence = 0; static constexpr PlatformFence kInvalidPlatformFence = 0;
/* /*
* This class provides an interface to interact with fence syncs. A fence sync is an object that the * This class provides an interface to interact with fence syncs. A fence sync is an object that the
@ -29,6 +29,6 @@ public:
virtual ~FenceSync() {} virtual ~FenceSync() {}
}; };
} // namespace sk_gpu_test }
#endif #endif

View File

@ -1,77 +0,0 @@
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef GpuTimer_DEFINED
#define GpuTimer_DEFINED
#include "SkTypes.h"
#include "SkExchange.h"
#include <chrono>
namespace sk_gpu_test {
using PlatformTimerQuery = intptr_t;
static constexpr PlatformTimerQuery kInvalidTimerQuery = 0;
/**
* Platform-independent interface for timing operations on the GPU.
*/
class GpuTimer {
public:
    /**
     * @param disjointSupport  whether this timer is able to detect disjoint GPU operations.
     */
    GpuTimer(bool disjointSupport)
        : fDisjointSupport(disjointSupport)
        , fActiveTimer(kInvalidTimerQuery)
        , fIsTiming(false) {
    }

    /** A timer must not be destroyed between queueStart() and queueStop(). */
    virtual ~GpuTimer() { SkASSERT(!fIsTiming); }

    /**
     * Returns whether this timer can detect disjoint GPU operations while timing. If false, a query
     * has less confidence when it completes with QueryStatus::kAccurate.
     */
    bool disjointSupport() const { return fDisjointSupport; }

    /**
     * Inserts a "start timing" command in the GPU command stream.
     *
     * Must not be called again until queueStop() has been called.
     */
    void queueStart() {
        SkASSERT(!fIsTiming);
        fActiveTimer = this->onQueueTimerStart();
        // Track the started state separately from the query handle: the implementation may
        // return kInvalidTimerQuery, and that failure is meant to be reported to the caller
        // via checkQueryStatus() (QueryStatus::kInvalid), not by tripping an assert here.
        fIsTiming = true;
    }

    /**
     * Inserts a "stop timing" command in the GPU command stream.
     *
     * @return a query object that can retrieve the time elapsed once the timer has completed.
     *         This is kInvalidTimerQuery if the matching queueStart() failed to create a query.
     */
    PlatformTimerQuery SK_WARN_UNUSED_RESULT queueStop() {
        SkASSERT(fIsTiming);
        this->onQueueTimerStop(fActiveTimer);
        fIsTiming = false;
        return skstd::exchange(fActiveTimer, kInvalidTimerQuery);
    }

    enum class QueryStatus {
        kInvalid,  //!< the timer query is invalid.
        kPending,  //!< the timer is still running on the GPU.
        kDisjoint, //!< the query is complete, but dubious due to disjoint GPU operations.
        kAccurate  //!< the query is complete and reliable.
    };

    /** Reports the current state of a query returned by queueStop(). */
    virtual QueryStatus checkQueryStatus(PlatformTimerQuery) = 0;

    /** Returns the GPU time measured by a query returned by queueStop(). */
    virtual std::chrono::nanoseconds getTimeElapsed(PlatformTimerQuery) = 0;

    /** Releases a query returned by queueStop(). */
    virtual void deleteQuery(PlatformTimerQuery) = 0;

private:
    /** Begins a platform timer query; returns its handle (possibly kInvalidTimerQuery). */
    virtual PlatformTimerQuery onQueueTimerStart() const = 0;

    /** Ends the platform timer query previously returned by onQueueTimerStart(). */
    virtual void onQueueTimerStop(PlatformTimerQuery) const = 0;

    bool const         fDisjointSupport;
    PlatformTimerQuery fActiveTimer;
    bool               fIsTiming;  // true between queueStart() and queueStop().
};
} // namespace sk_gpu_test
#endif

View File

@ -8,13 +8,8 @@
#include "TestContext.h" #include "TestContext.h"
#include "GpuTimer.h"
namespace sk_gpu_test { namespace sk_gpu_test {
TestContext::TestContext() TestContext::TestContext() : fFenceSync(nullptr), fCurrentFenceIdx(0) {
: fFenceSync(nullptr)
, fGpuTimer(nullptr)
, fCurrentFenceIdx(0) {
memset(fFrameFences, 0, sizeof(fFrameFences)); memset(fFrameFences, 0, sizeof(fFrameFences));
} }
@ -26,7 +21,6 @@ TestContext::~TestContext() {
} }
#endif #endif
SkASSERT(!fFenceSync); SkASSERT(!fFenceSync);
SkASSERT(!fGpuTimer);
} }
void TestContext::makeCurrent() const { this->onPlatformMakeCurrent(); } void TestContext::makeCurrent() const { this->onPlatformMakeCurrent(); }
@ -69,7 +63,6 @@ void TestContext::teardown() {
delete fFenceSync; delete fFenceSync;
fFenceSync = nullptr; fFenceSync = nullptr;
} }
delete fGpuTimer;
} }
} }

View File

@ -14,9 +14,6 @@
#include "../private/SkTemplates.h" #include "../private/SkTemplates.h"
namespace sk_gpu_test { namespace sk_gpu_test {
class GpuTimer;
/** /**
* An offscreen 3D context. This class is intended for Skia's internal testing needs and not * An offscreen 3D context. This class is intended for Skia's internal testing needs and not
* for general use. * for general use.
@ -30,9 +27,6 @@ public:
bool fenceSyncSupport() const { return fFenceSync != nullptr; } bool fenceSyncSupport() const { return fFenceSync != nullptr; }
FenceSync* fenceSync() { SkASSERT(fFenceSync); return fFenceSync; } FenceSync* fenceSync() { SkASSERT(fFenceSync); return fFenceSync; }
bool gpuTimingSupport() const { return fGpuTimer != nullptr; }
GpuTimer* gpuTimer() const { SkASSERT(fGpuTimer); return fGpuTimer; }
bool getMaxGpuFrameLag(int *maxFrameLag) const { bool getMaxGpuFrameLag(int *maxFrameLag) const {
if (!fFenceSync) { if (!fFenceSync) {
return false; return false;
@ -81,8 +75,7 @@ public:
virtual void finish() = 0; virtual void finish() = 0;
protected: protected:
FenceSync* fFenceSync; FenceSync* fFenceSync;
GpuTimer* fGpuTimer;
TestContext(); TestContext();

View File

@ -6,8 +6,6 @@
*/ */
#include "GLTestContext.h" #include "GLTestContext.h"
#include "GpuTimer.h"
#include "gl/GrGLUtil.h" #include "gl/GrGLUtil.h"
namespace { namespace {
@ -79,133 +77,6 @@ void GLFenceSync::deleteFence(sk_gpu_test::PlatformFence fence) const {
fGLDeleteSync(glsync); fGLDeleteSync(glsync);
} }
class GLGpuTimer : public sk_gpu_test::GpuTimer {
public:
static GLGpuTimer* CreateIfSupported(const sk_gpu_test::GLTestContext*);
QueryStatus checkQueryStatus(sk_gpu_test::PlatformTimerQuery) override;
std::chrono::nanoseconds getTimeElapsed(sk_gpu_test::PlatformTimerQuery) override;
void deleteQuery(sk_gpu_test::PlatformTimerQuery) override;
private:
GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext*, const char* ext = "");
bool validate() const;
sk_gpu_test::PlatformTimerQuery onQueueTimerStart() const override;
void onQueueTimerStop(sk_gpu_test::PlatformTimerQuery) const override;
static constexpr GrGLenum GL_QUERY_RESULT = 0x8866;
static constexpr GrGLenum GL_QUERY_RESULT_AVAILABLE = 0x8867;
static constexpr GrGLenum GL_TIME_ELAPSED = 0x88bf;
static constexpr GrGLenum GL_GPU_DISJOINT = 0x8fbb;
typedef void (GR_GL_FUNCTION_TYPE* GLGetIntegervProc) (GrGLenum, GrGLint*);
typedef void (GR_GL_FUNCTION_TYPE* GLGenQueriesProc) (GrGLsizei, GrGLuint*);
typedef void (GR_GL_FUNCTION_TYPE* GLDeleteQueriesProc) (GrGLsizei, const GrGLuint*);
typedef void (GR_GL_FUNCTION_TYPE* GLBeginQueryProc) (GrGLenum, GrGLuint);
typedef void (GR_GL_FUNCTION_TYPE* GLEndQueryProc) (GrGLenum);
typedef void (GR_GL_FUNCTION_TYPE* GLGetQueryObjectuivProc) (GrGLuint, GrGLenum, GrGLuint*);
typedef void (GR_GL_FUNCTION_TYPE* GLGetQueryObjectui64vProc) (GrGLuint, GrGLenum, GrGLuint64*);
GLGetIntegervProc fGLGetIntegerv;
GLGenQueriesProc fGLGenQueries;
GLDeleteQueriesProc fGLDeleteQueries;
GLBeginQueryProc fGLBeginQuery;
GLEndQueryProc fGLEndQuery;
GLGetQueryObjectuivProc fGLGetQueryObjectuiv;
GLGetQueryObjectui64vProc fGLGetQueryObjectui64v;
typedef sk_gpu_test::GpuTimer INHERITED;
};
/**
 * Creates a GL timer for the given test context, choosing the entry-point flavor from the
 * extensions and version the context's GrGLInterface reports.
 *
 * Selection order:
 *   1. GL_EXT_disjoint_timer_query    -> "EXT"-suffixed entry points, disjoint detection enabled.
 *   2. Desktop GL >= 3.3 or
 *      GL_ARB_timer_query             -> unsuffixed entry points, no disjoint detection.
 *   3. GL_EXT_timer_query             -> "EXT"-suffixed entry points, no disjoint detection.
 *
 * @return a heap-allocated timer owned by the caller, or nullptr when no timer-query flavor is
 *         available or any required entry point failed to resolve.
 */
GLGpuTimer* GLGpuTimer::CreateIfSupported(const sk_gpu_test::GLTestContext* ctx) {
    SkAutoTDelete<GLGpuTimer> ret;
    const GrGLInterface* gl = ctx->gl();
    if (gl->fExtensions.has("GL_EXT_disjoint_timer_query")) {
        ret.reset(new GLGpuTimer(true, ctx, "EXT"));
    } else if (kGL_GrGLStandard == gl->fStandard &&
               // Timer queries are core as of desktop GL 3.3, so 3.3 itself must qualify.
               (GrGLGetVersion(gl) >= GR_GL_VER(3,3) ||
                gl->fExtensions.has("GL_ARB_timer_query"))) {
        ret.reset(new GLGpuTimer(false, ctx));
    } else if (gl->fExtensions.has("GL_EXT_timer_query")) {
        ret.reset(new GLGpuTimer(false, ctx, "EXT"));
    }
    // Only hand the timer out if every GL entry point was found.
    return ret && ret->validate() ? ret.release() : nullptr;
}
// Looks up every GL entry point the timer uses via the test context and stores the resulting
// function pointers. No check is made here that the lookups succeeded; see validate().
//
// disjointSupport: forwarded to the GpuTimer base class.
// ctx:             test context used to resolve GL proc addresses.
// ext:             passed along with each query-related entry point name.
//                  NOTE(review): presumably an extension suffix (e.g. "EXT") appended to the
//                  name by getGLProcAddress -- confirm against GLTestContext.
GLGpuTimer::GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext* ctx, const char* ext)
: INHERITED(disjointSupport) {
// glGetIntegerv is the only lookup that does not pass `ext`.
ctx->getGLProcAddress(&fGLGetIntegerv, "glGetIntegerv");
ctx->getGLProcAddress(&fGLGenQueries, "glGenQueries", ext);
ctx->getGLProcAddress(&fGLDeleteQueries, "glDeleteQueries", ext);
ctx->getGLProcAddress(&fGLBeginQuery, "glBeginQuery", ext);
ctx->getGLProcAddress(&fGLEndQuery, "glEndQuery", ext);
ctx->getGLProcAddress(&fGLGetQueryObjectuiv, "glGetQueryObjectuiv", ext);
ctx->getGLProcAddress(&fGLGetQueryObjectui64v, "glGetQueryObjectui64v", ext);
}
/** Returns true only if all seven GL entry points were resolved to non-null pointers. */
bool GLGpuTimer::validate() const {
    if (!fGLGetIntegerv) {
        return false;
    }
    if (!fGLGenQueries || !fGLDeleteQueries) {
        return false;
    }
    if (!fGLBeginQuery || !fGLEndQuery) {
        return false;
    }
    if (!fGLGetQueryObjectuiv || !fGLGetQueryObjectui64v) {
        return false;
    }
    return true;
}
/**
 * Generates a GL query object and begins a GL_TIME_ELAPSED query on it.
 *
 * @return the query id as a PlatformTimerQuery, or kInvalidTimerQuery if no query id was
 *         generated.
 */
sk_gpu_test::PlatformTimerQuery GLGpuTimer::onQueueTimerStart() const {
    // Zero-initialize so that a glGenQueries call that fails without writing the output is
    // detected by the check below instead of reading an indeterminate value.
    GrGLuint queryID = 0;
    fGLGenQueries(1, &queryID);
    if (!queryID) {
        return sk_gpu_test::kInvalidTimerQuery;
    }
    if (this->disjointSupport()) {
        // Clear the disjoint flag.
        GrGLint disjoint;
        fGLGetIntegerv(GL_GPU_DISJOINT, &disjoint);
    }
    fGLBeginQuery(GL_TIME_ELAPSED, queryID);
    return static_cast<sk_gpu_test::PlatformTimerQuery>(queryID);
}
/**
 * Ends the GL_TIME_ELAPSED query. Does nothing when handed kInvalidTimerQuery, since in that
 * case no query was begun.
 */
void GLGpuTimer::onQueueTimerStop(sk_gpu_test::PlatformTimerQuery platformTimer) const {
    if (sk_gpu_test::kInvalidTimerQuery != platformTimer) {
        fGLEndQuery(GL_TIME_ELAPSED);
    }
}
/**
 * Classifies a timer query:
 *   - kInvalid  if the query id is zero,
 *   - kPending  if GL reports that the result is not yet available,
 *   - kDisjoint if disjoint detection is supported and GL_GPU_DISJOINT reads non-zero,
 *   - kAccurate otherwise.
 */
sk_gpu_test::GpuTimer::QueryStatus
GLGpuTimer::checkQueryStatus(sk_gpu_test::PlatformTimerQuery platformTimer) {
    const GrGLuint id = static_cast<GrGLuint>(platformTimer);
    if (0 == id) {
        return QueryStatus::kInvalid;
    }

    GrGLuint resultReady = 0;
    fGLGetQueryObjectuiv(id, GL_QUERY_RESULT_AVAILABLE, &resultReady);
    if (0 == resultReady) {
        return QueryStatus::kPending;
    }

    if (!this->disjointSupport()) {
        // Without disjoint detection a completed query is reported as accurate.
        return QueryStatus::kAccurate;
    }

    // Defaults to "disjoint" so the result is treated as dubious unless GL says otherwise.
    GrGLint wasDisjoint = 1;
    fGLGetIntegerv(GL_GPU_DISJOINT, &wasDisjoint);
    return wasDisjoint ? QueryStatus::kDisjoint : QueryStatus::kAccurate;
}
/**
 * Reads GL_QUERY_RESULT for the given query and returns it as a nanosecond duration.
 *
 * The query is expected to have completed (status kDisjoint or kAccurate); this is asserted in
 * debug builds.
 */
std::chrono::nanoseconds GLGpuTimer::getTimeElapsed(sk_gpu_test::PlatformTimerQuery platformTimer) {
    SkASSERT(this->checkQueryStatus(platformTimer) >= QueryStatus::kDisjoint);
    const GrGLuint queryID = static_cast<GrGLuint>(platformTimer);
    // Zero-initialize so a GL call that fails without writing the result yields a defined
    // (zero) duration rather than an indeterminate one.
    GrGLuint64 nanoseconds = 0;
    fGLGetQueryObjectui64v(queryID, GL_QUERY_RESULT, &nanoseconds);
    return std::chrono::nanoseconds(nanoseconds);
}
/** Deletes the GL query object identified by the given platform query. */
void GLGpuTimer::deleteQuery(sk_gpu_test::PlatformTimerQuery platformTimer) {
    const GrGLuint ids[1] = { static_cast<GrGLuint>(platformTimer) };
    fGLDeleteQueries(1, ids);
}
} // anonymous namespace } // anonymous namespace
namespace sk_gpu_test { namespace sk_gpu_test {
@ -220,7 +91,6 @@ void GLTestContext::init(const GrGLInterface* gl, FenceSync* fenceSync) {
SkASSERT(!fGL.get()); SkASSERT(!fGL.get());
fGL.reset(gl); fGL.reset(gl);
fFenceSync = fenceSync ? fenceSync : GLFenceSync::CreateIfSupported(this); fFenceSync = fenceSync ? fenceSync : GLFenceSync::CreateIfSupported(this);
fGpuTimer = GLGpuTimer::CreateIfSupported(this);
} }
void GLTestContext::teardown() { void GLTestContext::teardown() {

View File

@ -25,8 +25,6 @@ class BenchResult:
'(?P<samples>\d+)' '(?P<samples>\d+)'
'(?P<sample_ms_pad> +)' '(?P<sample_ms_pad> +)'
'(?P<sample_ms>\d+)' '(?P<sample_ms>\d+)'
'(?P<clock_pad> +)'
'(?P<clock>[cg]pu)'
'(?P<metric_pad> +)' '(?P<metric_pad> +)'
'(?P<metric>ms|fps)' '(?P<metric>ms|fps)'
'(?P<config_pad> +)' '(?P<config_pad> +)'
@ -47,7 +45,6 @@ class BenchResult:
self.stddev = float(match.group('stddev')[:-1]) # Drop '%' sign. self.stddev = float(match.group('stddev')[:-1]) # Drop '%' sign.
self.samples = int(match.group('samples')) self.samples = int(match.group('samples'))
self.sample_ms = int(match.group('sample_ms')) self.sample_ms = int(match.group('sample_ms'))
self.clock = match.group('clock')
self.metric = match.group('metric') self.metric = match.group('metric')
self.config = match.group('config') self.config = match.group('config')
self.bench = match.group('bench') self.bench = match.group('bench')
@ -62,7 +59,7 @@ class BenchResult:
else: else:
values = list() values = list()
for name in ['accum', 'median', 'max', 'min', 'stddev', for name in ['accum', 'median', 'max', 'min', 'stddev',
'samples', 'sample_ms', 'clock', 'metric', 'config']: 'samples', 'sample_ms', 'metric', 'config']:
values.append(self.get_string(name + '_pad')) values.append(self.get_string(name + '_pad'))
values.append(self.get_string(name)) values.append(self.get_string(name))
values.append(config_suffix) values.append(config_suffix)

View File

@ -8,8 +8,8 @@
from __future__ import print_function from __future__ import print_function
from _benchresult import BenchResult from _benchresult import BenchResult
from argparse import ArgumentParser from argparse import ArgumentParser
from collections import defaultdict, namedtuple
from datetime import datetime from datetime import datetime
import collections
import operator import operator
import os import os
import sys import sys
@ -27,7 +27,7 @@ This script can also be used to generate a Google sheet:
(1) Install the "Office Editing for Docs, Sheets & Slides" Chrome extension: (1) Install the "Office Editing for Docs, Sheets & Slides" Chrome extension:
https://chrome.google.com/webstore/detail/office-editing-for-docs-s/gbkeegbaiigmenfmjfclcdgdpimamgkj https://chrome.google.com/webstore/detail/office-editing-for-docs-s/gbkeegbaiigmenfmjfclcdgdpimamgkj
(2) Update your global OS file associations to use Chrome for .csv files. (2) Designate Chrome os-wide as the default application for opening .csv files.
(3) Run parseskpbench.py with the --open flag. (3) Run parseskpbench.py with the --open flag.
@ -49,92 +49,75 @@ __argparse.add_argument('sources',
FLAGS = __argparse.parse_args() FLAGS = __argparse.parse_args()
RESULT_QUALIFIERS = ('sample_ms', 'clock', 'metric')
class FullConfig(namedtuple('fullconfig', ('config',) + RESULT_QUALIFIERS)):
  """A config name together with the values of its result qualifiers."""

  def qualified_name(self, qualifiers=RESULT_QUALIFIERS):
    """Returns the config name (commas replaced by spaces), annotated with the
    values of the requested qualifier fields."""
    display_name = self.config.replace(',', ' ')
    selected = dict((field, getattr(self, field)) for field in qualifiers)
    return get_qualified_name(display_name, selected)
def get_qualified_name(name, qualifiers):
  """Appends 'key=value' qualifiers to a name.

  Args:
    name: the base name.
    qualifiers: dict mapping qualifier names to their values; may be empty.

  Returns:
    `name` unchanged when there are no qualifiers, otherwise
    'name (k1=v1 k2=v2 ...)' with the pairs in the dict's iteration order.
  """
  if not qualifiers:
    return name
  # dict.items() works on both Python 2 and 3; iteritems() exists only on 2.
  args = ('%s=%s' % (k, v) for k, v in qualifiers.items())
  return '%s (%s)' % (name, ' '.join(args))
class Parser: class Parser:
def __init__(self): def __init__(self):
self.sheet_qualifiers = {x:None for x in RESULT_QUALIFIERS} self.configs = list() # use list to preserve the order configs appear in.
self.config_qualifiers = set() self.rows = collections.defaultdict(dict)
self.fullconfigs = list() # use list to preserve the order. self.cols = collections.defaultdict(dict)
self.rows = defaultdict(dict) self.metric = None
self.cols = defaultdict(dict) self.sample_ms = None
def parse_file(self, infile): def parse_file(self, infile):
for line in infile: for line in infile:
match = BenchResult.match(line) match = BenchResult.match(line)
if not match: if not match:
continue continue
if self.metric is None:
fullconfig = FullConfig(*(match.get_string(x) self.metric = match.metric
for x in FullConfig._fields)) elif match.metric != self.metric:
if not fullconfig in self.fullconfigs: raise ValueError("results have mismatched metrics (%s and %s)" %
self.fullconfigs.append(fullconfig) (self.metric, match.metric))
if self.sample_ms is None:
for qualifier, value in self.sheet_qualifiers.items(): self.sample_ms = match.sample_ms
if value is None: elif not FLAGS.force and match.sample_ms != self.sample_ms:
self.sheet_qualifiers[qualifier] = match.get_string(qualifier) raise ValueError("results have mismatched sampling times. "
elif value != match.get_string(qualifier): "(use --force to ignore)")
del self.sheet_qualifiers[qualifier] if not match.config in self.configs:
self.config_qualifiers.add(qualifier) self.configs.append(match.config)
self.rows[match.bench][match.config] = match.get_string(FLAGS.result)
self.rows[match.bench][fullconfig] = match.get_string(FLAGS.result) self.cols[match.config][match.bench] = getattr(match, FLAGS.result)
self.cols[fullconfig][match.bench] = getattr(match, FLAGS.result)
def print_csv(self, outfile=sys.stdout): def print_csv(self, outfile=sys.stdout):
# Write the title. print('%s_%s' % (FLAGS.result, self.metric), file=outfile)
print(get_qualified_name(FLAGS.result, self.sheet_qualifiers), file=outfile)
# Write the header. # Write the header.
outfile.write('bench,') outfile.write('bench,')
for fullconfig in self.fullconfigs: for config in self.configs:
outfile.write('%s,' % fullconfig.qualified_name(self.config_qualifiers)) outfile.write('%s,' % config)
outfile.write('\n') outfile.write('\n')
# Write the rows. # Write the rows.
for bench, row in self.rows.iteritems(): for bench, row in self.rows.items():
outfile.write('%s,' % bench) outfile.write('%s,' % bench)
for fullconfig in self.fullconfigs: for config in self.configs:
if fullconfig in row: if config in row:
outfile.write('%s,' % row[fullconfig]) outfile.write('%s,' % row[config])
elif FLAGS.force: elif FLAGS.force:
outfile.write('NULL,') outfile.write(',')
else: else:
raise ValueError("%s: missing value for %s. (use --force to ignore)" % raise ValueError("%s: missing value for %s. (use --force to ignore)" %
(bench, (bench, config))
fullconfig.qualified_name(self.config_qualifiers)))
outfile.write('\n') outfile.write('\n')
# Add simple, literal averages. # Add simple, literal averages.
if len(self.rows) > 1: if len(self.rows) > 1:
outfile.write('\n') outfile.write('\n')
self._print_computed_row('MEAN', self.__print_computed_row('MEAN',
lambda col: reduce(operator.add, col.values()) / len(col), lambda col: reduce(operator.add, col.values()) / len(col),
outfile=outfile) outfile=outfile)
self._print_computed_row('GEOMEAN', self.__print_computed_row('GEOMEAN',
lambda col: reduce(operator.mul, col.values()) ** (1.0 / len(col)), lambda col: reduce(operator.mul, col.values()) ** (1.0 / len(col)),
outfile=outfile) outfile=outfile)
def _print_computed_row(self, name, func, outfile=sys.stdout): def __print_computed_row(self, name, func, outfile=sys.stdout):
outfile.write('%s,' % name) outfile.write('%s,' % name)
for fullconfig in self.fullconfigs: for config in self.configs:
if len(self.cols[fullconfig]) != len(self.rows): assert(len(self.cols[config]) == len(self.rows))
outfile.write('NULL,') outfile.write('%.4g,' % func(self.cols[config]))
continue
outfile.write('%.4g,' % func(self.cols[fullconfig]))
outfile.write('\n') outfile.write('\n')
def main(): def main():
parser = Parser() parser = Parser()

View File

@ -5,7 +5,6 @@
* found in the LICENSE file. * found in the LICENSE file.
*/ */
#include "GpuTimer.h"
#include "GrContextFactory.h" #include "GrContextFactory.h"
#include "SkCanvas.h" #include "SkCanvas.h"
#include "SkOSFile.h" #include "SkOSFile.h"
@ -34,9 +33,12 @@
* Currently, only GPU configs are supported. * Currently, only GPU configs are supported.
*/ */
using sk_gpu_test::PlatformFence;
using sk_gpu_test::kInvalidPlatformFence;
using sk_gpu_test::FenceSync;
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
DEFINE_bool(fps, false, "use fps instead of ms"); DEFINE_bool(fps, false, "use fps instead of ms");
DEFINE_string(skp, "", "path to a single .skp file to benchmark"); DEFINE_string(skp, "", "path to a single .skp file to benchmark");
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"); DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
@ -44,13 +46,13 @@ DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
DEFINE_bool(suppressHeader, false, "don't print a header row before the results"); DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
static const char* header = static const char* header =
" accum median max min stddev samples sample_ms clock metric config bench"; " accum median max min stddev samples sample_ms metric config bench";
static const char* resultFormat = static const char* resultFormat =
"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s";
struct Sample { struct Sample {
using duration = std::chrono::nanoseconds; using clock = std::chrono::high_resolution_clock;
Sample() : fFrames(0), fDuration(0) {} Sample() : fFrames(0), fDuration(0) {}
double seconds() const { return std::chrono::duration<double>(fDuration).count(); } double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
@ -58,13 +60,13 @@ struct Sample {
double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; } double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
int fFrames; int fFrames;
duration fDuration; clock::duration fDuration;
}; };
class GpuSync { class GpuSync {
public: public:
GpuSync(const sk_gpu_test::FenceSync* fenceSync); GpuSync(const FenceSync* fenceSync);
~GpuSync(); ~GpuSync();
void syncToPreviousFrame(); void syncToPreviousFrame();
@ -72,8 +74,8 @@ public:
private: private:
void updateFence(); void updateFence();
const sk_gpu_test::FenceSync* const fFenceSync; const FenceSync* const fFenceSync;
sk_gpu_test::PlatformFence fFence; PlatformFence fFence;
}; };
enum class ExitErr { enum class ExitErr {
@ -90,10 +92,10 @@ static bool mkdir_p(const SkString& name);
static SkString join(const SkCommandLineFlags::StringArray&); static SkString join(const SkCommandLineFlags::StringArray&);
static void exitf(ExitErr, const char* format, ...); static void exitf(ExitErr, const char* format, ...);
static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const SkPicture* skp,
const SkPicture* skp, std::vector<Sample>* samples) { std::vector<Sample>* samples) {
using clock = std::chrono::high_resolution_clock; using clock = Sample::clock;
const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
draw_skp_and_flush(canvas, skp); draw_skp_and_flush(canvas, skp);
@ -121,66 +123,6 @@ static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* can
} while (now < endTime || 0 == samples->size() % 2); } while (now < endTime || 0 == samples->size() % 2);
} }
// Benchmarks an skp using GPU timer queries instead of the CPU clock.
//
// Each frame is drawn between gpuTimer->queueStart() and gpuTimer->queueStop(). The query for a
// frame is not read until the following frame has been submitted and gpuSync has synced to the
// previous frame, so one query is always left in flight. Elapsed GPU time is accumulated into
// Sample objects appended to `samples`.
//
// gpuTimer:  timer used to queue and read the GPU timer queries.
// fenceSync: used to construct the GpuSync that paces the loop.
// canvas:    canvas the picture is drawn into.
// skp:       the picture to benchmark.
// samples:   output; one Sample is appended per sampling interval.
static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
const SkPicture* skp, std::vector<Sample>* samples) {
using sk_gpu_test::PlatformTimerQuery;
using clock = std::chrono::steady_clock;
// The CPU clock only decides how long to keep sampling; recorded durations come from the GPU.
const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
if (!gpuTimer->disjointSupport()) {
fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
"results may be unreliable\n");
}
// Untimed draw before any measurement begins.
// NOTE(review): presumably a warm-up frame -- confirm.
draw_skp_and_flush(canvas, skp);
GpuSync gpuSync(fenceSync);
// Prime the pipeline: queue the first timed frame so the loop below always has a
// "previous" query to read.
gpuTimer->queueStart();
draw_skp_and_flush(canvas, skp);
PlatformTimerQuery previousTime = gpuTimer->queueStop();
gpuSync.syncToPreviousFrame();
clock::time_point now = clock::now();
const clock::time_point endTime = now + benchDuration;
do {
const clock::time_point sampleEndTime = now + sampleDuration;
samples->emplace_back();
Sample& sample = samples->back();
do {
// Submit the next timed frame before reading back the previous frame's query.
gpuTimer->queueStart();
draw_skp_and_flush(canvas, skp);
PlatformTimerQuery time = gpuTimer->queueStop();
gpuSync.syncToPreviousFrame();
switch (gpuTimer->checkQueryStatus(previousTime)) {
using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
case QueryStatus::kInvalid:
exitf(ExitErr::kUnavailable, "GPU timer failed");
case QueryStatus::kPending:
exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
case QueryStatus::kDisjoint:
// A disjoint query contributes neither time nor a frame to the sample.
if (FLAGS_verbosity >= 4) {
fprintf(stderr, "discarding timer query due to disjoint operations.\n");
}
break;
case QueryStatus::kAccurate:
sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
++sample.fFrames;
break;
}
// The previous query has been consumed (or discarded); the current one takes its place.
gpuTimer->deleteQuery(previousTime);
previousTime = time;
now = clock::now();
// Keep going until the sample interval has elapsed AND at least one frame was recorded.
} while (now < sampleEndTime || 0 == sample.fFrames);
// Keep sampling until the bench duration has elapsed AND the sample count is odd.
} while (now < endTime || 0 == samples->size() % 2);
// Release the query that was still in flight when the loop ended.
gpuTimer->deleteQuery(previousTime);
}
void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) { void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) {
if (0 == (samples.size() % 2)) { if (0 == (samples.size() % 2)) {
exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples"); exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
@ -207,8 +149,7 @@ void print_result(const std::vector<Sample>& samples, const char* config, const
const double stddev = 100/*%*/ * sqrt(variance) / accumValue; const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(), printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(), stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, bench);
config, bench);
printf("\n"); printf("\n");
fflush(stdout); fflush(stdout);
} }
@ -306,15 +247,7 @@ int main(int argc, char** argv) {
// Run the benchmark. // Run the benchmark.
SkCanvas* canvas = surface->getCanvas(); SkCanvas* canvas = surface->getCanvas();
canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
if (!FLAGS_gpuClock) { run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
} else {
if (!testCtx->gpuTimingSupport()) {
exitf(ExitErr::kUnavailable, "GPU does not support timing");
}
run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
&samples);
}
print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).c_str()); print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).c_str());
// Save a proof (if one was requested). // Save a proof (if one was requested).
@ -367,7 +300,7 @@ static void exitf(ExitErr err, const char* format, ...) {
exit((int)err); exit((int)err);
} }
GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync) GpuSync::GpuSync(const FenceSync* fenceSync)
: fFenceSync(fenceSync) { : fFenceSync(fenceSync) {
this->updateFence(); this->updateFence();
} }
@ -377,7 +310,7 @@ GpuSync::~GpuSync() {
} }
void GpuSync::syncToPreviousFrame() { void GpuSync::syncToPreviousFrame() {
if (sk_gpu_test::kInvalidFence == fFence) { if (kInvalidPlatformFence == fFence) {
exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
} }
if (!fFenceSync->waitFence(fFence)) { if (!fFenceSync->waitFence(fFence)) {
@ -389,7 +322,7 @@ void GpuSync::syncToPreviousFrame() {
void GpuSync::updateFence() { void GpuSync::updateFence() {
fFence = fFenceSync->insertFence(); fFence = fFenceSync->insertFence();
if (sk_gpu_test::kInvalidFence == fFence) { if (kInvalidPlatformFence == fFence) {
exitf(ExitErr::kUnavailable, "failed to insert fence"); exitf(ExitErr::kUnavailable, "failed to insert fence");
} }
} }

View File

@ -32,8 +32,7 @@ unacceptable stddev.
__argparse.add_argument('--adb', __argparse.add_argument('--adb',
action='store_true', help="execute skpbench over adb") action='store_true', help="execute skpbench over adb")
__argparse.add_argument('-s', '--device-serial', __argparse.add_argument('-s', '--device-serial',
help="if using adb, ID of the specific device to target " help="if using adb, id of the specific device to target")
"(only required if more than 1 device is attached)")
__argparse.add_argument('-p', '--path', __argparse.add_argument('-p', '--path',
help="directory to execute ./skpbench from") help="directory to execute ./skpbench from")
__argparse.add_argument('-m', '--max-stddev', __argparse.add_argument('-m', '--max-stddev',
@ -48,10 +47,7 @@ __argparse.add_argument('-v','--verbosity',
__argparse.add_argument('-d', '--duration', __argparse.add_argument('-d', '--duration',
type=int, help="number of milliseconds to run each benchmark") type=int, help="number of milliseconds to run each benchmark")
__argparse.add_argument('-l', '--sample-ms', __argparse.add_argument('-l', '--sample-ms',
type=int, help="duration of a sample (minimum)") type=int, help="minimum duration of a sample")
__argparse.add_argument('--gpu',
action='store_true',
help="perform timing on the gpu clock instead of cpu (gpu work only)")
__argparse.add_argument('--fps', __argparse.add_argument('--fps',
action='store_true', help="use fps instead of ms") action='store_true', help="use fps instead of ms")
__argparse.add_argument('-c', '--config', __argparse.add_argument('-c', '--config',
@ -97,8 +93,6 @@ class SKPBench:
ARGV.extend(['--duration', str(FLAGS.duration)]) ARGV.extend(['--duration', str(FLAGS.duration)])
if FLAGS.sample_ms: if FLAGS.sample_ms:
ARGV.extend(['--sampleMs', str(FLAGS.sample_ms)]) ARGV.extend(['--sampleMs', str(FLAGS.sample_ms)])
if FLAGS.gpu:
ARGV.extend(['--gpuClock', 'true'])
if FLAGS.fps: if FLAGS.fps:
ARGV.extend(['--fps', 'true']) ARGV.extend(['--fps', 'true'])
if FLAGS.path: if FLAGS.path:
@ -194,7 +188,7 @@ class SKPBench:
def terminate(self): def terminate(self):
if self._proc: if self._proc:
self._proc.terminate() self._proc.kill()
self._monitor.join() self._monitor.join()
self._proc.wait() self._proc.wait()
self._proc = None self._proc = None