Revert of skpbench: add option for gpu timing (patchset #7 id:120001 of https://codereview.chromium.org/2388433003/ )

Reason for revert: many bots failing. Original issue's description: > skpbench: add option for gpu timing > > Adds a gpu timing option with a GL implementation. > > BUG=skia: > GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2388433003 > > Committed: https://skia.googlesource.com/skia/+/c06720d06faab3b01eba1b8693e0ac791f06dc96 TBR=egdaniel@google.com,bsalomon@google.com,csmartdalton@google.com # Skipping CQ checks because original CL landed less than 1 day ago. NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=skia: Review-Url: https://codereview.chromium.org/2390383002
2016-10-04 12:49:45 -07:00 · 2016-10-04 12:49:45 -07:00 · 56df2de7fb
commit 56df2de7fb
parent 60b0a2d85c
9 changed files with 66 additions and 380 deletions
--- a/tools/gpu/FenceSync.h
+++ b/tools/gpu/FenceSync.h
@@ -13,7 +13,7 @@
 namespace sk_gpu_test {

 using PlatformFence = intptr_t;
-static constexpr PlatformFence kInvalidFence = 0;
+static constexpr PlatformFence kInvalidPlatformFence = 0;

 /*
 * This class provides an interface to interact with fence syncs. A fence sync is an object that the
@@ -29,6 +29,6 @@ public:
    virtual ~FenceSync() {}
 };

-}  // namespace sk_gpu_test
+}

 #endif
--- a/tools/gpu/GpuTimer.h
+++ b/tools/gpu/GpuTimer.h
@@ -1,77 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GpuTimer_DEFINED
-#define GpuTimer_DEFINED
-
-#include "SkTypes.h"
-#include "SkExchange.h"
-#include <chrono>
-
-namespace sk_gpu_test {
-
-using PlatformTimerQuery = intptr_t;
-static constexpr PlatformTimerQuery kInvalidTimerQuery = 0;
-
-/**
- * Platform-independent interface for timing operations on the GPU.
- */
-class GpuTimer {
-public:
-    GpuTimer(bool disjointSupport)
-        : fDisjointSupport(disjointSupport)
-        , fActiveTimer(kInvalidTimerQuery) {
-    }
-    virtual ~GpuTimer() { SkASSERT(!fActiveTimer); }
-
-    /**
-     * Returns whether this timer can detect disjoint GPU operations while timing. If false, a query
-     * has less confidence when it completes with QueryStatus::kAccurate.
-     */
-    bool disjointSupport() const { return fDisjointSupport; }
-
-    /**
-     * Inserts a "start timing" command in the GPU command stream.
-     */
-    void queueStart() {
-        SkASSERT(!fActiveTimer);
-        fActiveTimer = this->onQueueTimerStart();
-    }
-
-    /**
-     * Inserts a "stop timing" command in the GPU command stream.
-     *
-     * @return a query object that can retrieve the time elapsed once the timer has completed.
-     */
-    PlatformTimerQuery SK_WARN_UNUSED_RESULT queueStop() {
-        SkASSERT(fActiveTimer);
-        this->onQueueTimerStop(fActiveTimer);
-        return skstd::exchange(fActiveTimer, kInvalidTimerQuery);
-    }
-
-    enum class QueryStatus {
-        kInvalid,  //<! the timer query is invalid.
-        kPending,  //<! the timer is still running on the GPU.
-        kDisjoint, //<! the query is complete, but dubious due to disjoint GPU operations.
-        kAccurate  //<! the query is complete and reliable.
-    };
-
-    virtual QueryStatus checkQueryStatus(PlatformTimerQuery) = 0;
-    virtual std::chrono::nanoseconds getTimeElapsed(PlatformTimerQuery) = 0;
-    virtual void deleteQuery(PlatformTimerQuery) = 0;
-
-private:
-    virtual PlatformTimerQuery onQueueTimerStart() const = 0;
-    virtual void onQueueTimerStop(PlatformTimerQuery) const = 0;
-
-    bool const           fDisjointSupport;
-    PlatformTimerQuery   fActiveTimer;
-};
-
-}  // namespace sk_gpu_test
-
-#endif
--- a/tools/gpu/TestContext.cpp
+++ b/tools/gpu/TestContext.cpp
@@ -8,13 +8,8 @@

 #include "TestContext.h"

-#include "GpuTimer.h"
-
 namespace sk_gpu_test {
-TestContext::TestContext()
-    : fFenceSync(nullptr)
-    , fGpuTimer(nullptr)
-    , fCurrentFenceIdx(0) {
+TestContext::TestContext() : fFenceSync(nullptr), fCurrentFenceIdx(0) {
    memset(fFrameFences, 0, sizeof(fFrameFences));
 }

@@ -26,7 +21,6 @@ TestContext::~TestContext() {
    }
 #endif
    SkASSERT(!fFenceSync);
-    SkASSERT(!fGpuTimer);
 }

 void TestContext::makeCurrent() const { this->onPlatformMakeCurrent(); }
@@ -69,7 +63,6 @@ void TestContext::teardown() {
        delete fFenceSync;
        fFenceSync = nullptr;
    }
-    delete fGpuTimer;
 }

 }
--- a/tools/gpu/TestContext.h
+++ b/tools/gpu/TestContext.h
@@ -14,9 +14,6 @@
 #include "../private/SkTemplates.h"

 namespace sk_gpu_test {
-
-class GpuTimer;
-
 /**
 * An offscreen 3D context. This class is intended for Skia's internal testing needs and not
 * for general use.
@@ -30,9 +27,6 @@ public:
    bool fenceSyncSupport() const { return fFenceSync != nullptr; }
    FenceSync* fenceSync() { SkASSERT(fFenceSync); return fFenceSync; }

-    bool gpuTimingSupport() const { return fGpuTimer != nullptr; }
-    GpuTimer* gpuTimer() const { SkASSERT(fGpuTimer); return fGpuTimer; }
-
    bool getMaxGpuFrameLag(int *maxFrameLag) const {
        if (!fFenceSync) {
            return false;
@@ -81,8 +75,7 @@ public:
    virtual void finish() = 0;

 protected:
-    FenceSync*   fFenceSync;
-    GpuTimer*    fGpuTimer;
+    FenceSync* fFenceSync;

    TestContext();

--- a/tools/gpu/gl/GLTestContext.cpp
+++ b/tools/gpu/gl/GLTestContext.cpp
@@ -6,8 +6,6 @@
 */

 #include "GLTestContext.h"
-
-#include "GpuTimer.h"
 #include "gl/GrGLUtil.h"

 namespace {
@@ -79,133 +77,6 @@ void GLFenceSync::deleteFence(sk_gpu_test::PlatformFence fence) const {
    fGLDeleteSync(glsync);
 }

-class GLGpuTimer : public sk_gpu_test::GpuTimer {
-public:
-    static GLGpuTimer* CreateIfSupported(const sk_gpu_test::GLTestContext*);
-
-    QueryStatus checkQueryStatus(sk_gpu_test::PlatformTimerQuery) override;
-    std::chrono::nanoseconds getTimeElapsed(sk_gpu_test::PlatformTimerQuery) override;
-    void deleteQuery(sk_gpu_test::PlatformTimerQuery) override;
-
-private:
-    GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext*, const char* ext = "");
-
-    bool validate() const;
-
-    sk_gpu_test::PlatformTimerQuery onQueueTimerStart() const override;
-    void onQueueTimerStop(sk_gpu_test::PlatformTimerQuery) const override;
-
-    static constexpr GrGLenum GL_QUERY_RESULT            = 0x8866;
-    static constexpr GrGLenum GL_QUERY_RESULT_AVAILABLE  = 0x8867;
-    static constexpr GrGLenum GL_TIME_ELAPSED            = 0x88bf;
-    static constexpr GrGLenum GL_GPU_DISJOINT            = 0x8fbb;
-
-    typedef void (GR_GL_FUNCTION_TYPE* GLGetIntegervProc) (GrGLenum, GrGLint*);
-    typedef void (GR_GL_FUNCTION_TYPE* GLGenQueriesProc) (GrGLsizei, GrGLuint*);
-    typedef void (GR_GL_FUNCTION_TYPE* GLDeleteQueriesProc) (GrGLsizei, const GrGLuint*);
-    typedef void (GR_GL_FUNCTION_TYPE* GLBeginQueryProc) (GrGLenum, GrGLuint);
-    typedef void (GR_GL_FUNCTION_TYPE* GLEndQueryProc) (GrGLenum);
-    typedef void (GR_GL_FUNCTION_TYPE* GLGetQueryObjectuivProc) (GrGLuint, GrGLenum, GrGLuint*);
-    typedef void (GR_GL_FUNCTION_TYPE* GLGetQueryObjectui64vProc) (GrGLuint, GrGLenum, GrGLuint64*);
-
-    GLGetIntegervProc           fGLGetIntegerv;
-    GLGenQueriesProc            fGLGenQueries;
-    GLDeleteQueriesProc         fGLDeleteQueries;
-    GLBeginQueryProc            fGLBeginQuery;
-    GLEndQueryProc              fGLEndQuery;
-    GLGetQueryObjectuivProc     fGLGetQueryObjectuiv;
-    GLGetQueryObjectui64vProc   fGLGetQueryObjectui64v;
-
-
-    typedef sk_gpu_test::GpuTimer INHERITED;
-};
-
-GLGpuTimer* GLGpuTimer::CreateIfSupported(const sk_gpu_test::GLTestContext* ctx) {
-    SkAutoTDelete<GLGpuTimer> ret;
-    const GrGLInterface* gl = ctx->gl();
-    if (gl->fExtensions.has("GL_EXT_disjoint_timer_query")) {
-        ret.reset(new GLGpuTimer(true, ctx, "EXT"));
-    } else if (kGL_GrGLStandard == gl->fStandard &&
-               (GrGLGetVersion(gl) > GR_GL_VER(3,3) || gl->fExtensions.has("GL_ARB_timer_query"))) {
-        ret.reset(new GLGpuTimer(false, ctx));
-    } else if (gl->fExtensions.has("GL_EXT_timer_query")) {
-        ret.reset(new GLGpuTimer(false, ctx, "EXT"));
-    }
-    return ret && ret->validate() ? ret.release() : nullptr;
-}
-
-GLGpuTimer::GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext* ctx, const char* ext)
-    : INHERITED(disjointSupport) {
-    ctx->getGLProcAddress(&fGLGetIntegerv, "glGetIntegerv");
-    ctx->getGLProcAddress(&fGLGenQueries, "glGenQueries", ext);
-    ctx->getGLProcAddress(&fGLDeleteQueries, "glDeleteQueries", ext);
-    ctx->getGLProcAddress(&fGLBeginQuery, "glBeginQuery", ext);
-    ctx->getGLProcAddress(&fGLEndQuery, "glEndQuery", ext);
-    ctx->getGLProcAddress(&fGLGetQueryObjectuiv, "glGetQueryObjectuiv", ext);
-    ctx->getGLProcAddress(&fGLGetQueryObjectui64v, "glGetQueryObjectui64v", ext);
-}
-
-bool GLGpuTimer::validate() const {
-    return fGLGetIntegerv && fGLGenQueries && fGLDeleteQueries && fGLBeginQuery && fGLEndQuery &&
-           fGLGetQueryObjectuiv && fGLGetQueryObjectui64v;
-}
-
-sk_gpu_test::PlatformTimerQuery GLGpuTimer::onQueueTimerStart() const {
-    GrGLuint queryID;
-    fGLGenQueries(1, &queryID);
-    if (!queryID) {
-        return sk_gpu_test::kInvalidTimerQuery;
-    }
-    if (this->disjointSupport()) {
-        // Clear the disjoint flag.
-        GrGLint disjoint;
-        fGLGetIntegerv(GL_GPU_DISJOINT, &disjoint);
-    }
-    fGLBeginQuery(GL_TIME_ELAPSED, queryID);
-    return static_cast<sk_gpu_test::PlatformTimerQuery>(queryID);
-}
-
-void GLGpuTimer::onQueueTimerStop(sk_gpu_test::PlatformTimerQuery platformTimer) const {
-    if (sk_gpu_test::kInvalidTimerQuery == platformTimer) {
-        return;
-    }
-    fGLEndQuery(GL_TIME_ELAPSED);
-}
-
-sk_gpu_test::GpuTimer::QueryStatus
-GLGpuTimer::checkQueryStatus(sk_gpu_test::PlatformTimerQuery platformTimer) {
-    const GrGLuint queryID = static_cast<GrGLuint>(platformTimer);
-    if (!queryID) {
-        return QueryStatus::kInvalid;
-    }
-    GrGLuint available = 0;
-    fGLGetQueryObjectuiv(queryID, GL_QUERY_RESULT_AVAILABLE, &available);
-    if (!available) {
-        return QueryStatus::kPending;
-    }
-    if (this->disjointSupport()) {
-        GrGLint disjoint = 1;
-        fGLGetIntegerv(GL_GPU_DISJOINT, &disjoint);
-        if (disjoint) {
-            return QueryStatus::kDisjoint;
-        }
-    }
-    return QueryStatus::kAccurate;
-}
-
-std::chrono::nanoseconds GLGpuTimer::getTimeElapsed(sk_gpu_test::PlatformTimerQuery platformTimer) {
-    SkASSERT(this->checkQueryStatus(platformTimer) >= QueryStatus::kDisjoint);
-    const GrGLuint queryID = static_cast<GrGLuint>(platformTimer);
-    GrGLuint64 nanoseconds;
-    fGLGetQueryObjectui64v(queryID, GL_QUERY_RESULT, &nanoseconds);
-    return std::chrono::nanoseconds(nanoseconds);
-}
-
-void GLGpuTimer::deleteQuery(sk_gpu_test::PlatformTimerQuery platformTimer) {
-    const GrGLuint queryID = static_cast<GrGLuint>(platformTimer);
-    fGLDeleteQueries(1, &queryID);
-}
-
 }  // anonymous namespace

 namespace sk_gpu_test {
@@ -220,7 +91,6 @@ void GLTestContext::init(const GrGLInterface* gl, FenceSync* fenceSync) {
    SkASSERT(!fGL.get());
    fGL.reset(gl);
    fFenceSync = fenceSync ? fenceSync : GLFenceSync::CreateIfSupported(this);
-    fGpuTimer = GLGpuTimer::CreateIfSupported(this);
 }

 void GLTestContext::teardown() {
--- a/tools/skpbench/_benchresult.py
+++ b/tools/skpbench/_benchresult.py
@@ -25,8 +25,6 @@ class BenchResult:
                       '(?P<samples>\d+)'
                       '(?P<sample_ms_pad> +)'
                       '(?P<sample_ms>\d+)'
-                       '(?P<clock_pad> +)'
-                       '(?P<clock>[cg]pu)'
                       '(?P<metric_pad> +)'
                       '(?P<metric>ms|fps)'
                       '(?P<config_pad> +)'
@@ -47,7 +45,6 @@ class BenchResult:
    self.stddev = float(match.group('stddev')[:-1]) # Drop '%' sign.
    self.samples = int(match.group('samples'))
    self.sample_ms = int(match.group('sample_ms'))
-    self.clock = match.group('clock')
    self.metric = match.group('metric')
    self.config = match.group('config')
    self.bench = match.group('bench')
@@ -62,7 +59,7 @@ class BenchResult:
    else:
      values = list()
      for name in ['accum', 'median', 'max', 'min', 'stddev',
-                   'samples', 'sample_ms', 'clock', 'metric', 'config']:
+                   'samples', 'sample_ms', 'metric', 'config']:
        values.append(self.get_string(name + '_pad'))
        values.append(self.get_string(name))
      values.append(config_suffix)
--- a/tools/skpbench/parseskpbench.py
+++ b/tools/skpbench/parseskpbench.py
@@ -8,8 +8,8 @@
 from __future__ import print_function
 from _benchresult import BenchResult
 from argparse import ArgumentParser
-from collections import defaultdict, namedtuple
 from datetime import datetime
+import collections
 import operator
 import os
 import sys
@@ -27,7 +27,7 @@ This script can also be used to generate a Google sheet:
 (1) Install the "Office Editing for Docs, Sheets & Slides" Chrome extension:
    https://chrome.google.com/webstore/detail/office-editing-for-docs-s/gbkeegbaiigmenfmjfclcdgdpimamgkj

-(2) Update your global OS file associations to use Chrome for .csv files.
+(2) Designate Chrome os-wide as the default application for opening .csv files.

 (3) Run parseskpbench.py with the --open flag.

@@ -49,92 +49,75 @@ __argparse.add_argument('sources',

 FLAGS = __argparse.parse_args()

-RESULT_QUALIFIERS = ('sample_ms', 'clock', 'metric')
-
-class FullConfig(namedtuple('fullconfig', ('config',) + RESULT_QUALIFIERS)):
-  def qualified_name(self, qualifiers=RESULT_QUALIFIERS):
-    return get_qualified_name(self.config.replace(',', ' '),
-                              {x:getattr(self, x) for x in qualifiers})
-
-def get_qualified_name(name, qualifiers):
-  if not qualifiers:
-    return name
-  else:
-    args = ('%s=%s' % (k,v) for k,v in qualifiers.iteritems())
-    return '%s (%s)' % (name, ' '.join(args))

 class Parser:
  def __init__(self):
-    self.sheet_qualifiers = {x:None for x in RESULT_QUALIFIERS}
-    self.config_qualifiers = set()
-    self.fullconfigs = list() # use list to preserve the order.
-    self.rows = defaultdict(dict)
-    self.cols = defaultdict(dict)
+    self.configs = list() # use list to preserve the order configs appear in.
+    self.rows = collections.defaultdict(dict)
+    self.cols = collections.defaultdict(dict)
+    self.metric = None
+    self.sample_ms = None

  def parse_file(self, infile):
    for line in infile:
      match = BenchResult.match(line)
      if not match:
        continue
-
-      fullconfig = FullConfig(*(match.get_string(x)
-                                for x in FullConfig._fields))
-      if not fullconfig in self.fullconfigs:
-        self.fullconfigs.append(fullconfig)
-
-      for qualifier, value in self.sheet_qualifiers.items():
-        if value is None:
-          self.sheet_qualifiers[qualifier] = match.get_string(qualifier)
-        elif value != match.get_string(qualifier):
-          del self.sheet_qualifiers[qualifier]
-          self.config_qualifiers.add(qualifier)
-
-      self.rows[match.bench][fullconfig] = match.get_string(FLAGS.result)
-      self.cols[fullconfig][match.bench] = getattr(match, FLAGS.result)
+      if self.metric is None:
+        self.metric = match.metric
+      elif match.metric != self.metric:
+        raise ValueError("results have mismatched metrics (%s and %s)" %
+                         (self.metric, match.metric))
+      if self.sample_ms is None:
+        self.sample_ms = match.sample_ms
+      elif not FLAGS.force and match.sample_ms != self.sample_ms:
+        raise ValueError("results have mismatched sampling times. "
+                         "(use --force to ignore)")
+      if not match.config in self.configs:
+        self.configs.append(match.config)
+      self.rows[match.bench][match.config] = match.get_string(FLAGS.result)
+      self.cols[match.config][match.bench] = getattr(match, FLAGS.result)

  def print_csv(self, outfile=sys.stdout):
-    # Write the title.
-    print(get_qualified_name(FLAGS.result, self.sheet_qualifiers), file=outfile)
+    print('%s_%s' % (FLAGS.result, self.metric), file=outfile)

    # Write the header.
    outfile.write('bench,')
-    for fullconfig in self.fullconfigs:
-      outfile.write('%s,' % fullconfig.qualified_name(self.config_qualifiers))
+    for config in self.configs:
+      outfile.write('%s,' % config)
    outfile.write('\n')

    # Write the rows.
-    for bench, row in self.rows.iteritems():
+    for bench, row in self.rows.items():
      outfile.write('%s,' % bench)
-      for fullconfig in self.fullconfigs:
-        if fullconfig in row:
-          outfile.write('%s,' % row[fullconfig])
+      for config in self.configs:
+        if config in row:
+          outfile.write('%s,' % row[config])
        elif FLAGS.force:
-          outfile.write('NULL,')
+          outfile.write(',')
        else:
          raise ValueError("%s: missing value for %s. (use --force to ignore)" %
-                           (bench,
-                            fullconfig.qualified_name(self.config_qualifiers)))
+                           (bench, config))
      outfile.write('\n')

    # Add simple, literal averages.
    if len(self.rows) > 1:
      outfile.write('\n')
-      self._print_computed_row('MEAN',
+      self.__print_computed_row('MEAN',
        lambda col: reduce(operator.add, col.values()) / len(col),
        outfile=outfile)
-      self._print_computed_row('GEOMEAN',
+      self.__print_computed_row('GEOMEAN',
        lambda col: reduce(operator.mul, col.values()) ** (1.0 / len(col)),
        outfile=outfile)

-  def _print_computed_row(self, name, func, outfile=sys.stdout):
+  def __print_computed_row(self, name, func, outfile=sys.stdout):
    outfile.write('%s,' % name)
-    for fullconfig in self.fullconfigs:
-      if len(self.cols[fullconfig]) != len(self.rows):
-        outfile.write('NULL,')
-        continue
-      outfile.write('%.4g,' % func(self.cols[fullconfig]))
+    for config in self.configs:
+      assert(len(self.cols[config]) == len(self.rows))
+      outfile.write('%.4g,' % func(self.cols[config]))
    outfile.write('\n')

+
 def main():
  parser = Parser()

--- a/tools/skpbench/skpbench.cpp
+++ b/tools/skpbench/skpbench.cpp
@@ -5,7 +5,6 @@
 * found in the LICENSE file.
 */

-#include "GpuTimer.h"
 #include "GrContextFactory.h"
 #include "SkCanvas.h"
 #include "SkOSFile.h"
@@ -34,9 +33,12 @@
 * Currently, only GPU configs are supported.
 */

+using sk_gpu_test::PlatformFence;
+using sk_gpu_test::kInvalidPlatformFence;
+using sk_gpu_test::FenceSync;
+
 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
 DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
-DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
 DEFINE_bool(fps, false, "use fps instead of ms");
 DEFINE_string(skp, "", "path to a single .skp file to benchmark");
 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
@@ -44,13 +46,13 @@ DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
 DEFINE_bool(suppressHeader, false, "don't print a header row before the results");

 static const char* header =
-"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";
+    "   accum    median       max       min   stddev  samples  sample_ms  metric  config    bench";

 static const char* resultFormat =
-"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7li  %9i  %-5s  %-6s  %-9s %s";
+    "%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7li  %9i  %-6s  %-9s %s";

 struct Sample {
-    using duration = std::chrono::nanoseconds;
+    using clock = std::chrono::high_resolution_clock;

    Sample() : fFrames(0), fDuration(0) {}
    double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
@@ -58,13 +60,13 @@ struct Sample {
    double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
    static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }

-    int        fFrames;
-    duration   fDuration;
+    int fFrames;
+    clock::duration fDuration;
 };

 class GpuSync {
 public:
-    GpuSync(const sk_gpu_test::FenceSync* fenceSync);
+    GpuSync(const FenceSync* fenceSync);
    ~GpuSync();

    void syncToPreviousFrame();
@@ -72,8 +74,8 @@ public:
 private:
    void updateFence();

-    const sk_gpu_test::FenceSync* const   fFenceSync;
-    sk_gpu_test::PlatformFence            fFence;
+    const FenceSync* const   fFenceSync;
+    PlatformFence            fFence;
 };

 enum class ExitErr {
@@ -90,10 +92,10 @@ static bool mkdir_p(const SkString& name);
 static SkString join(const SkCommandLineFlags::StringArray&);
 static void exitf(ExitErr, const char* format, ...);

-static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
-                          const SkPicture* skp, std::vector<Sample>* samples) {
-    using clock = std::chrono::high_resolution_clock;
-    const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
+static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const SkPicture* skp,
+                          std::vector<Sample>* samples) {
+    using clock = Sample::clock;
+    const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);

    draw_skp_and_flush(canvas, skp);
@@ -121,66 +123,6 @@ static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* can
    } while (now < endTime || 0 == samples->size() % 2);
 }

-static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
-                                   const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
-                                   const SkPicture* skp, std::vector<Sample>* samples) {
-    using sk_gpu_test::PlatformTimerQuery;
-    using clock = std::chrono::steady_clock;
-    const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
-    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
-
-    if (!gpuTimer->disjointSupport()) {
-        fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
-                        "results may be unreliable\n");
-    }
-
-    draw_skp_and_flush(canvas, skp);
-    GpuSync gpuSync(fenceSync);
-
-    gpuTimer->queueStart();
-    draw_skp_and_flush(canvas, skp);
-    PlatformTimerQuery previousTime = gpuTimer->queueStop();
-    gpuSync.syncToPreviousFrame();
-
-    clock::time_point now = clock::now();
-    const clock::time_point endTime = now + benchDuration;
-
-    do {
-        const clock::time_point sampleEndTime = now + sampleDuration;
-        samples->emplace_back();
-        Sample& sample = samples->back();
-
-        do {
-            gpuTimer->queueStart();
-            draw_skp_and_flush(canvas, skp);
-            PlatformTimerQuery time = gpuTimer->queueStop();
-            gpuSync.syncToPreviousFrame();
-
-            switch (gpuTimer->checkQueryStatus(previousTime)) {
-                using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
-                case QueryStatus::kInvalid:
-                    exitf(ExitErr::kUnavailable, "GPU timer failed");
-                case QueryStatus::kPending:
-                    exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
-                case QueryStatus::kDisjoint:
-                    if (FLAGS_verbosity >= 4) {
-                        fprintf(stderr, "discarding timer query due to disjoint operations.\n");
-                    }
-                    break;
-                case QueryStatus::kAccurate:
-                    sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
-                    ++sample.fFrames;
-                    break;
-            }
-            gpuTimer->deleteQuery(previousTime);
-            previousTime = time;
-            now = clock::now();
-        } while (now < sampleEndTime || 0 == sample.fFrames);
-    } while (now < endTime || 0 == samples->size() % 2);
-
-    gpuTimer->deleteQuery(previousTime);
-}
-
 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
    if (0 == (samples.size() % 2)) {
        exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
@@ -207,8 +149,7 @@ void print_result(const std::vector<Sample>& samples, const char* config, const
    const double stddev = 100/*%*/ * sqrt(variance) / accumValue;

    printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
-           stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
-           config, bench);
+           stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, bench);
    printf("\n");
    fflush(stdout);
 }
@@ -306,15 +247,7 @@ int main(int argc, char** argv) {
    // Run the benchmark.
    SkCanvas* canvas = surface->getCanvas();
    canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
-    if (!FLAGS_gpuClock) {
-        run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
-    } else {
-        if (!testCtx->gpuTimingSupport()) {
-            exitf(ExitErr::kUnavailable, "GPU does not support timing");
-        }
-        run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
-                               &samples);
-    }
+    run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
    print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).c_str());

    // Save a proof (if one was requested).
@@ -367,7 +300,7 @@ static void exitf(ExitErr err, const char* format, ...) {
    exit((int)err);
 }

-GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
+GpuSync::GpuSync(const FenceSync* fenceSync)
    : fFenceSync(fenceSync) {
    this->updateFence();
 }
@@ -377,7 +310,7 @@ GpuSync::~GpuSync() {
 }

 void GpuSync::syncToPreviousFrame() {
-    if (sk_gpu_test::kInvalidFence == fFence) {
+    if (kInvalidPlatformFence == fFence) {
        exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
    }
    if (!fFenceSync->waitFence(fFence)) {
@@ -389,7 +322,7 @@ void GpuSync::syncToPreviousFrame() {

 void GpuSync::updateFence() {
    fFence = fFenceSync->insertFence();
-    if (sk_gpu_test::kInvalidFence == fFence) {
+    if (kInvalidPlatformFence == fFence) {
        exitf(ExitErr::kUnavailable, "failed to insert fence");
    }
 }
--- a/tools/skpbench/skpbench.py
+++ b/tools/skpbench/skpbench.py
@@ -32,8 +32,7 @@ unacceptable stddev.
 __argparse.add_argument('--adb',
    action='store_true', help="execute skpbench over adb")
 __argparse.add_argument('-s', '--device-serial',
-    help="if using adb, ID of the specific device to target "
-         "(only required if more than 1 device is attached)")
+    help="if using adb, id of the specific device to target")
 __argparse.add_argument('-p', '--path',
    help="directory to execute ./skpbench from")
 __argparse.add_argument('-m', '--max-stddev',
@@ -48,10 +47,7 @@ __argparse.add_argument('-v','--verbosity',
 __argparse.add_argument('-d', '--duration',
    type=int, help="number of milliseconds to run each benchmark")
 __argparse.add_argument('-l', '--sample-ms',
-    type=int, help="duration of a sample (minimum)")
-__argparse.add_argument('--gpu',
-    action='store_true',
-    help="perform timing on the gpu clock instead of cpu (gpu work only)")
+    type=int, help="minimum duration of a sample")
 __argparse.add_argument('--fps',
    action='store_true', help="use fps instead of ms")
 __argparse.add_argument('-c', '--config',
@@ -97,8 +93,6 @@ class SKPBench:
    ARGV.extend(['--duration', str(FLAGS.duration)])
  if FLAGS.sample_ms:
    ARGV.extend(['--sampleMs', str(FLAGS.sample_ms)])
-  if FLAGS.gpu:
-    ARGV.extend(['--gpuClock', 'true'])
  if FLAGS.fps:
    ARGV.extend(['--fps', 'true'])
  if FLAGS.path:
@@ -194,7 +188,7 @@ class SKPBench:

  def terminate(self):
    if self._proc:
-      self._proc.terminate()
+      self._proc.kill()
      self._monitor.join()
      self._proc.wait()
      self._proc = None