2016-01-14 19:05:22 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2016 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
2016-01-15 18:00:08 +00:00
|
|
|
#include "GrCaps.h"
|
|
|
|
#include "GrContextFactory.h"
|
2016-01-14 19:05:22 +00:00
|
|
|
#include "Benchmark.h"
|
2016-01-15 20:07:39 +00:00
|
|
|
#include "ResultsWriter.h"
|
2016-01-14 19:05:22 +00:00
|
|
|
#include "SkCommandLineFlags.h"
|
|
|
|
#include "SkOSFile.h"
|
|
|
|
#include "SkStream.h"
|
2016-01-15 18:00:08 +00:00
|
|
|
#include "SkSurface.h"
|
|
|
|
#include "SkTime.h"
|
2016-01-28 14:26:35 +00:00
|
|
|
#include "SkTLList.h"
|
|
|
|
#include "SkThreadUtils.h"
|
2016-01-15 18:00:08 +00:00
|
|
|
#include "Stats.h"
|
|
|
|
#include "Timer.h"
|
2016-01-14 19:05:22 +00:00
|
|
|
#include "VisualSKPBench.h"
|
2016-01-15 18:00:08 +00:00
|
|
|
#include "gl/GrGLDefines.h"
|
2016-01-28 14:26:35 +00:00
|
|
|
#include "../private/SkMutex.h"
|
|
|
|
#include "../private/SkSemaphore.h"
|
|
|
|
#include "../private/SkGpuFenceSync.h"
|
2016-01-14 19:05:22 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
// posix only for now
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
|
2016-01-14 19:05:22 +00:00
|
|
|
/*
 * This is an experimental GPU-only benchmarking program. The initial implementation will only
 * support SKPs.
 */
|
|
|
|
|
|
|
|
// To get image decoders linked in we have to do the below magic
|
|
|
|
#include "SkForceLinking.h"
|
|
|
|
#include "SkImageDecoder.h"
|
|
|
|
__SK_FORCE_IMAGE_DECODER_LINKING;
|
|
|
|
|
2016-01-15 18:00:08 +00:00
|
|
|
// Sentinel value for --loops: when the flag equals this, the loop count is auto-tuned per bench.
static const int kAutoTuneLoops = 0;

// Default value of --loops: a single loop in debug builds, auto-tuning everywhere else.
#ifdef SK_DEBUG
static const int kDefaultLoops = 1;
#else
static const int kDefaultLoops = kAutoTuneLoops;
#endif
|
|
|
|
|
|
|
|
// Builds the help text for the --loops flag, embedding the auto-tune sentinel value in it.
static SkString loops_help_txt() {
    SkString text;
    text.printf(
            "Number of times to run each bench. Set this to %d to auto-"
            "tune for each bench. Timings are only reported when auto-tuning.",
            kAutoTuneLoops);
    return text;
}
|
|
|
|
|
|
|
|
// Command-line flags. Each DEFINE_* creates a FLAGS_<name> global that the code below reads.

// Input selection.
DEFINE_string(skps, "skps", "Directory to read skps from.");
DEFINE_string2(match, m, nullptr,
               "[~][^]substring[$] [...] of GM name to run.\n"
               "Multiple matches may be separated by spaces.\n"
               "~ causes a matching bench to always be skipped\n"
               "^ requires the start of the bench to match\n"
               "$ requires the end of the bench to match\n"
               "^ and $ requires an exact match\n"
               "If a bench does not match any list entry,\n"
               "it is skipped unless some list entry starts with ~");

// Timing and loop tuning.
DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag.");
DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this.");
DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str());
DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU.");

// Output and execution mode.
DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs.");
DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together");
DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in one process");
|
2016-01-14 19:05:22 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
// Formats a duration given in milliseconds by forwarding to HumanizeMs().
static SkString humanize(double ms) {
    SkString text = HumanizeMs(ms);
    return text;
}
// Expands to a C string owned by a temporary SkString, so it is only valid for the duration of
// the full expression it appears in (e.g. as a direct argument of SkDebugf).
#define HUMANIZE(ms) humanize(ms).c_str()
|
|
|
|
|
2016-01-14 19:05:22 +00:00
|
|
|
namespace kilobench {
|
|
|
|
class BenchmarkStream {
|
|
|
|
public:
|
|
|
|
BenchmarkStream() : fCurrentSKP(0) {
|
|
|
|
for (int i = 0; i < FLAGS_skps.count(); i++) {
|
|
|
|
if (SkStrEndsWith(FLAGS_skps[i], ".skp")) {
|
|
|
|
fSKPs.push_back() = FLAGS_skps[i];
|
|
|
|
} else {
|
|
|
|
SkOSFile::Iter it(FLAGS_skps[i], ".skp");
|
|
|
|
SkString path;
|
|
|
|
while (it.next(&path)) {
|
|
|
|
fSKPs.push_back() = SkOSPath::Join(FLAGS_skps[0], path.c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Benchmark* next() {
|
|
|
|
Benchmark* bench = nullptr;
|
|
|
|
// skips non matching benches
|
|
|
|
while ((bench = this->innerNext()) &&
|
|
|
|
(SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getUniqueName()) ||
|
|
|
|
!bench->isSuitableFor(Benchmark::kGPU_Backend))) {
|
|
|
|
delete bench;
|
|
|
|
}
|
|
|
|
return bench;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
static bool ReadPicture(const char* path, SkAutoTUnref<SkPicture>* pic) {
|
|
|
|
// Not strictly necessary, as it will be checked again later,
|
|
|
|
// but helps to avoid a lot of pointless work if we're going to skip it.
|
|
|
|
if (SkCommandLineFlags::ShouldSkip(FLAGS_match, path)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
SkAutoTDelete<SkStream> stream(SkStream::NewFromFile(path));
|
|
|
|
if (stream.get() == nullptr) {
|
|
|
|
SkDebugf("Could not read %s.\n", path);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
pic->reset(SkPicture::CreateFromStream(stream.get()));
|
|
|
|
if (pic->get() == nullptr) {
|
|
|
|
SkDebugf("Could not read %s as an SkPicture.\n", path);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
Benchmark* innerNext() {
|
|
|
|
// Render skps
|
|
|
|
while (fCurrentSKP < fSKPs.count()) {
|
|
|
|
const SkString& path = fSKPs[fCurrentSKP++];
|
|
|
|
SkAutoTUnref<SkPicture> pic;
|
|
|
|
if (!ReadPicture(path.c_str(), &pic)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
SkString name = SkOSPath::Basename(path.c_str());
|
|
|
|
return new VisualSKPBench(name.c_str(), pic.get());
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
SkTArray<SkString> fSKPs;
|
|
|
|
int fCurrentSKP;
|
|
|
|
};
|
|
|
|
|
2016-01-15 18:00:08 +00:00
|
|
|
struct GPUTarget {
|
|
|
|
void setup() {
|
2016-01-28 14:26:35 +00:00
|
|
|
fGL->makeCurrent();
|
2016-01-15 18:00:08 +00:00
|
|
|
// Make sure we're done with whatever came before.
|
2016-01-28 14:26:35 +00:00
|
|
|
SK_GL(*fGL, Finish());
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; }
|
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
void endTiming(bool usePlatformSwapBuffers) {
|
|
|
|
if (fGL) {
|
|
|
|
SK_GL(*fGL, Flush());
|
|
|
|
if (usePlatformSwapBuffers) {
|
|
|
|
fGL->swapBuffers();
|
|
|
|
} else {
|
|
|
|
fGL->waitOnSyncOrSwap();
|
|
|
|
}
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
|
|
|
}
|
2016-01-28 14:26:35 +00:00
|
|
|
void finish() {
|
|
|
|
SK_GL(*fGL, Finish());
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool needsFrameTiming(int* maxFrameLag) const {
|
2016-01-28 14:26:35 +00:00
|
|
|
if (!fGL->getMaxGpuFrameLag(maxFrameLag)) {
|
2016-01-15 18:00:08 +00:00
|
|
|
// Frame lag is unknown.
|
|
|
|
*maxFrameLag = FLAGS_gpuFrameLag;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText,
|
|
|
|
GrContextFactory::GLContextType ctxType,
|
|
|
|
GrContextFactory::GLContextOptions ctxOptions, int numSamples) {
|
|
|
|
GrContext* context = factory->get(ctxType, ctxOptions);
|
|
|
|
int maxRTSize = context->caps()->maxRenderTargetSize();
|
|
|
|
SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize),
|
|
|
|
SkTMin(bench->getSize().fY, maxRTSize),
|
|
|
|
kN32_SkColorType, kPremul_SkAlphaType);
|
|
|
|
uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag :
|
|
|
|
0;
|
|
|
|
SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
|
2016-01-28 14:26:35 +00:00
|
|
|
fSurface.reset(SkSurface::NewRenderTarget(context,
|
|
|
|
SkSurface::kNo_Budgeted, info,
|
|
|
|
numSamples, &props));
|
|
|
|
fGL = factory->getContextInfo(ctxType, ctxOptions).fGLContext;
|
|
|
|
if (!fSurface.get()) {
|
2016-01-15 18:00:08 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Kilobench should only be used on platforms with fence sync support
|
2016-01-28 14:26:35 +00:00
|
|
|
SkASSERT(fGL->fenceSyncSupport());
|
2016-01-15 18:00:08 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
SkCanvas* getCanvas() const {
|
2016-01-28 14:26:35 +00:00
|
|
|
if (!fSurface.get()) {
|
2016-01-15 18:00:08 +00:00
|
|
|
return nullptr;
|
|
|
|
}
|
2016-01-28 14:26:35 +00:00
|
|
|
return fSurface->getCanvas();
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool capturePixels(SkBitmap* bmp) {
|
|
|
|
SkCanvas* canvas = this->getCanvas();
|
|
|
|
if (!canvas) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
bmp->setInfo(canvas->imageInfo());
|
|
|
|
if (!canvas->readPixels(bmp, 0, 0)) {
|
|
|
|
SkDebugf("Can't read canvas pixels.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
SkGLContext* gl() { return fGL; }
|
|
|
|
|
2016-01-15 18:00:08 +00:00
|
|
|
private:
|
2016-01-28 14:26:35 +00:00
|
|
|
SkGLContext* fGL;
|
|
|
|
SkAutoTDelete<SkSurface> fSurface;
|
2016-01-15 18:00:08 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Encodes the current contents of the target's canvas as a PNG at 'filename', creating the
 * parent directory first. Returns false (after logging where applicable) if the filename is
 * empty, the canvas has an unknown color type, or any of the read-back / mkdir / open / encode
 * steps fails.
 */
static bool write_canvas_png(GPUTarget* target, const SkString& filename) {
    if (filename.isEmpty()) {
        return false;
    }

    SkCanvas* canvas = target->getCanvas();
    if (canvas && kUnknown_SkColorType == canvas->imageInfo().colorType()) {
        return false;
    }

    SkBitmap pixels;
    if (!target->capturePixels(&pixels)) {
        return false;
    }

    const SkString parentDir = SkOSPath::Dirname(filename.c_str());
    if (!sk_mkdir(parentDir.c_str())) {
        SkDebugf("Can't make dir %s.\n", parentDir.c_str());
        return false;
    }

    SkFILEWStream out(filename.c_str());
    if (!out.isValid()) {
        SkDebugf("Can't write %s.\n", filename.c_str());
        return false;
    }

    const bool encoded = SkImageEncoder::EncodeStream(&out, pixels, SkImageEncoder::kPNG_Type, 100);
    if (!encoded) {
        SkDebugf("Can't encode a PNG.\n");
        return false;
    }
    return true;
}
|
|
|
|
|
|
|
|
static int detect_forever_loops(int loops) {
|
|
|
|
// look for a magic run-forever value
|
|
|
|
if (loops < 0) {
|
|
|
|
loops = SK_MaxS32;
|
|
|
|
}
|
|
|
|
return loops;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int clamp_loops(int loops) {
|
|
|
|
if (loops < 1) {
|
|
|
|
SkDebugf("ERROR: clamping loops from %d to 1. "
|
|
|
|
"There's probably something wrong with the bench.\n", loops);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (loops > FLAGS_maxLoops) {
|
|
|
|
SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops);
|
|
|
|
return FLAGS_maxLoops;
|
|
|
|
}
|
|
|
|
return loops;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Current time in milliseconds, derived from the nanosecond clock SkTime::GetNSecs().
static double now_ms() {
    const double kNanosToMillis = 1e-6;
    return SkTime::GetNSecs() * kNanosToMillis;
}
|
2016-01-28 14:26:35 +00:00
|
|
|
|
|
|
|
struct TimingThread {
|
|
|
|
TimingThread(SkGLContext* mainContext)
|
|
|
|
: fFenceSync(mainContext->fenceSync())
|
|
|
|
, fMainContext(mainContext)
|
|
|
|
, fDone(false) {}
|
|
|
|
|
|
|
|
static void Loop(void* data) {
|
|
|
|
TimingThread* timingThread = reinterpret_cast<TimingThread*>(data);
|
|
|
|
timingThread->timingLoop();
|
|
|
|
}
|
|
|
|
|
|
|
|
// To ensure waiting for the sync actually does something, we check to make sure the we exceed
|
|
|
|
// some small value
|
|
|
|
const double kMinElapsed = 1e-6;
|
|
|
|
bool sanity(double start) const {
|
|
|
|
double elapsed = now_ms() - start;
|
|
|
|
return elapsed > kMinElapsed;
|
|
|
|
}
|
|
|
|
|
|
|
|
void waitFence(SkPlatformGpuFence sync) {
|
|
|
|
SkDEBUGCODE(double start = now_ms());
|
|
|
|
fFenceSync->waitFence(sync, false);
|
|
|
|
SkASSERT(sanity(start));
|
|
|
|
}
|
|
|
|
|
|
|
|
void timingLoop() {
|
|
|
|
// Create a context which shares display lists with the main thread
|
|
|
|
SkAutoTDelete<SkGLContext> glContext(SkCreatePlatformGLContext(kNone_GrGLStandard,
|
|
|
|
fMainContext));
|
|
|
|
glContext->makeCurrent();
|
|
|
|
|
|
|
|
// Basic timing methodology is:
|
|
|
|
// 1) Wait on semaphore until main thread indicates its time to start timing the frame
|
|
|
|
// 2) Wait on frame start sync, record time. This is start of the frame.
|
|
|
|
// 3) Wait on semaphore until main thread indicates its time to finish timing the frame
|
|
|
|
// 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTime = frame time
|
|
|
|
// 5) Wait on semaphore until main thread indicates we should time the next frame or quit
|
|
|
|
while (true) {
|
|
|
|
fSemaphore.wait();
|
|
|
|
|
|
|
|
// get start sync
|
|
|
|
SkPlatformGpuFence startSync = this->popStartSync();
|
|
|
|
|
|
|
|
// wait on sync
|
|
|
|
this->waitFence(startSync);
|
|
|
|
double start = kilobench::now_ms();
|
|
|
|
|
|
|
|
// do we want to sleep here?
|
|
|
|
// wait for end sync
|
|
|
|
fSemaphore.wait();
|
|
|
|
|
|
|
|
// get end sync
|
|
|
|
SkPlatformGpuFence endSync = this->popEndSync();
|
|
|
|
|
|
|
|
// wait on sync
|
|
|
|
this->waitFence(endSync);
|
|
|
|
double elapsed = kilobench::now_ms() - start;
|
|
|
|
|
|
|
|
// No mutex needed, client won't touch timings until we're done
|
|
|
|
fTimings.push_back(elapsed);
|
|
|
|
|
|
|
|
// clean up fences
|
|
|
|
fFenceSync->deleteFence(startSync);
|
|
|
|
fFenceSync->deleteFence(endSync);
|
|
|
|
|
|
|
|
fSemaphore.wait();
|
|
|
|
if (this->isDone()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); }
|
|
|
|
|
|
|
|
SkPlatformGpuFence popStartSync() {
|
|
|
|
return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); }
|
|
|
|
|
|
|
|
SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); }
|
|
|
|
|
|
|
|
void setDone() {
|
|
|
|
SkAutoMutexAcquire done(fDoneMutex);
|
|
|
|
fDone = true;
|
|
|
|
fSemaphore.signal();
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue;
|
|
|
|
|
|
|
|
void pushSync(SyncQueue* queue, SkMutex* mutex) {
|
|
|
|
SkAutoMutexAcquire am(mutex);
|
|
|
|
*queue->addToHead() = fFenceSync->insertFence();
|
|
|
|
fSemaphore.signal();
|
|
|
|
}
|
|
|
|
|
|
|
|
SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) {
|
|
|
|
SkAutoMutexAcquire am(mutex);
|
|
|
|
SkPlatformGpuFence sync = *queue->head();
|
|
|
|
queue->popHead();
|
|
|
|
return sync;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isDone() {
|
|
|
|
SkAutoMutexAcquire am1(fFrameStartSyncsMutex);
|
|
|
|
SkAutoMutexAcquire done(fDoneMutex);
|
|
|
|
if (fDone && fFrameStartSyncs.isEmpty()) {
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
2016-01-28 14:26:35 +00:00
|
|
|
|
|
|
|
const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
SkGpuFenceSync* fFenceSync;
|
|
|
|
SkSemaphore fSemaphore;
|
|
|
|
SkMutex fFrameStartSyncsMutex;
|
|
|
|
SyncQueue fFrameStartSyncs;
|
|
|
|
SkMutex fFrameEndSyncsMutex;
|
|
|
|
SyncQueue fFrameEndSyncs;
|
|
|
|
SkTArray<double> fTimings;
|
|
|
|
SkMutex fDoneMutex;
|
|
|
|
SkGLContext* fMainContext;
|
|
|
|
bool fDone;
|
|
|
|
};
|
|
|
|
|
|
|
|
static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) {
|
|
|
|
SkCanvas* canvas = target->getCanvas();
|
|
|
|
canvas->clear(SK_ColorWHITE);
|
2016-01-15 18:00:08 +00:00
|
|
|
bench->preDraw(canvas);
|
2016-01-28 14:26:35 +00:00
|
|
|
|
|
|
|
if (timingThread) {
|
|
|
|
timingThread->pushStartSync();
|
|
|
|
}
|
2016-01-15 18:00:08 +00:00
|
|
|
double start = now_ms();
|
|
|
|
canvas = target->beginTiming(canvas);
|
|
|
|
bench->draw(loops, canvas);
|
2016-01-28 14:26:35 +00:00
|
|
|
canvas->flush();
|
|
|
|
target->endTiming(timingThread ? true : false);
|
|
|
|
|
2016-01-15 18:00:08 +00:00
|
|
|
double elapsed = now_ms() - start;
|
2016-01-28 14:26:35 +00:00
|
|
|
if (timingThread) {
|
|
|
|
timingThread->pushEndSync();
|
|
|
|
timingThread->setDone();
|
|
|
|
}
|
2016-01-15 18:00:08 +00:00
|
|
|
bench->postDraw(canvas);
|
|
|
|
return elapsed;
|
|
|
|
}
|
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
// TODO For now we don't use the background timing thread to tune loops
|
2016-01-15 18:00:08 +00:00
|
|
|
static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) {
|
|
|
|
// First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
|
|
|
|
int loops = bench->calculateLoops(FLAGS_loops);
|
|
|
|
if (kAutoTuneLoops == loops) {
|
|
|
|
loops = 1;
|
|
|
|
double elapsed = 0;
|
|
|
|
do {
|
|
|
|
if (1<<30 == loops) {
|
|
|
|
// We're about to wrap. Something's wrong with the bench.
|
|
|
|
loops = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
loops *= 2;
|
|
|
|
// If the GPU lets frames lag at all, we need to make sure we're timing
|
|
|
|
// _this_ round, not still timing last round.
|
|
|
|
for (int i = 0; i < maxGpuFrameLag; i++) {
|
2016-01-28 14:26:35 +00:00
|
|
|
elapsed = time(loops, bench, target, nullptr);
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
|
|
|
} while (elapsed < FLAGS_gpuMs);
|
|
|
|
|
|
|
|
// We've overshot at least a little. Scale back linearly.
|
|
|
|
loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);
|
|
|
|
loops = clamp_loops(loops);
|
|
|
|
|
|
|
|
// Make sure we're not still timing our calibration.
|
2016-01-28 14:26:35 +00:00
|
|
|
target->finish();
|
2016-01-15 18:00:08 +00:00
|
|
|
} else {
|
|
|
|
loops = detect_forever_loops(loops);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pretty much the same deal as the calibration: do some warmup to make
|
|
|
|
// sure we're timing steady-state pipelined frames.
|
|
|
|
for (int i = 0; i < maxGpuFrameLag - 1; i++) {
|
2016-01-28 14:26:35 +00:00
|
|
|
time(loops, bench, target, nullptr);
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return loops;
|
|
|
|
}
|
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
struct AutoSetupContextBenchAndTarget {
|
|
|
|
AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) {
|
|
|
|
GrContextOptions grContextOpts;
|
|
|
|
fCtxFactory.reset(new GrContextFactory(grContextOpts));
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
SkAssertResult(fTarget.init(bench, fCtxFactory, false,
|
|
|
|
GrContextFactory::kNative_GLContextType,
|
|
|
|
GrContextFactory::kNone_GLContextOptions, 0));
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
fCanvas = fTarget.getCanvas();
|
|
|
|
fTarget.setup();
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
bench->perCanvasPreDraw(fCanvas);
|
|
|
|
fTarget.needsFrameTiming(&fMaxFrameLag);
|
|
|
|
}
|
|
|
|
|
|
|
|
int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); }
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
double timeSample(int loops, TimingThread* timingThread) {
|
2016-01-15 20:07:39 +00:00
|
|
|
for (int i = 0; i < fMaxFrameLag; i++) {
|
2016-01-28 14:26:35 +00:00
|
|
|
time(loops, fBenchmark, &fTarget, timingThread);
|
2016-01-15 20:07:39 +00:00
|
|
|
}
|
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
return time(loops, fBenchmark, &fTarget, timingThread) / loops;
|
2016-01-15 18:00:08 +00:00
|
|
|
}
|
2016-01-28 14:26:35 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); }
|
|
|
|
|
|
|
|
SkAutoTDelete<GrContextFactory> fCtxFactory;
|
|
|
|
GPUTarget fTarget;
|
|
|
|
SkCanvas* fCanvas;
|
|
|
|
Benchmark* fBenchmark;
|
|
|
|
int fMaxFrameLag;
|
|
|
|
};
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
/**
 * Sets up a context and target for 'bench', determines the loop count to use for it and, when
 * --writePath is set, saves the rendered canvas as <writePath>/gpu/<bench name>.png.
 * Returns the loop count.
 */
int setup_loops(Benchmark* bench) {
    AutoSetupContextBenchAndTarget ascbt(bench);
    const int loops = ascbt.getLoops();
    ascbt.teardownBench();

    const bool wantPng = !FLAGS_writePath.isEmpty() && FLAGS_writePath[0];
    if (wantPng) {
        const SkString gpuDir = SkOSPath::Join(FLAGS_writePath[0], "gpu");
        SkString pngFilename = SkOSPath::Join(gpuDir.c_str(), bench->getUniqueName());
        pngFilename.append(".png");
        write_canvas_png(&ascbt.fTarget, pngFilename);
    }

    return loops;
}
|
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
// One timing sample of a benchmark, as produced by time_sample().
struct Sample {
    double fCpu;  // per-loop frame time in ms measured on the calling thread
    double fGpu;  // smallest frame time in ms recorded by the background timing thread
};
|
|
|
|
|
|
|
|
/**
 * Takes one timing sample of 'bench' using a freshly created context and target.
 *
 * fCpu always receives the per-loop time measured on the calling thread. fGpu receives the
 * smallest frame time recorded by the background timing thread when --useBackgroundThread is
 * set, and is 0 otherwise.
 */
Sample time_sample(Benchmark* bench, int loops) {
    AutoSetupContextBenchAndTarget ascbt(bench);

    // Give both fields a defined value up front: callers read fGpu unconditionally (it is
    // formatted into the result string / pushed into the sample array), even when the
    // background thread is disabled and nothing below assigns it.
    Sample sample;
    sample.fCpu = 0;
    sample.fGpu = 0;

    if (FLAGS_useBackgroundThread) {
        TimingThread timingThread(ascbt.fTarget.gl());
        SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &timingThread));
        nativeThread->start();
        sample.fCpu = ascbt.timeSample(loops, &timingThread);
        nativeThread->join();

        // return the min
        double min = SK_ScalarMax;
        for (int i = 0; i < timingThread.timings().count(); i++) {
            min = SkTMin(min, timingThread.timings()[i]);
        }
        sample.fGpu = min;
    } else {
        sample.fCpu = ascbt.timeSample(loops, nullptr);
    }

    ascbt.teardownBench();

    return sample;
}
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
} // namespace kilobench
|
|
|
|
|
|
|
|
// Size in bytes of the buffers used to pass a result string from a forked child process back
// to the parent in kilobench_main().
static const int kOutResultSize = 1024;
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
void printResult(const SkTArray<double>& samples, int loops, const char* name, const char* mod) {
|
|
|
|
SkString newName(name);
|
|
|
|
newName.appendf("_%s", mod);
|
|
|
|
Stats stats(samples);
|
|
|
|
const double stddev_percent = 100 * sqrt(stats.var) / stats.mean;
|
|
|
|
SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n"
|
|
|
|
, loops
|
|
|
|
, HUMANIZE(stats.min)
|
|
|
|
, HUMANIZE(stats.median)
|
|
|
|
, HUMANIZE(stats.mean)
|
|
|
|
, HUMANIZE(stats.max)
|
|
|
|
, stddev_percent
|
|
|
|
, stats.plot.c_str()
|
|
|
|
, "gpu"
|
|
|
|
, newName.c_str()
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
int kilobench_main() {
|
2016-01-14 19:05:22 +00:00
|
|
|
kilobench::BenchmarkStream benchStream;
|
2016-01-15 18:00:08 +00:00
|
|
|
|
|
|
|
SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n",
|
|
|
|
FLAGS_samples, "samples");
|
|
|
|
|
2016-01-15 20:07:39 +00:00
|
|
|
int descriptors[2];
|
|
|
|
if (pipe(descriptors) != 0) {
|
|
|
|
SkFAIL("Failed to open a pipe\n");
|
|
|
|
}
|
|
|
|
|
2016-01-14 19:05:22 +00:00
|
|
|
while (Benchmark* b = benchStream.next()) {
|
|
|
|
SkAutoTDelete<Benchmark> bench(b);
|
2016-01-15 18:00:08 +00:00
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
int loops = 1;
|
|
|
|
SkTArray<double> cpuSamples;
|
|
|
|
SkTArray<double> gpuSamples;
|
2016-01-15 20:07:39 +00:00
|
|
|
for (int i = 0; i < FLAGS_samples + 1; i++) {
|
|
|
|
// We fork off a new process to setup the grcontext and run the test while we wait
|
2016-01-28 14:26:35 +00:00
|
|
|
if (FLAGS_useMultiProcess) {
|
|
|
|
int childPid = fork();
|
|
|
|
if (childPid > 0) {
|
|
|
|
char result[kOutResultSize];
|
|
|
|
if (read(descriptors[0], result, kOutResultSize) < 0) {
|
|
|
|
SkFAIL("Failed to read from pipe\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
// if samples == 0 then parse # of loops
|
|
|
|
// else parse float
|
|
|
|
if (i == 0) {
|
|
|
|
sscanf(result, "%d", &loops);
|
|
|
|
} else {
|
|
|
|
sscanf(result, "%lf %lf", &cpuSamples.push_back(),
|
|
|
|
&gpuSamples.push_back());
|
|
|
|
}
|
|
|
|
|
|
|
|
// wait until exit
|
|
|
|
int status;
|
|
|
|
waitpid(childPid, &status, 0);
|
|
|
|
} else if (0 == childPid) {
|
|
|
|
char result[kOutResultSize];
|
|
|
|
if (i == 0) {
|
|
|
|
sprintf(result, "%d", kilobench::setup_loops(bench));
|
|
|
|
} else {
|
|
|
|
kilobench::Sample sample = kilobench::time_sample(bench, loops);
|
|
|
|
sprintf(result, "%lf %lf", sample.fCpu, sample.fGpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure to write the null terminator
|
|
|
|
if (write(descriptors[1], result, strlen(result) + 1) < 0) {
|
|
|
|
SkFAIL("Failed to write to pipe\n");
|
|
|
|
}
|
|
|
|
return 0;
|
2016-01-15 20:07:39 +00:00
|
|
|
} else {
|
2016-01-28 14:26:35 +00:00
|
|
|
SkFAIL("Fork failed\n");
|
2016-01-15 20:07:39 +00:00
|
|
|
}
|
2016-01-28 14:26:35 +00:00
|
|
|
} else {
|
2016-01-15 20:07:39 +00:00
|
|
|
if (i == 0) {
|
2016-01-28 14:26:35 +00:00
|
|
|
loops = kilobench::setup_loops(bench);
|
2016-01-15 20:07:39 +00:00
|
|
|
} else {
|
2016-01-28 14:26:35 +00:00
|
|
|
kilobench::Sample sample = kilobench::time_sample(bench, loops);
|
|
|
|
cpuSamples.push_back(sample.fCpu);
|
|
|
|
gpuSamples.push_back(sample.fGpu);
|
2016-01-15 20:07:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-28 14:26:35 +00:00
|
|
|
printResult(cpuSamples, loops, bench->getUniqueName(), "cpu");
|
|
|
|
if (FLAGS_useBackgroundThread) {
|
|
|
|
printResult(gpuSamples, loops, bench->getUniqueName(), "gpu");
|
|
|
|
}
|
2016-01-15 20:07:39 +00:00
|
|
|
}
|
2016-01-14 19:05:22 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if !defined SK_BUILD_FOR_IOS
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
SkCommandLineFlags::Parse(argc, argv);
|
|
|
|
return kilobench_main();
|
|
|
|
}
|
|
|
|
#endif
|