It looks like we're not always running long enough to give the GPU counters time to converge (and possibly the same for CPU too, but GPU is definitely worse off).

This CL changes our convergence logic from
  - did the last run take more than x milliseconds?
to
  - did the last run take more than x milliseconds and are the last two runs within y% of each other?

There's also now an upper limit where we bail out with an error if we haven't yet met the convergence criteria.  Keeping the lower bound is important for benches where the constant overhead is much larger than the work done in the loop; without it we'll see T(1 loop) == T(2 loops) and converge way too early.
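
To make the new criteria concrete, here's a small self-contained sketch of the check (an illustration only, not the benchmain.cpp code verbatim: kMinMs and kError stand in for the flags, and the maxMs bail-out lives in the outer timing loop rather than here):

    #include <cstdio>

    // Illustrative stand-ins for the minimum-runtime and error-tolerance flags.
    static const double kMinMs = 20.0;   // shortest run we trust
    static const double kError = 0.01;   // consecutive runs must agree within 1%

    // Converged when the run is long enough AND the last two per-loop times
    // agree within 1 +/- kError.
    static bool HasConverged(double prevPerLoop, double currPerLoop, double currRawMs) {
        if (currRawMs < kMinMs) {
            return false;
        }
        const double ratio = currPerLoop / prevPerLoop;
        return (1 - kError) < ratio && ratio < (1 + kError);
    }

    int main() {
        // E.g. two consecutive runs at 10.30ms and 10.25ms per loop, 41ms total:
        // ratio = 0.995, inside 1 +/- 0.01, and 41ms >= 20ms, so we stop ramping.
        printf("%d\n", HasConverged(10.30, 10.25, 41.0));   // prints 1
        return 0;
    }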

This CL also exposed that DeferredCanvasBench had a bug: it was running N^2 loops when we told it to run N.  (My fault.)
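
(For illustration only, here's a minimal standalone sketch of that kind of bug; it assumes, as the diff below suggests, that the draw helper already iterates the requested loop count internally, so the extra outer loop squared it:)

    #include <cstdio>

    static int gDrawCount = 0;

    // Hypothetical stand-in for drawInDeferredCanvas(): it already runs 'loops' iterations.
    static void drawInDeferredCanvas(int loops) {
        for (int i = 0; i < loops; i++) {
            ++gDrawCount;   // one "draw" per inner iteration
        }
    }

    int main() {
        const int loops = 100;               // the N the harness asked for
        for (int i = 0; i < loops; i++) {    // the buggy extra outer loop
            drawInDeferredCanvas(loops);
        }
        printf("%d draws\n", gDrawCount);    // prints "10000 draws": N^2, not N
        return 0;
    }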

I threw in a couple other linty changes that I'd be happy to split off.

BUG=
R=robertphillips@google.com

Review URL: https://codereview.chromium.org/23536046

git-svn-id: http://skia.googlecode.com/svn/trunk@11267 2bbb7eff-a529-9590-31e7-b0007b416f81
Author: mtklein@google.com
Date:   2013-09-13 20:11:09 +00:00
Parent: 3ad5dd009f
Commit: dbd41c8857
2 changed files with 43 additions and 25 deletions

bench/DeferredCanvasBench.cpp

@@ -32,11 +32,7 @@ protected:
device->unref();
initDeferredCanvas(deferredCanvas);
for (int i = 0; i < this->getLoops(); i++) {
drawInDeferredCanvas(deferredCanvas);
}
drawInDeferredCanvas(deferredCanvas);
finalizeDeferredCanvas(deferredCanvas);
deferredCanvas->flush();
}

bench/benchmain.cpp

@@ -37,7 +37,9 @@ enum BenchMode {
kRecord_BenchMode,
kPictureRecord_BenchMode
};
const char* BenchMode_Name[] = { "normal", "deferred", "deferredSilent", "record", "picturerecord" };
const char* BenchMode_Name[] = {
"normal", "deferred", "deferredSilent", "record", "picturerecord"
};
///////////////////////////////////////////////////////////////////////////////
@@ -271,8 +273,23 @@ DEFINE_string(mode, "normal",
"picturerecord: draw from an SkPicture to an SkPicture.\n");
DEFINE_string(config, "", "Run configs given. If empty, runs the defaults set in gConfigs.");
DEFINE_string(logFile, "", "Also write stdout here.");
DEFINE_int32(benchMs, 20, "Target time in ms to run each benchmark config.");
DEFINE_int32(minMs, 20, "Shortest time we'll allow a benchmark to run.");
DEFINE_int32(maxMs, 4000, "Longest time we'll allow a benchmark to run.");
DEFINE_double(error, 0.01,
"Ratio of subsequent bench measurements must drop within 1±error to converge.");
DEFINE_string(timeFormat, "%9.2f", "Format to print results, in milliseconds per 1000 loops.");
DEFINE_bool2(verbose, v, false, "Print more.");
// Has this bench converged? First two arguments are milliseconds / loop iteration,
// last is overall runtime in milliseconds.
static bool HasConverged(double prevPerLoop, double currPerLoop, double currRaw) {
if (currRaw < FLAGS_minMs) {
return false;
}
const double low = 1 - FLAGS_error, high = 1 + FLAGS_error;
const double ratio = currPerLoop / prevPerLoop;
return low < ratio && ratio < high;
}
int tool_main(int argc, char** argv);
int tool_main(int argc, char** argv) {
@@ -339,20 +356,17 @@ int tool_main(int argc, char** argv) {
if (kGPU_Backend == config.backend) {
GrContext* context = gContextFactory.get(config.contextType);
if (NULL == context) {
SkString error;
error.printf("Error creating GrContext for config %s. Config will be skipped.\n",
config.name);
logger.logError(error);
logger.logError(SkStringPrintf(
"Error creating GrContext for config %s. Config will be skipped.\n",
config.name));
configs.remove(i);
--i;
continue;
}
if (config.sampleCount > context->getMaxSampleCount()){
SkString error;
error.printf("Sample count (%d) for config %s is unsupported. "
"Config will be skipped.\n",
config.sampleCount, config.name);
logger.logError(error);
logger.logError(SkStringPrintf(
"Sample count (%d) for config %s is unsupported. Config will be skipped.\n",
config.sampleCount, config.name));
configs.remove(i);
--i;
continue;
@@ -484,9 +498,8 @@ int tool_main(int argc, char** argv) {
config.sampleCount,
context));
if (!device.get()) {
SkString error;
error.printf("Device creation failure for config %s. Will skip.\n", config.name);
logger.logError(error);
logger.logError(SkStringPrintf(
"Device creation failure for config %s. Will skip.\n", config.name));
continue;
}
@@ -536,20 +549,23 @@ int tool_main(int argc, char** argv) {
BenchTimer timer;
#endif
double previous = 1.0/0.0;
bool converged = false;
bench->setLoops(0);
if (FLAGS_verbose) { SkDebugf("%s %s: ", bench->getName(), config.name); }
do {
// Ramp up 1 -> 2 -> 4 -> ... -> ~1 billion.
const int loops = bench->getLoops();
if (loops >= (1<<30)) {
if (loops >= (1<<30) || timer.fWall > FLAGS_maxMs) {
// If you find it takes more than a billion loops to get up to 20ms of runtime,
// you've got a computer clocked at several THz or have a broken benchmark. ;)
// "1B ought to be enough for anybody."
SkString str;
str.printf("Can't ramp %s to %dms.\n", bench->getName(), FLAGS_benchMs);
logger.logError(str);
logger.logError(SkStringPrintf(
"Can't get %s %s to converge in %dms.\n",
bench->getName(), config.name, FLAGS_maxMs));
break;
}
bench->setLoops(loops == 0 ? 1 : loops * 4);
bench->setLoops(loops == 0 ? 1 : loops * 2);
if ((benchMode == kRecord_BenchMode || benchMode == kPictureRecord_BenchMode)) {
// Clear the recorded commands so that they do not accumulate.
@@ -586,7 +602,13 @@ int tool_main(int argc, char** argv) {
}
#endif
timer.end();
} while (!kIsDebug && timer.fWall < FLAGS_benchMs); // One loop only in debug mode.
const double current = timer.fWall / bench->getLoops();
if (FLAGS_verbose && current > previous) { SkDebugf("↑"); }
if (FLAGS_verbose) { SkDebugf("%.3g ", current); }
converged = HasConverged(previous, current, timer.fWall);
previous = current;
} while (!kIsDebug && !converged);
if (FLAGS_verbose) { SkDebugf("\n"); }
if (FLAGS_outDir.count() && kNonRendering_Backend != config.backend) {
saveFile(bench->getName(),