Allow benchmarks to provide the standard deviation.
Some benchmarks include their own runner which provides an overall average and a standard deviation. This enables extraction of that value similar to the other measurements. These benchmarks should only be run once. If a benchmark specifies multiple runs and provides a standard deviation, a warning will be issued that makes the build fail on the buildbot side. TEST=python -m unittest run_benchmarks_test BUG=393947 LOG=n R=jkummerow@chromium.org Review URL: https://codereview.chromium.org/395633012 git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22424 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
f89f5f134a
commit
522beebb1a
@ -156,6 +156,7 @@ class DefaultSentinel(Node):
|
||||
self.flags = []
|
||||
self.resources = []
|
||||
self.results_regexp = None
|
||||
self.stddev_regexp = None
|
||||
self.units = "score"
|
||||
|
||||
|
||||
@ -196,6 +197,13 @@ class Graph(Node):
|
||||
regexp_default = None
|
||||
self.results_regexp = suite.get("results_regexp", regexp_default)
|
||||
|
||||
# A similar regular expression for the standard deviation (optional).
|
||||
if parent.stddev_regexp:
|
||||
stddev_default = parent.stddev_regexp % suite["name"]
|
||||
else:
|
||||
stddev_default = None
|
||||
self.stddev_regexp = suite.get("stddev_regexp", stddev_default)
|
||||
|
||||
|
||||
class Trace(Graph):
|
||||
"""Represents a leaf in the benchmark suite tree structure.
|
||||
@ -207,6 +215,7 @@ class Trace(Graph):
|
||||
assert self.results_regexp
|
||||
self.results = []
|
||||
self.errors = []
|
||||
self.stddev = ""
|
||||
|
||||
def ConsumeOutput(self, stdout):
|
||||
try:
|
||||
@ -216,11 +225,22 @@ class Trace(Graph):
|
||||
self.errors.append("Regexp \"%s\" didn't match for benchmark %s."
|
||||
% (self.results_regexp, self.graphs[-1]))
|
||||
|
||||
try:
|
||||
if self.stddev_regexp and self.stddev:
|
||||
self.errors.append("Benchmark %s should only run once since a stddev "
|
||||
"is provided by the benchmark." % self.graphs[-1])
|
||||
if self.stddev_regexp:
|
||||
self.stddev = re.search(self.stddev_regexp, stdout, re.M).group(1)
|
||||
except:
|
||||
self.errors.append("Regexp \"%s\" didn't match for benchmark %s."
|
||||
% (self.stddev_regexp, self.graphs[-1]))
|
||||
|
||||
def GetResults(self):
|
||||
return Results([{
|
||||
"graphs": self.graphs,
|
||||
"units": self.units,
|
||||
"results": self.results,
|
||||
"stddev": self.stddev,
|
||||
}], self.errors)
|
||||
|
||||
|
||||
|
@ -135,8 +135,9 @@ class BenchmarksTest(unittest.TestCase):
|
||||
self.assertEquals([
|
||||
{"units": units,
|
||||
"graphs": [suite, trace["name"]],
|
||||
"results": trace["results"]} for trace in traces],
|
||||
self._LoadResults()["traces"])
|
||||
"results": trace["results"],
|
||||
"stddev": trace["stddev"]} for trace in traces],
|
||||
self._LoadResults()["traces"])
|
||||
|
||||
def _VerifyErrors(self, errors):
|
||||
self.assertEquals(errors, self._LoadResults()["errors"])
|
||||
@ -159,8 +160,8 @@ class BenchmarksTest(unittest.TestCase):
|
||||
self._MockCommand(["."], ["x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n"])
|
||||
self.assertEquals(0, self._CallMain())
|
||||
self._VerifyResults("test", "score", [
|
||||
{"name": "Richards", "results": ["1.234"]},
|
||||
{"name": "DeltaBlue", "results": ["10657567"]},
|
||||
{"name": "Richards", "results": ["1.234"], "stddev": ""},
|
||||
{"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
|
||||
])
|
||||
self._VerifyErrors([])
|
||||
self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
|
||||
@ -176,8 +177,8 @@ class BenchmarksTest(unittest.TestCase):
|
||||
"Richards: 50\nDeltaBlue: 300\n"])
|
||||
self.assertEquals(0, self._CallMain())
|
||||
self._VerifyResults("v8", "ms", [
|
||||
{"name": "Richards", "results": ["50", "100"]},
|
||||
{"name": "DeltaBlue", "results": ["300", "200"]},
|
||||
{"name": "Richards", "results": ["50", "100"], "stddev": ""},
|
||||
{"name": "DeltaBlue", "results": ["300", "200"], "stddev": ""},
|
||||
])
|
||||
self._VerifyErrors([])
|
||||
self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
|
||||
@ -194,8 +195,8 @@ class BenchmarksTest(unittest.TestCase):
|
||||
"Richards: 50\nDeltaBlue: 300\n"])
|
||||
self.assertEquals(0, self._CallMain())
|
||||
self._VerifyResults("test", "score", [
|
||||
{"name": "Richards", "results": ["50", "100"]},
|
||||
{"name": "DeltaBlue", "results": ["300", "200"]},
|
||||
{"name": "Richards", "results": ["50", "100"], "stddev": ""},
|
||||
{"name": "DeltaBlue", "results": ["300", "200"], "stddev": ""},
|
||||
])
|
||||
self._VerifyErrors([])
|
||||
self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
|
||||
@ -213,13 +214,16 @@ class BenchmarksTest(unittest.TestCase):
|
||||
self.assertEquals([
|
||||
{"units": "score",
|
||||
"graphs": ["test", "Richards"],
|
||||
"results": ["50", "100"]},
|
||||
"results": ["50", "100"],
|
||||
"stddev": ""},
|
||||
{"units": "ms",
|
||||
"graphs": ["test", "Sub", "Leaf"],
|
||||
"results": ["3", "2", "1"]},
|
||||
"results": ["3", "2", "1"],
|
||||
"stddev": ""},
|
||||
{"units": "score",
|
||||
"graphs": ["test", "DeltaBlue"],
|
||||
"results": ["200"]},
|
||||
"results": ["200"],
|
||||
"stddev": ""},
|
||||
], self._LoadResults()["traces"])
|
||||
self._VerifyErrors([])
|
||||
self._VerifyMockMultiple(
|
||||
@ -232,13 +236,50 @@ class BenchmarksTest(unittest.TestCase):
|
||||
(path.join("out", "x64.release", "d8"), "--flag", "run.js"),
|
||||
(path.join("out", "x64.release", "d8"), "--flag", "--flag2", "run.js"))
|
||||
|
||||
def testOneRunStdDevRegExp(self):
|
||||
test_input = dict(V8_JSON)
|
||||
test_input["stddev_regexp"] = "^%s\-stddev: (.+)$"
|
||||
self._WriteTestInput(test_input)
|
||||
self._MockCommand(["."], ["Richards: 1.234\nRichards-stddev: 0.23\n"
|
||||
"DeltaBlue: 10657567\nDeltaBlue-stddev: 106\n"])
|
||||
self.assertEquals(0, self._CallMain())
|
||||
self._VerifyResults("test", "score", [
|
||||
{"name": "Richards", "results": ["1.234"], "stddev": "0.23"},
|
||||
{"name": "DeltaBlue", "results": ["10657567"], "stddev": "106"},
|
||||
])
|
||||
self._VerifyErrors([])
|
||||
self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
|
||||
|
||||
def testTwoRunsStdDevRegExp(self):
|
||||
test_input = dict(V8_JSON)
|
||||
test_input["stddev_regexp"] = "^%s\-stddev: (.+)$"
|
||||
test_input["run_count"] = 2
|
||||
self._WriteTestInput(test_input)
|
||||
self._MockCommand(["."], ["Richards: 3\nRichards-stddev: 0.7\n"
|
||||
"DeltaBlue: 6\nDeltaBlue-boom: 0.9\n",
|
||||
"Richards: 2\nRichards-stddev: 0.5\n"
|
||||
"DeltaBlue: 5\nDeltaBlue-stddev: 0.8\n"])
|
||||
self.assertEquals(1, self._CallMain())
|
||||
self._VerifyResults("test", "score", [
|
||||
{"name": "Richards", "results": ["2", "3"], "stddev": "0.7"},
|
||||
{"name": "DeltaBlue", "results": ["5", "6"], "stddev": "0.8"},
|
||||
])
|
||||
self._VerifyErrors(
|
||||
["Benchmark Richards should only run once since a stddev is provided "
|
||||
"by the benchmark.",
|
||||
"Benchmark DeltaBlue should only run once since a stddev is provided "
|
||||
"by the benchmark.",
|
||||
"Regexp \"^DeltaBlue\-stddev: (.+)$\" didn't match for benchmark "
|
||||
"DeltaBlue."])
|
||||
self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
|
||||
|
||||
def testBuildbot(self):
|
||||
self._WriteTestInput(V8_JSON)
|
||||
self._MockCommand(["."], ["Richards: 1.234\nDeltaBlue: 10657567\n"])
|
||||
self.assertEquals(0, self._CallMain("--buildbot"))
|
||||
self._VerifyResults("test", "score", [
|
||||
{"name": "Richards", "results": ["1.234"]},
|
||||
{"name": "DeltaBlue", "results": ["10657567"]},
|
||||
{"name": "Richards", "results": ["1.234"], "stddev": ""},
|
||||
{"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
|
||||
])
|
||||
self._VerifyErrors([])
|
||||
self._VerifyMock(path.join("out", "Release", "d7"), "--flag", "run.js")
|
||||
@ -248,8 +289,8 @@ class BenchmarksTest(unittest.TestCase):
|
||||
self._MockCommand(["."], ["x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n"])
|
||||
self.assertEquals(1, self._CallMain())
|
||||
self._VerifyResults("test", "score", [
|
||||
{"name": "Richards", "results": []},
|
||||
{"name": "DeltaBlue", "results": ["10657567"]},
|
||||
{"name": "Richards", "results": [], "stddev": ""},
|
||||
{"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
|
||||
])
|
||||
self._VerifyErrors(
|
||||
["Regexp \"^Richards: (.+)$\" didn't match for benchmark Richards."])
|
||||
|
Loading…
Reference in New Issue
Block a user