[tools] Implement confidence-based number of runs

R=machenbach@chromium.org, tmrts@chromium.org

Bug: chromium:880724
Change-Id: I2b8ede244fa09868eef384b967223a3788ddd2a1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1581180
Commit-Queue: Sergiy Belozorov <sergiyb@chromium.org>
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61370}
Author: Sergiy Belozorov, 2019-05-09 11:05:37 +02:00 (committed by Commit Bot)
Parent: 615d61bfc7
Commit: 80451b07bc
6 changed files with 228 additions and 79 deletions


@@ -66,3 +66,11 @@ wheel: <
   name: "infra/python/wheels/mock-py2_py3"
   version: "version:2.0.0"
 >
+
+# Used by:
+# tools/run_perf.py
+# tools/unittests/run_perf_test.py
+wheel: <
+  name: "infra/python/wheels/numpy/${vpython_platform}"
+  version: "version:1.11.3"
+>


@@ -3875,6 +3875,12 @@ group("gn_all") {
   }
 }
 
+group("v8_python_base") {
+  data = [
+    ".vpython",
+  ]
+}
+
 group("v8_clusterfuzz") {
   testonly = true


@@ -44,6 +44,7 @@ group("v8_perf") {
   testonly = true
   data_deps = [
+    "..:v8_python_base",
     "cctest:cctest",
     "..:d8",
     "../tools:v8_android_test_runner_deps",


@@ -43,6 +43,7 @@ group("v8_testrunner") {
   testonly = true
   data_deps = [
+    "..:v8_python_base",
     "..:v8_dump_build_config",
     ":v8_android_test_runner_deps",
   ]


@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2014 the V8 project authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
@@ -120,6 +119,8 @@ import sys
 import time
 import traceback
 
+import numpy
+
 from testrunner.local import android
 from testrunner.local import command
 from testrunner.local import utils
@@ -142,6 +143,7 @@ RESULT_STDDEV_RE = re.compile(r'^\{([^\}]+)\}$')
 RESULT_LIST_RE = re.compile(r'^\[([^\]]+)\]$')
 TOOLS_BASE = os.path.abspath(os.path.dirname(__file__))
 INFRA_FAILURE_RETCODE = 87
+MIN_RUNS_FOR_CONFIDENCE = 10
 
 
 def GeometricMean(values):
@@ -150,7 +152,7 @@ def GeometricMean(values):
   The mean is calculated using log to avoid overflow.
   """
   values = map(float, values)
-  return str(math.exp(sum(map(math.log, values)) / len(values)))
+  return math.exp(sum(map(math.log, values)) / len(values))
 
 
 class ResultTracker(object):
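
The change above makes GeometricMean return a raw float instead of a string, so the new statistics code can consume the values directly. A minimal standalone sketch of the same log-space computation (the function name here is illustrative):

    import math

    def geometric_mean(values):
      # Working in log space avoids overflowing the intermediate product.
      values = [float(v) for v in values]
      return math.exp(sum(math.log(v) for v in values) / len(values))

    # e.g. geometric_mean([2.0, 8.0]) == 4.0, a float rather than the old str.
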
@@ -241,6 +243,42 @@ class ResultTracker(object):
     with open(file_name, 'w') as f:
       f.write(json.dumps(self.ToDict()))
 
+  def HasEnoughRuns(self, graph_config, confidence_level):
+    """Checks if the mean of the results for a given trace config is within
+    0.1% of the true value with the specified confidence level.
+
+    This assumes Gaussian distribution of the noise and based on
+    https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule.
+
+    Args:
+      graph_config: An instance of GraphConfig.
+      confidence_level: Number of standard deviations from the mean that all
+          values must lie within. Typical values are 1, 2 and 3 and correspond
+          to 68%, 95% and 99.7% probability that the measured value is within
+          0.1% of the true value.
+
+    Returns:
+      True if specified confidence level have been achieved.
+    """
+    if not isinstance(graph_config, TraceConfig):
+      return all(self.HasEnoughRuns(child, confidence_level)
+                 for child in graph_config.children)
+
+    trace = self.traces.get(graph_config.name, {})
+    results = trace.get('results', [])
+    logging.debug('HasEnoughRuns for %s', graph_config.name)
+
+    if len(results) < MIN_RUNS_FOR_CONFIDENCE:
+      logging.debug('  Ran %d times, need at least %d',
+                    len(results), MIN_RUNS_FOR_CONFIDENCE)
+      return False
+
+    logging.debug('  Results: %d entries', len(results))
+    mean = numpy.mean(results)
+    mean_stderr = numpy.std(results) / numpy.sqrt(len(results))
+    logging.debug('  Mean: %.2f, mean_stderr: %.2f', mean, mean_stderr)
+    return confidence_level * mean_stderr < mean / 1000.0
+
   def __str__(self):  # pragma: no cover
     return json.dumps(self.ToDict(), indent=2, separators=(',', ': '))
@@ -383,8 +421,8 @@ class TraceConfig(GraphConfig):
     stddev = None
     try:
-      result = str(float(
-          re.search(self.results_regexp, output.stdout, re.M).group(1)))
+      result = float(
+          re.search(self.results_regexp, output.stdout, re.M).group(1))
     except ValueError:
       result_tracker.AddError(
           'Regexp "%s" returned a non-numeric for test %s.' %
@@ -740,6 +778,7 @@ class AndroidPlatform(Platform):  # pragma: no cover
     output.duration = time.time() - start
     return output
 
+
 class CustomMachineConfiguration:
   def __init__(self, disable_aslr = False, governor = None):
     self.aslr_backup = None
@@ -844,6 +883,12 @@ class CustomMachineConfiguration:
       raise Exception('Could not set CPU governor. Present value is %s'
                       % cur_value )
 
+
+class MaxTotalDurationReachedError(Exception):
+  """Exception used to stop running tests when max total duration is reached."""
+  pass
+
+
 def Main(argv):
   parser = argparse.ArgumentParser()
   parser.add_argument('--arch',
@@ -900,12 +945,28 @@ def Main(argv):
                            '--filter=JSTests/TypedArrays/ will run only TypedArray '
                            'benchmarks from the JSTests suite.',
                       default='')
+  parser.add_argument('--confidence-level', type=int,
+                      help='Repeatedly runs each benchmark until specified '
+                           'confidence level is reached. The value is interpreted '
+                           'as the number of standard deviations from the mean that '
+                           'all values must lie within. Typical values are 1, 2 and '
+                           '3 and correspond to 68%, 95% and 99.7% probability that '
+                           'the measured value is within 0.1% of the true value. '
+                           'Larger values result in more retries and thus longer '
+                           'runtime, but also provide more reliable results. Also '
+                           'see --max-total-duration flag.')
+  parser.add_argument('--max-total-duration', type=int, default=7140,  # 1h 59m
+                      help='Max total duration in seconds allowed for retries '
+                           'across all tests. This is especially useful in '
+                           'combination with the --confidence-level flag.')
   parser.add_argument('--dump-logcats-to',
                       help='Writes logcat output from each test into specified '
                            'directory. Only supported for android targets.')
-  parser.add_argument("--run-count", type=int, default=0,
-                      help="Override the run count specified by the test "
-                           "suite. The default 0 uses the suite's config.")
+  parser.add_argument('--run-count', type=int, default=0,
+                      help='Override the run count specified by the test '
+                           'suite. The default 0 uses the suite\'s config.')
+  parser.add_argument('-v', '--verbose', default=False, action='store_true',
+                      help='Be verbose and print debug output.')
   parser.add_argument('suite', nargs='+', help='Path to the suite config file.')
 
   try:
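
The 68%, 95% and 99.7% figures quoted in the --confidence-level help text are the two-sided coverage probabilities of a Gaussian at 1, 2 and 3 standard deviations; a quick sanity check of those constants:

    import math

    # P(|X - mu| < k * sigma) for a normal distribution equals erf(k / sqrt(2)).
    for k in (1, 2, 3):
      print(k, math.erf(k / math.sqrt(2)))  # ~0.683, ~0.954, ~0.997
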
@@ -914,7 +975,8 @@ def Main(argv):
     return INFRA_FAILURE_RETCODE
 
   logging.basicConfig(
-      level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s')
+      level=logging.DEBUG if args.verbose else logging.INFO,
+      format='%(asctime)s %(levelname)-8s %(message)s')
 
   if args.arch == 'auto':  # pragma: no cover
     args.arch = utils.DefaultArch()
@@ -973,8 +1035,7 @@ def Main(argv):
   result_tracker = ResultTracker()
   result_tracker_secondary = ResultTracker()
-  # We use list here to allow modification in nested function below.
-  have_failed_tests = [False]
+  have_failed_tests = False
   with CustomMachineConfiguration(governor = args.cpu_governor,
                                   disable_aslr = args.noaslr) as conf:
     for path in args.suite:
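
The failure flag no longer needs to be a one-element list. The removed comment hints at why the old workaround existed: a nested function in Python 2 cannot rebind a name from its enclosing scope (there is no nonlocal), but it can mutate a container it closes over. Since the flag is now assigned directly in Main's own scope (including the except handler added further down), a plain boolean suffices. A tiny illustration of the old pattern:

    def outer():
      flag = [False]
      def inner():
        flag[0] = True   # mutating works; `flag = True` would only create a new local
      inner()
      return flag[0]     # True
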
@@ -1000,6 +1061,8 @@ def Main(argv):
       platform.PreTests(node, path)
 
       # Traverse graph/trace tree and iterate over all runnables.
+      start = time.time()
+      try:
         for runnable in FlattenRunnables(root, NodeCB):
           runnable_name = '/'.join(runnable.graphs)
           if (not runnable_name.startswith(args.filter) and
@@ -1007,9 +1070,27 @@ def Main(argv):
             continue
           logging.info('>>> Running suite: %s', runnable_name)
 
+          def RunGenerator(runnable):
+            if args.confidence_level:
+              counter = 0
+              while not result_tracker.HasEnoughRuns(
+                  runnable, args.confidence_level):
+                yield counter
+                counter += 1
+            else:
               for i in range(0, max(1, args.run_count or runnable.run_count)):
+                yield i
+
+          for i in RunGenerator(runnable):
             attempts_left = runnable.retry_count + 1
             while attempts_left:
+              total_duration = time.time() - start
+              if total_duration > args.max_total_duration:
+                logging.info(
+                    '>>> Stopping now since running for too long (%ds > %ds)',
+                    total_duration, args.max_total_duration)
+                raise MaxTotalDurationReachedError()
+
               output, output_secondary = platform.Run(
                   runnable, i, secondary=args.shell_dir_secondary)
               result_tracker.AddRunnableDuration(runnable, output.duration)
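
Taken together, RunGenerator and the wall-clock check implement "retry until confident, but never past the shared time budget". A simplified driver with the same control flow (run_once, has_enough_runs and the exception name are illustrative stand-ins for the script's own pieces):

    import time

    class MaxTotalDurationReached(Exception):
      pass

    def run_until_confident(run_once, has_enough_runs, confidence_level,
                            max_total_duration):
      results = []
      start = time.time()
      # Rerun until the confidence criterion holds ...
      while not has_enough_runs(results, confidence_level):
        # ... but abort once the budget shared by all benchmarks is exhausted.
        if time.time() - start > max_total_duration:
          raise MaxTotalDurationReached()
        results.append(run_once())
      return results
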
@@ -1025,7 +1106,7 @@ def Main(argv):
               attempts_left -= 1
               if not attempts_left:  # ignore failures until last attempt
-                have_failed_tests[0] = True
+                have_failed_tests = True
               else:
                 logging.info('>>> Retrying suite: %s', runnable_name)
@@ -1033,6 +1114,8 @@ def Main(argv):
             result_tracker.timeouts.append(runnable_name)
           if runnable.has_near_timeouts:
             result_tracker.near_timeouts.append(runnable_name)
+      except MaxTotalDurationReachedError:
+        have_failed_tests = True
 
     platform.PostExecution()
@@ -1048,7 +1131,7 @@ def Main(argv):
     print('Secondary results:', result_tracker_secondary)
 
   if (result_tracker.errors or result_tracker_secondary.errors or
-      have_failed_tests[0]):
+      have_failed_tests):
     return 1
   return 0


@@ -7,9 +7,7 @@
 from __future__ import print_function
 from collections import namedtuple
-import coverage
 import json
-import mock
 import os
 import platform
 import shutil
@@ -18,6 +16,9 @@ import sys
 import tempfile
 import unittest
 
+import coverage
+import mock
+
 # Requires python-coverage and python-mock. Native python coverage
 # version >= 3.7.1 should be installed to get the best speed.
@@ -208,8 +209,8 @@ class PerfTest(unittest.TestCase):
     self._MockCommand(['.'], ['x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyRunnableDurations(1, 60)
     self._VerifyErrors([])
@@ -223,8 +224,8 @@
     self._MockCommand(['.'], ['Richards: 1.234\nDeltaBlue: 10657567'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join(
@@ -241,8 +242,8 @@
                                 'Richards: 50\nDeltaBlue: 300\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('v8', 'ms', [
-      {'name': 'Richards', 'results': ['50.0', '100.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['300.0', '200.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [50.0, 100.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [300.0, 200.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join(
@@ -260,8 +261,57 @@
                                 'Richards: 50\nDeltaBlue: 300\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['50.0', '100.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['300.0', '200.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [50.0, 100.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [300.0, 200.0], 'stddev': ''},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(os.path.join(
+        'out', 'x64.release', 'd7'), '--flag', 'run.js')
+
+  def testPerfectConfidenceRuns(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(
+        ['.'], ['x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n'] * 10)
+    self.assertEqual(0, self._CallMain('--confidence-level', '1'))
+    self._VerifyResults('test', 'score', [
+      {'name': 'Richards', 'results': [1.234] * 10, 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0] * 10, 'stddev': ''},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(os.path.join(
+        'out', 'x64.release', 'd7'), '--flag', 'run.js')
+
+  def testNoisyConfidenceRuns(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(
+        ['.'],
+        reversed([
+          # First 10 runs are mandatory. DeltaBlue is slightly noisy.
+          'x\nRichards: 1.234\nDeltaBlue: 10757567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10557567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          # Need 4 more runs for confidence in DeltaBlue results.
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+        ]),
+    )
+    self.assertEqual(0, self._CallMain('--confidence-level', '1'))
+    self._VerifyResults('test', 'score', [
+      {'name': 'Richards', 'results': [1.234] * 14, 'stddev': ''},
+      {
+        'name': 'DeltaBlue',
+        'results': [10757567.0, 10557567.0] + [10657567.0] * 12,
+        'stddev': '',
+      },
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join(
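
The "need 4 more runs" comment in testNoisyConfidenceRuns follows directly from the stopping rule. With two symmetric +/-100000 outliers among otherwise identical DeltaBlue scores, the standard error of the mean only drops below 0.1% of the mean (about 10657.6) once 14 samples are in; a quick check of the arithmetic:

    import numpy

    base, hi, lo = 10657567.0, 10757567.0, 10557567.0

    for n in (10, 13, 14):
      results = [hi, lo] + [base] * (n - 2)
      mean_stderr = numpy.std(results) / numpy.sqrt(len(results))
      # --confidence-level=1 requires mean_stderr < mean / 1000 (~10657.6).
      print(n, round(mean_stderr, 1), mean_stderr < numpy.mean(results) / 1000.0)
      # 10 -> 14142.1 False; 13 -> 10878.6 False; 14 -> 10101.5 True
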
@@ -280,15 +330,15 @@
     self.assertListEqual(sorted([
       {'units': 'score',
        'graphs': ['test', 'Richards'],
-       'results': ['50.0', '100.0'],
+       'results': [50.0, 100.0],
        'stddev': ''},
       {'units': 'ms',
        'graphs': ['test', 'Sub', 'Leaf'],
-       'results': ['3.0', '2.0', '1.0'],
+       'results': [3.0, 2.0, 1.0],
        'stddev': ''},
       {'units': 'score',
        'graphs': ['test', 'DeltaBlue'],
-       'results': ['200.0'],
+       'results': [200.0],
        'stddev': ''},
     ]), sorted(self._LoadResults()['traces']))
     self._VerifyErrors([])
@@ -309,8 +359,8 @@
                             'DeltaBlue: 10657567\nDeltaBlue-stddev: 106\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': '0.23'},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': '106'},
+      {'name': 'Richards', 'results': [1.234], 'stddev': '0.23'},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': '106'},
     ])
     self._VerifyErrors([])
     self._VerifyMock(
@@ -327,8 +377,8 @@
                             'DeltaBlue: 5\nDeltaBlue-stddev: 0.8\n'])
     self.assertEqual(1, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['2.0', '3.0'], 'stddev': '0.7'},
-      {'name': 'DeltaBlue', 'results': ['5.0', '6.0'], 'stddev': '0.8'},
+      {'name': 'Richards', 'results': [2.0, 3.0], 'stddev': '0.7'},
+      {'name': 'DeltaBlue', 'results': [5.0, 6.0], 'stddev': '0.8'},
     ])
     self._VerifyErrors(
         ['Test test/Richards should only run once since a stddev is provided '
@@ -348,8 +398,8 @@
                mock.MagicMock(return_value={'is_android': False})).start()
     self.assertEqual(0, self._CallMain('--buildbot'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join('out', 'Release', 'd7'), '--flag', 'run.js')
@@ -364,9 +414,9 @@
                mock.MagicMock(return_value={'is_android': False})).start()
     self.assertEqual(0, self._CallMain('--buildbot'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
-      {'name': 'Total', 'results': ['3626.49109719'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
+      {'name': 'Total', 'results': [3626.491097190233], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join('out', 'Release', 'd7'), '--flag', 'run.js')
@@ -381,7 +431,7 @@
                mock.MagicMock(return_value={'is_android': False})).start()
     self.assertEqual(1, self._CallMain('--buildbot'))
     self._VerifyResults('test', 'score', [
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors(
         ['Regexp "^Richards: (.+)$" '
@@ -395,7 +445,7 @@
     self._MockCommand(['.'], ['x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n'])
     self.assertEqual(1, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors(
         ['Regexp "^Richards: (.+)$" did not match for test test/Richards.'])
@@ -442,8 +492,8 @@
                     return_value={'is_android': True}).start()
     self.assertEqual(0, self._CallMain('--arch', 'arm'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
 
   def testTwoRuns_Trybot(self):
@@ -462,12 +512,12 @@
         '--json-test-results-secondary', test_output_secondary,
     ))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['100.0', '200.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['20.0', '20.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [100.0, 200.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [20.0, 20.0], 'stddev': ''},
     ])
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['50.0', '100.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['200.0', '200.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [50.0, 100.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [200.0, 200.0], 'stddev': ''},
     ], test_output_secondary)
     self._VerifyRunnableDurations(2, 60, test_output_secondary)
     self._VerifyErrors([])
@@ -486,8 +536,8 @@
     self._MockCommand(['.'], ['x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n'])
     self.assertEqual(0, self._CallMain('--extra-flags=--prof'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join('out', 'x64.release', 'd7'),
@@ -514,13 +564,13 @@
       {
         'units': 'score',
         'graphs': ['test1', 'Richards'],
-        'results': [u'1.2', u'1.2'],
+        'results': [1.2, 1.2],
         'stddev': '',
       },
       {
         'units': 'score',
         'graphs': ['test1', 'DeltaBlue'],
-        'results': [u'2.1', u'2.1'],
+        'results': [2.1, 2.1],
         'stddev': '',
       },
     ]), sorted(results['traces']))
@@ -532,13 +582,13 @@
       {
         'units': 'score',
         'graphs': ['test2', 'Richards'],
-        'results': [u'1.2', u'1.2'],
+        'results': [1.2, 1.2],
         'stddev': '',
       },
       {
         'units': 'score',
         'graphs': ['test2', 'DeltaBlue'],
-        'results': [u'2.1', u'2.1'],
+        'results': [2.1, 2.1],
         'stddev': '',
      },
     ], results['traces'])
@@ -550,13 +600,13 @@
       {
         'units': 'score',
         'graphs': ['test3', 'Octane', 'Richards'],
-        'results': [u'1.2'],
+        'results': [1.2],
         'stddev': '',
       },
       {
         'units': 'score',
         'graphs': ['test3', 'Octane', 'DeltaBlue'],
-        'results': [u'2.1'],
+        'results': [2.1],
         'stddev': '',
       },
     ], results['traces'])