[tools] Implement confidence-based number of runs
R=machenbach@chromium.org, tmrts@chromium.org

Bug: chromium:880724
Change-Id: I2b8ede244fa09868eef384b967223a3788ddd2a1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1581180
Commit-Queue: Sergiy Belozorov <sergiyb@chromium.org>
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61370}
This commit is contained in:
parent 615d61bfc7
commit 80451b07bc
.vpython (+8)
@@ -66,3 +66,11 @@ wheel: <
   name: "infra/python/wheels/mock-py2_py3"
   version: "version:2.0.0"
 >
+
+# Used by:
+#   tools/run_perf.py
+#   tools/unittests/run_perf_test.py
+wheel: <
+  name: "infra/python/wheels/numpy/${vpython_platform}"
+  version: "version:1.11.3"
+>

BUILD.gn (+6)
@@ -3875,6 +3875,12 @@ group("gn_all") {
   }
 }
 
+group("v8_python_base") {
+  data = [
+    ".vpython",
+  ]
+}
+
 group("v8_clusterfuzz") {
   testonly = true
 
@@ -44,6 +44,7 @@ group("v8_perf") {
   testonly = true
 
   data_deps = [
+    "..:v8_python_base",
     "cctest:cctest",
     "..:d8",
     "../tools:v8_android_test_runner_deps",
@@ -43,6 +43,7 @@ group("v8_testrunner") {
   testonly = true
 
   data_deps = [
+    "..:v8_python_base",
     "..:v8_dump_build_config",
     ":v8_android_test_runner_deps",
   ]

tools/run_perf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2014 the V8 project authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
@@ -120,6 +119,8 @@ import sys
 import time
 import traceback
 
+import numpy
+
 from testrunner.local import android
 from testrunner.local import command
 from testrunner.local import utils
@@ -142,6 +143,7 @@ RESULT_STDDEV_RE = re.compile(r'^\{([^\}]+)\}$')
 RESULT_LIST_RE = re.compile(r'^\[([^\]]+)\]$')
 TOOLS_BASE = os.path.abspath(os.path.dirname(__file__))
 INFRA_FAILURE_RETCODE = 87
+MIN_RUNS_FOR_CONFIDENCE = 10
 
 
 def GeometricMean(values):
@@ -150,7 +152,7 @@ def GeometricMean(values):
   The mean is calculated using log to avoid overflow.
   """
   values = map(float, values)
-  return str(math.exp(sum(map(math.log, values)) / len(values)))
+  return math.exp(sum(map(math.log, values)) / len(values))
 
 
 class ResultTracker(object):
@@ -241,6 +243,42 @@ class ResultTracker(object):
     with open(file_name, 'w') as f:
       f.write(json.dumps(self.ToDict()))
 
+  def HasEnoughRuns(self, graph_config, confidence_level):
+    """Checks if the mean of the results for a given trace config is within
+    0.1% of the true value with the specified confidence level.
+
+    This assumes a Gaussian distribution of the noise and is based on
+    https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule.
+
+    Args:
+      graph_config: An instance of GraphConfig.
+      confidence_level: Number of standard deviations from the mean that all
+          values must lie within. Typical values are 1, 2 and 3 and correspond
+          to 68%, 95% and 99.7% probability that the measured value is within
+          0.1% of the true value.
+
+    Returns:
+      True if the specified confidence level has been achieved.
+    """
+    if not isinstance(graph_config, TraceConfig):
+      return all(self.HasEnoughRuns(child, confidence_level)
+                 for child in graph_config.children)
+
+    trace = self.traces.get(graph_config.name, {})
+    results = trace.get('results', [])
+    logging.debug('HasEnoughRuns for %s', graph_config.name)
+
+    if len(results) < MIN_RUNS_FOR_CONFIDENCE:
+      logging.debug('  Ran %d times, need at least %d',
+                    len(results), MIN_RUNS_FOR_CONFIDENCE)
+      return False
+
+    logging.debug('  Results: %d entries', len(results))
+    mean = numpy.mean(results)
+    mean_stderr = numpy.std(results) / numpy.sqrt(len(results))
+    logging.debug('  Mean: %.2f, mean_stderr: %.2f', mean, mean_stderr)
+    return confidence_level * mean_stderr < mean / 1000.0
+
   def __str__(self):  # pragma: no cover
     return json.dumps(self.ToDict(), indent=2, separators=(',', ': '))
 
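Read as a stopping rule, the hunk above says: keep rerunning until `confidence_level` standard errors of the mean fit inside 0.1% of the mean, and never decide on fewer than MIN_RUNS_FOR_CONFIDENCE samples. A minimal standalone sketch of that per-trace criterion, assuming numpy is available (the function name and the sample values are illustrative, not part of the patch):

import numpy

MIN_RUNS_FOR_CONFIDENCE = 10  # mirrors the constant added earlier in the patch

def has_enough_runs(results, confidence_level):
  # Too few samples make the standard-error estimate meaningless.
  if len(results) < MIN_RUNS_FOR_CONFIDENCE:
    return False
  mean = numpy.mean(results)
  # Standard error of the mean shrinks with the square root of the run count.
  mean_stderr = numpy.std(results) / numpy.sqrt(len(results))
  # Stop once `confidence_level` standard errors fit inside 0.1% of the mean.
  return confidence_level * mean_stderr < mean / 1000.0

print(has_enough_runs([100.0] * 10, confidence_level=2))       # True: no noise at all
print(has_enough_runs([100.0, 90.0] * 5, confidence_level=2))  # False: still too noisy

With --confidence-level 2, for example, runs continue until two standard errors fit inside 0.1% of the mean, i.e. roughly 95% confidence under the Gaussian-noise assumption stated in the docstring.
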
@@ -383,8 +421,8 @@ class TraceConfig(GraphConfig):
     stddev = None
 
     try:
-      result = str(float(
-        re.search(self.results_regexp, output.stdout, re.M).group(1)))
+      result = float(
+        re.search(self.results_regexp, output.stdout, re.M).group(1))
     except ValueError:
       result_tracker.AddError(
           'Regexp "%s" returned a non-numeric for test %s.' %
@@ -740,6 +778,7 @@ class AndroidPlatform(Platform):  # pragma: no cover
     output.duration = time.time() - start
     return output
 
+
 class CustomMachineConfiguration:
   def __init__(self, disable_aslr = False, governor = None):
     self.aslr_backup = None
@@ -844,6 +883,12 @@ class CustomMachineConfiguration:
       raise Exception('Could not set CPU governor. Present value is %s'
                       % cur_value )
 
+
+class MaxTotalDurationReachedError(Exception):
+  """Exception used to stop running tests when max total duration is reached."""
+  pass
+
+
 def Main(argv):
   parser = argparse.ArgumentParser()
   parser.add_argument('--arch',
@@ -900,12 +945,28 @@ def Main(argv):
                            '--filter=JSTests/TypedArrays/ will run only TypedArray '
                            'benchmarks from the JSTests suite.',
                       default='')
+  parser.add_argument('--confidence-level', type=int,
+                      help='Repeatedly runs each benchmark until specified '
+                      'confidence level is reached. The value is interpreted '
+                      'as the number of standard deviations from the mean that '
+                      'all values must lie within. Typical values are 1, 2 and '
+                      '3 and correspond to 68%, 95% and 99.7% probability that '
+                      'the measured value is within 0.1% of the true value. '
+                      'Larger values result in more retries and thus longer '
+                      'runtime, but also provide more reliable results. Also '
+                      'see --max-total-duration flag.')
+  parser.add_argument('--max-total-duration', type=int, default=7140,  # 1h 59m
+                      help='Max total duration in seconds allowed for retries '
+                      'across all tests. This is especially useful in '
+                      'combination with the --confidence-level flag.')
   parser.add_argument('--dump-logcats-to',
                       help='Writes logcat output from each test into specified '
                       'directory. Only supported for android targets.')
-  parser.add_argument("--run-count", type=int, default=0,
-                      help="Override the run count specified by the test "
-                      "suite. The default 0 uses the suite's config.")
+  parser.add_argument('--run-count', type=int, default=0,
+                      help='Override the run count specified by the test '
+                      'suite. The default 0 uses the suite\'s config.')
+  parser.add_argument('-v', '--verbose', default=False, action='store_true',
+                      help='Be verbose and print debug output.')
   parser.add_argument('suite', nargs='+', help='Path to the suite config file.')
 
   try:
@@ -914,7 +975,8 @@ def Main(argv):
     return INFRA_FAILURE_RETCODE
 
   logging.basicConfig(
-      level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s')
+      level=logging.DEBUG if args.verbose else logging.INFO,
+      format='%(asctime)s %(levelname)-8s %(message)s')
 
   if args.arch == 'auto':  # pragma: no cover
     args.arch = utils.DefaultArch()
@@ -973,8 +1035,7 @@ def Main(argv):
 
   result_tracker = ResultTracker()
   result_tracker_secondary = ResultTracker()
-  # We use list here to allow modification in nested function below.
-  have_failed_tests = [False]
+  have_failed_tests = False
   with CustomMachineConfiguration(governor = args.cpu_governor,
                                   disable_aslr = args.noaslr) as conf:
     for path in args.suite:
@@ -1000,6 +1061,8 @@ def Main(argv):
       platform.PreTests(node, path)
 
       # Traverse graph/trace tree and iterate over all runnables.
+      start = time.time()
+      try:
         for runnable in FlattenRunnables(root, NodeCB):
           runnable_name = '/'.join(runnable.graphs)
           if (not runnable_name.startswith(args.filter) and
@@ -1007,9 +1070,27 @@ def Main(argv):
             continue
           logging.info('>>> Running suite: %s', runnable_name)
 
-        for i in range(0, max(1, args.run_count or runnable.run_count)):
+          def RunGenerator(runnable):
+            if args.confidence_level:
+              counter = 0
+              while not result_tracker.HasEnoughRuns(
+                  runnable, args.confidence_level):
+                yield counter
+                counter += 1
+            else:
+              for i in range(0, max(1, args.run_count or runnable.run_count)):
+                yield i
+
+          for i in RunGenerator(runnable):
            attempts_left = runnable.retry_count + 1
            while attempts_left:
+              total_duration = time.time() - start
+              if total_duration > args.max_total_duration:
+                logging.info(
+                    '>>> Stopping now since running for too long (%ds > %ds)',
+                    total_duration, args.max_total_duration)
+                raise MaxTotalDurationReachedError()
+
              output, output_secondary = platform.Run(
                  runnable, i, secondary=args.shell_dir_secondary)
              result_tracker.AddRunnableDuration(runnable, output.duration)
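The run loop now draws iteration indices from a generator: a fixed range when --run-count or the suite config decides, an open-ended stream gated by HasEnoughRuns when --confidence-level is set, with MaxTotalDurationReachedError as a wall-clock backstop in both cases. A simplified sketch of that control flow; run_once, has_enough_runs and the exception name below are illustrative stand-ins, not the patch's API:

import time

class MaxTotalDurationReached(Exception):
  """Illustrative stand-in for run_perf.py's MaxTotalDurationReachedError."""

def run_until_confident(run_once, has_enough_runs, fixed_count=None,
                        confidence_level=None, max_total_duration=7140):
  """Sketch of the generator-driven loop: fixed count vs. confidence-based."""
  def run_generator():
    if confidence_level:
      counter = 0
      while not has_enough_runs(confidence_level):  # open-ended stream
        yield counter
        counter += 1
    else:
      for i in range(max(1, fixed_count or 1)):     # classic fixed run count
        yield i

  start = time.time()
  for i in run_generator():
    if time.time() - start > max_total_duration:    # wall-clock backstop
      raise MaxTotalDurationReached()
    run_once(i)

# Toy usage: stop as soon as three runs have been recorded.
runs = []
run_until_confident(run_once=runs.append,
                    has_enough_runs=lambda level: len(runs) >= 3,
                    confidence_level=1)
print(runs)  # [0, 1, 2]

Passing the stop predicate in keeps the sketch self-contained; in run_perf.py the predicate is ResultTracker.HasEnoughRuns and the duration cap defaults to 7140 seconds.
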
@@ -1025,7 +1106,7 @@ def Main(argv):
 
               attempts_left -= 1
               if not attempts_left:  # ignore failures until last attempt
-                have_failed_tests[0] = True
+                have_failed_tests = True
               else:
                 logging.info('>>> Retrying suite: %s', runnable_name)
 
@@ -1033,6 +1114,8 @@ def Main(argv):
             result_tracker.timeouts.append(runnable_name)
           if runnable.has_near_timeouts:
             result_tracker.near_timeouts.append(runnable_name)
+      except MaxTotalDurationReachedError:
+        have_failed_tests = True
 
       platform.PostExecution()
 
@@ -1048,7 +1131,7 @@ def Main(argv):
     print('Secondary results:', result_tracker_secondary)
 
   if (result_tracker.errors or result_tracker_secondary.errors or
-      have_failed_tests[0]):
+      have_failed_tests):
     return 1
 
   return 0

tools/unittests/run_perf_test.py
@@ -7,9 +7,7 @@
 from __future__ import print_function
 
 from collections import namedtuple
-import coverage
 import json
-import mock
 import os
 import platform
 import shutil
@@ -18,6 +16,9 @@ import sys
 import tempfile
 import unittest
 
+import coverage
+import mock
+
 # Requires python-coverage and python-mock. Native python coverage
 # version >= 3.7.1 should be installed to get the best speed.
 
@@ -208,8 +209,8 @@ class PerfTest(unittest.TestCase):
     self._MockCommand(['.'], ['x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyRunnableDurations(1, 60)
     self._VerifyErrors([])
@@ -223,8 +224,8 @@ class PerfTest(unittest.TestCase):
     self._MockCommand(['.'], ['Richards: 1.234\nDeltaBlue: 10657567'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join(
@@ -241,8 +242,8 @@ class PerfTest(unittest.TestCase):
                            'Richards: 50\nDeltaBlue: 300\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('v8', 'ms', [
-      {'name': 'Richards', 'results': ['50.0', '100.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['300.0', '200.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [50.0, 100.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [300.0, 200.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join(
@@ -260,8 +261,57 @@ class PerfTest(unittest.TestCase):
                            'Richards: 50\nDeltaBlue: 300\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['50.0', '100.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['300.0', '200.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [50.0, 100.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [300.0, 200.0], 'stddev': ''},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(os.path.join(
+        'out', 'x64.release', 'd7'), '--flag', 'run.js')
+
+  def testPerfectConfidenceRuns(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(
+        ['.'], ['x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n'] * 10)
+    self.assertEqual(0, self._CallMain('--confidence-level', '1'))
+    self._VerifyResults('test', 'score', [
+      {'name': 'Richards', 'results': [1.234] * 10, 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0] * 10, 'stddev': ''},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(os.path.join(
+        'out', 'x64.release', 'd7'), '--flag', 'run.js')
+
+  def testNoisyConfidenceRuns(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(
+        ['.'],
+        reversed([
+          # First 10 runs are mandatory. DeltaBlue is slightly noisy.
+          'x\nRichards: 1.234\nDeltaBlue: 10757567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10557567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          # Need 4 more runs for confidence in DeltaBlue results.
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+          'x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n',
+        ]),
+    )
+    self.assertEqual(0, self._CallMain('--confidence-level', '1'))
+    self._VerifyResults('test', 'score', [
+      {'name': 'Richards', 'results': [1.234] * 14, 'stddev': ''},
+      {
+        'name': 'DeltaBlue',
+        'results': [10757567.0, 10557567.0] + [10657567.0] * 12,
+        'stddev': '',
+      },
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join(
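The "need 4 more runs" comment in testNoisyConfidenceRuns follows directly from the stopping rule: the two DeltaBlue outliers keep the standard error of the mean above 0.1% of the mean until enough identical follow-up values dilute them. A quick numeric check, assuming numpy (the values are the DeltaBlue sequence the test feeds in):

import numpy

# DeltaBlue sequence from the test: two outliers, then identical values.
values = [10757567.0, 10557567.0] + [10657567.0] * 12

for n in (10, 13, 14):
  results = values[:n]
  stderr = numpy.std(results) / numpy.sqrt(n)
  threshold = numpy.mean(results) / 1000.0
  # With --confidence-level 1 the rerun loop stops once stderr < threshold.
  print(n, round(stderr, 1), round(threshold, 1), stderr < threshold)

# Prints (rounded): 10 and 13 runs are not enough, 14 runs are:
#   10 14142.1 10657.6 False
#   13 10878.6 10657.6 False
#   14 10101.5 10657.6 True
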
@@ -280,15 +330,15 @@ class PerfTest(unittest.TestCase):
     self.assertListEqual(sorted([
       {'units': 'score',
        'graphs': ['test', 'Richards'],
-       'results': ['50.0', '100.0'],
+       'results': [50.0, 100.0],
        'stddev': ''},
       {'units': 'ms',
        'graphs': ['test', 'Sub', 'Leaf'],
-       'results': ['3.0', '2.0', '1.0'],
+       'results': [3.0, 2.0, 1.0],
        'stddev': ''},
       {'units': 'score',
        'graphs': ['test', 'DeltaBlue'],
-       'results': ['200.0'],
+       'results': [200.0],
        'stddev': ''},
     ]), sorted(self._LoadResults()['traces']))
     self._VerifyErrors([])
@@ -309,8 +359,8 @@ class PerfTest(unittest.TestCase):
                            'DeltaBlue: 10657567\nDeltaBlue-stddev: 106\n'])
     self.assertEqual(0, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': '0.23'},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': '106'},
+      {'name': 'Richards', 'results': [1.234], 'stddev': '0.23'},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': '106'},
     ])
     self._VerifyErrors([])
     self._VerifyMock(
@@ -327,8 +377,8 @@ class PerfTest(unittest.TestCase):
                            'DeltaBlue: 5\nDeltaBlue-stddev: 0.8\n'])
     self.assertEqual(1, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['2.0', '3.0'], 'stddev': '0.7'},
-      {'name': 'DeltaBlue', 'results': ['5.0', '6.0'], 'stddev': '0.8'},
+      {'name': 'Richards', 'results': [2.0, 3.0], 'stddev': '0.7'},
+      {'name': 'DeltaBlue', 'results': [5.0, 6.0], 'stddev': '0.8'},
     ])
     self._VerifyErrors(
         ['Test test/Richards should only run once since a stddev is provided '
@@ -348,8 +398,8 @@ class PerfTest(unittest.TestCase):
                mock.MagicMock(return_value={'is_android': False})).start()
     self.assertEqual(0, self._CallMain('--buildbot'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join('out', 'Release', 'd7'), '--flag', 'run.js')
@@ -364,9 +414,9 @@ class PerfTest(unittest.TestCase):
                mock.MagicMock(return_value={'is_android': False})).start()
     self.assertEqual(0, self._CallMain('--buildbot'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
-      {'name': 'Total', 'results': ['3626.49109719'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
+      {'name': 'Total', 'results': [3626.491097190233], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join('out', 'Release', 'd7'), '--flag', 'run.js')
@@ -381,7 +431,7 @@ class PerfTest(unittest.TestCase):
                mock.MagicMock(return_value={'is_android': False})).start()
     self.assertEqual(1, self._CallMain('--buildbot'))
     self._VerifyResults('test', 'score', [
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors(
         ['Regexp "^Richards: (.+)$" '
@@ -395,7 +445,7 @@ class PerfTest(unittest.TestCase):
     self._MockCommand(['.'], ['x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n'])
     self.assertEqual(1, self._CallMain())
     self._VerifyResults('test', 'score', [
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors(
         ['Regexp "^Richards: (.+)$" did not match for test test/Richards.'])
@@ -442,8 +492,8 @@ class PerfTest(unittest.TestCase):
                          return_value={'is_android': True}).start()
     self.assertEqual(0, self._CallMain('--arch', 'arm'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
 
   def testTwoRuns_Trybot(self):
@@ -462,12 +512,12 @@ class PerfTest(unittest.TestCase):
         '--json-test-results-secondary', test_output_secondary,
     ))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['100.0', '200.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['20.0', '20.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [100.0, 200.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [20.0, 20.0], 'stddev': ''},
     ])
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['50.0', '100.0'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['200.0', '200.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [50.0, 100.0], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [200.0, 200.0], 'stddev': ''},
     ], test_output_secondary)
     self._VerifyRunnableDurations(2, 60, test_output_secondary)
     self._VerifyErrors([])
@@ -486,8 +536,8 @@ class PerfTest(unittest.TestCase):
     self._MockCommand(['.'], ['x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n'])
     self.assertEqual(0, self._CallMain('--extra-flags=--prof'))
     self._VerifyResults('test', 'score', [
-      {'name': 'Richards', 'results': ['1.234'], 'stddev': ''},
-      {'name': 'DeltaBlue', 'results': ['10657567.0'], 'stddev': ''},
+      {'name': 'Richards', 'results': [1.234], 'stddev': ''},
+      {'name': 'DeltaBlue', 'results': [10657567.0], 'stddev': ''},
     ])
     self._VerifyErrors([])
     self._VerifyMock(os.path.join('out', 'x64.release', 'd7'),
@@ -514,13 +564,13 @@ class PerfTest(unittest.TestCase):
       {
         'units': 'score',
         'graphs': ['test1', 'Richards'],
-        'results': [u'1.2', u'1.2'],
+        'results': [1.2, 1.2],
         'stddev': '',
       },
       {
         'units': 'score',
         'graphs': ['test1', 'DeltaBlue'],
-        'results': [u'2.1', u'2.1'],
+        'results': [2.1, 2.1],
         'stddev': '',
       },
     ]), sorted(results['traces']))
@@ -532,13 +582,13 @@ class PerfTest(unittest.TestCase):
       {
         'units': 'score',
         'graphs': ['test2', 'Richards'],
-        'results': [u'1.2', u'1.2'],
+        'results': [1.2, 1.2],
         'stddev': '',
       },
       {
         'units': 'score',
         'graphs': ['test2', 'DeltaBlue'],
-        'results': [u'2.1', u'2.1'],
+        'results': [2.1, 2.1],
         'stddev': '',
       },
     ], results['traces'])
@@ -550,13 +600,13 @@ class PerfTest(unittest.TestCase):
       {
         'units': 'score',
         'graphs': ['test3', 'Octane', 'Richards'],
-        'results': [u'1.2'],
+        'results': [1.2],
         'stddev': '',
       },
       {
         'units': 'score',
         'graphs': ['test3', 'Octane', 'DeltaBlue'],
-        'results': [u'2.1'],
+        'results': [2.1],
         'stddev': '',
       },
     ], results['traces'])