v8/tools/perf-compare.py
Igor Sheludko e7f5a04057 [tools] Standalone script for generating comparison of tools/run_perf.py results.
Inspired by bsuite/compare-baseline.py and tools/perf-to-html.py. Unlike the
latter, the new script does not require preprocessing of the JSON files, and it
can compare multiple JSON files at once (just like bsuite/compare-baseline.py).

This CL also makes tools/run_perf.py relative path friendly.

BUG=

Change-Id: I7be796fa920f8360703806f32767b992eec0f9c2
Reviewed-on: https://chromium-review.googlesource.com/456286
Reviewed-by: Michael Stanton <mvstanton@chromium.org>
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Commit-Queue: Igor Sheludko <ishell@chromium.org>
Cr-Commit-Position: refs/heads/master@{#43982}
2017-03-21 14:13:12 +00:00

479 lines
12 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright 2017 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: perf-compare.py [-t TITLE] [-o OUTPUT] [NAME,]RESULT.json ...
Compare perf trybot JSON files and output the results into a pleasing HTML page.
Examples:
perf-compare.py -t "ia32 results" Result,../result.json Master,/path-to/master.json -o results.html
perf-compare.py -t "x64 results" ../result.json master.json -o results.html
'''
from collections import OrderedDict
import commands
import json
import math
from argparse import ArgumentParser
import os
import shutil
import sys
import tempfile
# Minimum absolute percentage difference at which BenchmarkResult.Compare
# flags a result as notably positive or negative.
PERCENT_CONSIDERED_SIGNIFICANT = 0.5
# BenchmarkResult.Compare marks a difference as significant when the value
# returned by Statistics.ComputeProbability is below this threshold.
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
# NOTE(review): not referenced by any code visible in this file -- confirm
# whether it is still needed.
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05
class Statistics:
  """Statistics helpers used to decide whether two results really differ."""

  # (z threshold, reported probability) pairs, ordered from the largest
  # threshold down; the first threshold that z exceeds wins.
  # Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
  _Z_TO_PROBABILITY = (
      (2.575829, 0),     # p 0.005: two sided < 0.01
      (2.326348, 0.01),  # p 0.010
      (2.170091, 0.02),  # p 0.015
      (2.053749, 0.03),  # p 0.020
      (1.959964, 0.04),  # p 0.025: two sided < 0.05
      (1.880793, 0.05),  # p 0.030
      (1.811910, 0.06),  # p 0.035
      (1.750686, 0.07),  # p 0.040
      (1.695397, 0.08),  # p 0.045
      (1.644853, 0.09),  # p 0.050: two sided < 0.10
      (1.281551, 0.10),  # p 0.100: two sided < 0.20
  )

  @staticmethod
  def Mean(values):
    """Returns the arithmetic mean of |values| as a float.

    |values| may be any iterable of numbers; it is materialized first so that
    iterators (e.g. the result of map() on Python 3) work as well. An empty
    input raises ZeroDivisionError.
    """
    values = list(values)
    return float(sum(values)) / len(values)

  @staticmethod
  def Variance(values, average):
    """Returns the list of squared deviations of |values| from |average|.

    Note that this is the per-element list, not its mean; the caller averages
    it (see StandardDeviation).
    """
    # A real list (not a lazy map object) so that the result supports len()
    # on both Python 2 and Python 3.
    return [(x - average) ** 2 for x in values]

  @staticmethod
  def StandardDeviation(values, average):
    """Returns the population standard deviation of |values| around |average|."""
    return math.sqrt(Statistics.Mean(Statistics.Variance(values, average)))

  @staticmethod
  def ComputeZ(baseline_avg, baseline_sigma, mean, n):
    """Returns the absolute z-score of |mean| (from |n| samples) vs. a baseline.

    A baseline without any spread cannot be scaled by its sigma, so a fixed,
    very large score (1000.0) is returned in that case.
    """
    if baseline_sigma == 0:
      return 1000.0
    return abs((mean - baseline_avg) / (baseline_sigma / math.sqrt(n)))

  @staticmethod
  def ComputeProbability(z):
    """Maps the z-score |z| to a coarse probability bucket.

    Returns the probability paired with the first threshold in
    _Z_TO_PROBABILITY that |z| strictly exceeds, or 0.20 when |z| exceeds none
    of them.
    """
    for threshold, probability in Statistics._Z_TO_PROBABILITY:
      if z > threshold:
        return probability
    return 0.20  # two sided p >= 0.20
class ResultsDiff:
  """Outcome of comparing one benchmark result against another.

  Holds a significance flag, a signed "notable" marker (its sign is what the
  isNotably* predicates look at) and the percentage text to display.
  """

  def __init__(self, significant, notable, percentage_string):
    (self.significant_,
     self.notable_,
     self.percentage_string_) = (significant, notable, percentage_string)

  def percentage_string(self):
    """Returns the percentage text that was passed to the constructor."""
    text = self.percentage_string_
    return text

  def isSignificant(self):
    """Returns the significance flag that was passed to the constructor."""
    flag = self.significant_
    return flag

  def isNotablyPositive(self):
    """True when the notable marker is greater than zero."""
    return 0 < self.notable_

  def isNotablyNegative(self):
    """True when the notable marker is less than zero."""
    return 0 > self.notable_
class BenchmarkResult:
  """Aggregated measurement of one benchmark in one run.

  Stores the units string plus the sample count, the result value and its
  sigma, each converted to float.
  """

  def __init__(self, units, count, result, sigma):
    self.units_ = units
    self.count_ = float(count)
    self.result_ = float(result)
    self.sigma_ = float(sigma)

  def Compare(self, other):
    """Compares this result against |other| and returns a ResultsDiff.

    The percentage is computed as self/other for "score" units and as
    other/self for every other unit, so it is positive for a higher score or
    for a lower value of any other unit. Differences of at most 0.1% produce
    an empty diff. When the reference value in the denominator is 0 no
    percentage can be computed, and an empty diff is returned as well instead
    of raising ZeroDivisionError.

    Exits the process with status 1 (message on stderr) when the two results
    use different units.
    """
    if self.units_ != other.units_:
      # sys.exit() with a string prints it to stderr and exits with status 1,
      # keeping the message out of HTML that may be written to stdout.
      sys.exit("Incompatible units: %s and %s" % (self.units_, other.units_))

    if self.units_ == "score":
      numerator, denominator = self.result_, other.result_
    else:
      numerator, denominator = other.result_, self.result_
    if denominator == 0:
      return ResultsDiff(False, 0, "")
    compare_num = 100 * numerator / denominator - 100

    significant = False
    notable = 0
    percentage_string = ""
    # compute notability and significance.
    if abs(compare_num) > 0.1:
      percentage_string = "%3.1f" % (compare_num)
      # |other| acts as the baseline; the sample count is taken from self.
      z = Statistics.ComputeZ(other.result_, other.sigma_,
                              self.result_, self.count_)
      p = Statistics.ComputeProbability(z)
      significant = p < PROBABILITY_CONSIDERED_SIGNIFICANT
      if compare_num >= PERCENT_CONSIDERED_SIGNIFICANT:
        notable = 1
      elif compare_num <= -PERCENT_CONSIDERED_SIGNIFICANT:
        notable = -1
    return ResultsDiff(significant, notable, percentage_string)

  def result(self):
    """Returns the stored result value as a float."""
    return self.result_

  def sigma(self):
    """Returns the stored sigma as a float."""
    return self.sigma_
class Benchmark:
  """A single named benchmark with one BenchmarkResult per run."""

  def __init__(self, name):
    self.name_ = name
    # Maps run name -> BenchmarkResult.
    self.runs_ = {}

  def name(self):
    """Returns the benchmark name."""
    return self.name_

  def getResult(self, run_name):
    """Returns the result recorded for |run_name|, or None if there is none."""
    return self.runs_.get(run_name)

  def appendResult(self, run_name, trace):
    """Records the measurements of |trace| as the result of |run_name|.

    |trace| is a dict with a 'results' sequence (values convertible to
    float), a 'units' entry and an optional 'stddev' entry. When 'stddev' is
    missing or falsy, the standard deviation is computed from the results.
    A trace without any results is skipped, so getResult() keeps returning
    None for that run instead of the whole report failing on a division by
    zero.
    """
    # A real list: the values are counted and iterated more than once, which
    # a lazy map object (Python 3) does not allow.
    values = [float(value) for value in trace['results']]
    if not values:
      return
    count = len(values)
    mean = Statistics.Mean(values)
    stddev = float(trace.get('stddev') or
                   Statistics.StandardDeviation(values, mean))
    units = trace["units"]
    self.runs_[run_name] = BenchmarkResult(units, count, mean, stddev)
class BenchmarkSuite:
  """A named collection of Benchmark objects, keyed by benchmark name."""

  def __init__(self, name):
    self.name_ = name
    # Maps benchmark name -> Benchmark.
    self.benchmarks_ = {}

  def SortedTestKeys(self):
    """Returns the benchmark names sorted, with "Total" (if any) moved last."""
    # sorted() instead of keys().sort(): dict.keys() is not a list on
    # Python 3.
    keys = sorted(self.benchmarks_)
    t = "Total"
    if t in keys:
      keys.remove(t)
      keys.append(t)
    return keys

  def name(self):
    """Returns the suite name."""
    return self.name_

  def getBenchmark(self, benchmark_name):
    """Returns the Benchmark called |benchmark_name|, creating it on demand."""
    benchmark_object = self.benchmarks_.get(benchmark_name)
    if benchmark_object is None:
      benchmark_object = Benchmark(benchmark_name)
      self.benchmarks_[benchmark_name] = benchmark_object
    return benchmark_object
class ResultTableRenderer:
  """Accumulates the HTML report line by line and writes it out on flush.

  All Print*/Start*/Finish* methods only append to an internal buffer;
  FlushOutput() writes the buffer to |output_file|, or to stdout when no
  output file was given.
  """

  def __init__(self, output_file):
    self.benchmarks_ = []
    self.print_output_ = []
    self.output_file_ = output_file

  def Print(self, str_data):
    """Appends one chunk of output to the buffer."""
    self.print_output_.append(str_data)

  def FlushOutput(self):
    """Writes the buffered output (joined by newlines) and empties the buffer."""
    string_data = "\n".join(self.print_output_)
    # Reset the buffer itself (not a throwaway local) so that a later flush
    # does not emit the same content again.
    self.print_output_ = []
    if self.output_file_:
      # create a file
      with open(self.output_file_, "w") as text_file:
        text_file.write(string_data)
    else:
      print(string_data)

  def bold(self, data):
    """Wraps |data| in a <b> element."""
    return "<b>" + data + "</b>"

  def red(self, data):
    """Wraps |data| in a red <font> element."""
    return "<font color=\"red\">" + data + "</font>"

  def green(self, data):
    """Wraps |data| in a green <font> element."""
    return "<font color=\"green\">" + data + "</font>"

  def PrintHeader(self):
    """Emits the document head (including the style sheet) and opens <body>."""
    data = """<html>
<head>
<title>Output</title>
<style type="text/css">
/*
Style inspired by Andy Ferra's gist at https://gist.github.com/andyferra/2554919
*/
body {
font-family: Helvetica, arial, sans-serif;
font-size: 14px;
line-height: 1.6;
padding-top: 10px;
padding-bottom: 10px;
background-color: white;
padding: 30px;
}
h1, h2, h3, h4, h5, h6 {
margin: 20px 0 10px;
padding: 0;
font-weight: bold;
-webkit-font-smoothing: antialiased;
cursor: text;
position: relative;
}
h1 {
font-size: 28px;
color: black;
}
h2 {
font-size: 24px;
border-bottom: 1px solid #cccccc;
color: black;
}
h3 {
font-size: 18px;
}
h4 {
font-size: 16px;
}
h5 {
font-size: 14px;
}
h6 {
color: #777777;
font-size: 14px;
}
p, blockquote, ul, ol, dl, li, table, pre {
margin: 15px 0;
}
li p.first {
display: inline-block;
}
ul, ol {
padding-left: 30px;
}
ul :first-child, ol :first-child {
margin-top: 0;
}
ul :last-child, ol :last-child {
margin-bottom: 0;
}
table {
padding: 0;
}
table tr {
border-top: 1px solid #cccccc;
background-color: white;
margin: 0;
padding: 0;
}
table tr:nth-child(2n) {
background-color: #f8f8f8;
}
table tr th {
font-weight: bold;
border: 1px solid #cccccc;
text-align: left;
margin: 0;
padding: 6px 13px;
}
table tr td {
border: 1px solid #cccccc;
text-align: left;
margin: 0;
padding: 6px 13px;
}
table tr th :first-child, table tr td :first-child {
margin-top: 0;
}
table tr th :last-child, table tr td :last-child {
margin-bottom: 0;
}
</style>
</head>
<body>
"""
    self.Print(data)

  def StartSuite(self, suite_name, run_names):
    """Emits the suite heading and opens its table.

    The header row has a "Test" column, then one column per run name; every
    run except the first one is followed by a "%" column.
    """
    self.Print("<h2>")
    self.Print("<a name=\"" + suite_name + "\">" +
               suite_name + "</a> <a href=\"#top\">(top)</a>")
    self.Print("</h2>")
    self.Print("<table class=\"benchmark\">")
    self.Print("<thead>")
    self.Print(" <th>Test</th>")
    for index, run_name in enumerate(run_names):
      self.Print(" <th>" + run_name + "</th>")
      # The first run is the one the others are compared against, so it gets
      # no percentage column of its own.
      if index > 0:
        self.Print(" <th>%</th>")
    self.Print("</thead>")
    self.Print("<tbody>")

  def FinishSuite(self):
    """Closes the table opened by StartSuite."""
    self.Print("</tbody>")
    self.Print("</table>")

  def StartBenchmark(self, benchmark_name):
    """Opens a table row and emits the benchmark name cell."""
    self.Print(" <tr>")
    self.Print(" <td>" + benchmark_name + "</td>")

  def FinishBenchmark(self):
    """Closes the row opened by StartBenchmark."""
    self.Print(" </tr>")

  def PrintResult(self, run):
    """Emits a cell with run.result(), or an empty cell when |run| is None."""
    if run is None:
      self.PrintEmptyCell()
      return
    self.Print(" <td>" + str(run.result()) + "</td>")

  def PrintComparison(self, run, main_run):
    """Emits a cell comparing |run| against |main_run|.

    The cell is empty when either result is None. Otherwise it holds the
    percentage text of run.Compare(main_run), bold when the diff is
    significant and green/red when it is notably positive/negative.
    """
    if run is None or main_run is None:
      self.PrintEmptyCell()
      return
    diff = run.Compare(main_run)
    res = diff.percentage_string()
    if diff.isSignificant():
      res = self.bold(res)
    if diff.isNotablyPositive():
      res = self.green(res)
    elif diff.isNotablyNegative():
      res = self.red(res)
    self.Print(" <td>" + res + "</td>")

  def PrintEmptyCell(self):
    """Emits an empty table cell."""
    self.Print(" <td></td>")

  def StartTOC(self, title):
    """Emits the page title and opens the table-of-contents list."""
    self.Print("<h1>" + title + "</h1>")
    self.Print("<ul>")

  def FinishTOC(self):
    """Closes the table-of-contents list."""
    self.Print("</ul>")

  def PrintBenchmarkLink(self, benchmark):
    """Emits a table-of-contents entry linking to the anchor |benchmark|."""
    self.Print("<li><a href=\"#" + benchmark + "\">" + benchmark + "</a></li>")

  def PrintFooter(self):
    """Closes the <body> and <html> elements."""
    data = """</body>
</html>
"""
    self.Print(data)
def _ParseRunSpec(json_file):
  """Returns (run_name, filename) for one parsed json_file argument.

  |json_file| is a list holding either just a path, in which case the run is
  named after the path without its extension, or a column name followed by
  a path.
  """
  if len(json_file) == 1:
    filename = json_file[0]
    return os.path.splitext(filename)[0], filename
  return json_file[0], json_file[1]


def _LoadBenchmarkSuites(json_files):
  """Reads all result files and groups their traces by suite and benchmark.

  Returns (benchmark_suites, run_names): a dict mapping suite name to
  BenchmarkSuite, and an OrderedDict whose keys are the run names in
  command-line order.
  """
  benchmark_suites = {}
  run_names = OrderedDict()
  for json_file in json_files:
    run_name, filename = _ParseRunSpec(json_file)
    with open(filename) as json_data:
      data = json.load(json_data)
    run_names[run_name] = 0
    # Errors go to stderr so that they cannot end up inside the HTML when the
    # report itself is written to stdout. A missing "errors" entry is treated
    # as "no errors".
    for error in data.get("errors") or []:
      sys.stderr.write("Error: %s\n" % (error,))
    for trace in data["traces"]:
      # The first graph component names the suite, the rest the benchmark.
      suite_name = trace["graphs"][0]
      benchmark_name = "/".join(trace["graphs"][1:])
      benchmark_suite_object = benchmark_suites.get(suite_name)
      if benchmark_suite_object is None:
        benchmark_suite_object = BenchmarkSuite(suite_name)
        benchmark_suites[suite_name] = benchmark_suite_object
      benchmark_object = benchmark_suite_object.getBenchmark(benchmark_name)
      benchmark_object.appendResult(run_name, trace)
  return benchmark_suites, run_names


def Render(args):
  """Loads the JSON files named in |args| and renders the HTML comparison.

  |args| provides json_file (a list of [path] or [column name, path] lists),
  title (optional page title) and output (optional output file; stdout is
  used when it is not set). The first run is the reference every other run
  is compared against.
  """
  benchmark_suites, run_names = _LoadBenchmarkSuites(args.json_file)

  renderer = ResultTableRenderer(args.output)
  renderer.PrintHeader()

  title = args.title or "Benchmark results"
  renderer.StartTOC(title)
  # Sorting the names only (instead of the (name, suite) items) works on both
  # Python 2 and Python 3.
  suite_names = sorted(benchmark_suites)
  for suite_name in suite_names:
    renderer.PrintBenchmarkLink(suite_name)
  renderer.FinishTOC()

  for suite_name in suite_names:
    benchmark_suite_object = benchmark_suites[suite_name]
    renderer.StartSuite(suite_name, run_names)
    for benchmark_name in benchmark_suite_object.SortedTestKeys():
      benchmark_object = benchmark_suite_object.getBenchmark(benchmark_name)
      renderer.StartBenchmark(benchmark_name)
      main_result = None
      for index, run_name in enumerate(run_names):
        result = benchmark_object.getResult(run_name)
        renderer.PrintResult(result)
        if index == 0:
          # The first run is the reference; it has no comparison column.
          main_result = result
        else:
          renderer.PrintComparison(result, main_result)
      renderer.FinishBenchmark()
    renderer.FinishSuite()

  renderer.PrintFooter()
  renderer.FlushOutput()
def pair(arg):
  """Splits a "[column name,]path" command-line argument into a list.

  Returns [path] when |arg| contains no comma and [column name, path]
  otherwise. Only the first comma separates the two parts, so a path that
  itself contains commas is kept intact after the column name.
  """
  return arg.split(',', 1)
if __name__ == '__main__':
  # Command-line entry point: parse the options and render the report.
  arg_parser = ArgumentParser(
      description="Compare perf trybot JSON files and "
                  "output the results into a pleasing HTML page.")
  arg_parser.add_argument(
      "-t", "--title",
      dest="title",
      help="Optional title of the web page")
  arg_parser.add_argument(
      "-o", "--output",
      dest="output",
      help="Write html output to this file rather than stdout")
  # Each positional argument is turned into a list by pair().
  arg_parser.add_argument(
      "json_file",
      nargs="+",
      type=pair,
      help="[column name,]./path-to/result.json - a comma-separated"
           " pair of optional column name and path to json file")
  Render(arg_parser.parse_args())