98b47ebb33
It's nice to have a quick way to get stable results on the local machine for the classic d8 benchmarks. CSuite revitalizes an old tool called BSuite which offers multiple averaged runs, and a nice ascii display of results. Sample usage: ./csuite.py octane baseline ~/src/v8/out/d8 ./csuite.py octane compare ~/src/v8/out-mine/d8 benchmark: score | master | % | ===================================================+==========+========+ Richards: 26201.0 | 26568.0 | -1.4 | DeltaBlue: 55744.0 | 58309.0 | -4.4 | Crypto: 31831.0 | 31815.0 | | RayTrace: 73481.0 | 73185.0 | 0.4 | ............<snip>............................ Octane: 36388.0 | 35741.0 | 1.8 | ---------------------------------------------------+----------+--------+ See README.md for more documentation. Change-Id: I182490506ca07fab1240bd485bd1d7a8920db893 Reviewed-on: https://chromium-review.googlesource.com/c/1387487 Commit-Queue: Michael Stanton <mvstanton@chromium.org> Reviewed-by: Yang Guo <yangguo@chromium.org> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Cr-Commit-Position: refs/heads/master@{#58434}
265 lines
8.6 KiB
Python
Executable File
265 lines
8.6 KiB
Python
Executable File
#!/usr/bin/python
|
|
# Copyright 2018 the V8 project authors. All rights reserved.
|
|
|
|
'''
|
|
python %prog [options] [baseline_files]
|
|
|
|
Compare benchmark results from the benchmark runner against one or
|
|
more baselines. You can either pipe the result of the benchmark
|
|
runner directly into this script or specify the results file with
|
|
the -f option.
|
|
'''
|
|
|
|
import csv
|
|
import math
|
|
from optparse import OptionParser
|
|
import os
|
|
import sys
|
|
|
|
PERCENT_CONSIDERED_SIGNIFICANT = 0.5
|
|
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
|
|
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05
|
|
|
|
RESET_SEQ = "\033[0m"
|
|
RED_SEQ = "\033[31m"
|
|
GREEN_SEQ = "\033[32m"
|
|
BLUE_SEQ = "\033[34m"
|
|
BOLD_SEQ = "\033[1m"
|
|
|
|
v8_benchmarks = ["V8", "Octane", "Richards", "DeltaBlue", "Crypto",
|
|
"EarleyBoyer", "RayTrace", "RegExp", "Splay", "SplayLatency",
|
|
"NavierStokes", "PdfJS", "Mandreel", "MandreelLatency",
|
|
"Gameboy", "CodeLoad", "Box2D", "zlib", "Typescript"]
|
|
|
|
suite_names = ["V8", "Octane", "Kraken-Orig", "Kraken-Once", "Kraken",
|
|
"SunSpider", "SunSpider-Once", "SunSpider-Orig"]
|
|
|
|
def ColorText(opts, text):
|
|
if opts.no_color:
|
|
result = text.replace("$RESET", "")
|
|
result = result.replace("$BLUE", "")
|
|
result = result.replace("$RED", "")
|
|
result = result.replace("$GREEN", "")
|
|
result = result.replace("$BOLD", "")
|
|
else:
|
|
if opts.html:
|
|
result = text.replace("$RESET", "</font></b>")
|
|
result = result.replace("$BLUE", "<font COLOR=\"0000DD\">")
|
|
result = result.replace("$RED", "<font COLOR=\"DD0000\">")
|
|
result = result.replace("$GREEN", "<font COLOR=\"00DD00\">")
|
|
result = result.replace("$BOLD", "<b>")
|
|
else:
|
|
result = text.replace("$RESET", RESET_SEQ)
|
|
result = result.replace("$BLUE", BLUE_SEQ)
|
|
result = result.replace("$RED", RED_SEQ)
|
|
result = result.replace("$GREEN", GREEN_SEQ)
|
|
result = result.replace("$BOLD", BOLD_SEQ)
|
|
return result
|
|
|
|
def NormalizedSigmaToString(normalized_sigma):
|
|
assert normalized_sigma >= 0
|
|
if normalized_sigma < PROBABILITY_CONSIDERED_SIGNIFICANT:
|
|
return "|"
|
|
return "S"
|
|
|
|
def ComputeZ(baseline_avg, baseline_sigma, mean, n):
|
|
if baseline_sigma == 0:
|
|
return 1000.0;
|
|
return abs((mean - baseline_avg) / (baseline_sigma / math.sqrt(n)))
|
|
|
|
# Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
|
|
def ComputeProbability(z):
|
|
if z > 2.575829: # p 0.005: two sided < 0.01
|
|
return 0
|
|
if z > 2.326348: # p 0.010
|
|
return 0.01
|
|
if z > 2.170091: # p 0.015
|
|
return 0.02
|
|
if z > 2.053749: # p 0.020
|
|
return 0.03
|
|
if z > 1.959964: # p 0.025: two sided < 0.05
|
|
return 0.04
|
|
if z > 1.880793: # p 0.030
|
|
return 0.05
|
|
if z > 1.811910: # p 0.035
|
|
return 0.06
|
|
if z > 1.750686: # p 0.040
|
|
return 0.07
|
|
if z > 1.695397: # p 0.045
|
|
return 0.08
|
|
if z > 1.644853: # p 0.050: two sided < 0.10
|
|
return 0.09
|
|
if z > 1.281551: # p 0.100: two sided < 0.20
|
|
return 0.10
|
|
return 0.20 # two sided p >= 0.20
|
|
|
|
def PercentColor(change_percent, flakyness):
|
|
result = ""
|
|
if change_percent >= PERCENT_CONSIDERED_SIGNIFICANT:
|
|
result = "$GREEN"
|
|
elif change_percent <= -PERCENT_CONSIDERED_SIGNIFICANT:
|
|
result = "$RED"
|
|
else:
|
|
return ""
|
|
if flakyness < PROBABILITY_CONSIDERED_SIGNIFICANT:
|
|
result += "$BOLD"
|
|
elif flakyness > PROBABILITY_CONSIDERED_MEANINGLESS:
|
|
result = ""
|
|
return result
|
|
|
|
def ProcessOneResultLine(opts, suite, testname, time, sigma, num, baselines):
|
|
time = float(time)
|
|
sigma = float(sigma)
|
|
num = int(num)
|
|
if testname in suite_names:
|
|
base_color = "$BOLD"
|
|
else:
|
|
base_color = ""
|
|
if opts.html:
|
|
line_out = ("<tr><td>%s%s$RESET</td><td>%s%8.1f$RESET</td>" %
|
|
(base_color, testname, base_color, time))
|
|
else:
|
|
sigma_string = NormalizedSigmaToString(sigma / time)
|
|
line_out = ("%s%40s$RESET: %s%8.1f$RESET %s" %
|
|
(base_color, testname, base_color, time, sigma_string))
|
|
for baseline in baselines:
|
|
raw_score = ""
|
|
compare_score = ""
|
|
found = False
|
|
if suite in baseline[1]:
|
|
baseline_results = baseline[1][suite]
|
|
for item in baseline_results:
|
|
if testname == item[0]:
|
|
found = True
|
|
raw_score_num = float(item[1])
|
|
raw_sigma_num = float(item[2])
|
|
raw_score = "%7.1f" % raw_score_num
|
|
compare_num = 0
|
|
compare_score = ""
|
|
percent_color = ""
|
|
if testname in v8_benchmarks:
|
|
compare_num = 100*time/raw_score_num - 100
|
|
else:
|
|
compare_num = 100*raw_score_num/time - 100
|
|
if abs(compare_num) > 0.1:
|
|
compare_score = "%3.1f" % (compare_num)
|
|
z = ComputeZ(raw_score_num, raw_sigma_num, time, num)
|
|
p = ComputeProbability(z)
|
|
percent_color = PercentColor(compare_num, p)
|
|
sigma_string = NormalizedSigmaToString(raw_sigma_num / raw_score_num)
|
|
if opts.html:
|
|
format_string = "<td>%s%8s$RESET</td><td>%s%6s$RESET</td>"
|
|
else:
|
|
format_string = " %s%8s$RESET %s %s%6s$RESET |"
|
|
line_out += (format_string %
|
|
(base_color, raw_score, sigma_string,
|
|
percent_color, compare_score))
|
|
if not found:
|
|
if opts.html:
|
|
line_out += "<td></td><td></td>"
|
|
else:
|
|
line_out += "| | "
|
|
if opts.html:
|
|
line_out += "</tr>"
|
|
print(ColorText(opts, line_out))
|
|
|
|
def PrintSeparator(opts, baselines, big):
|
|
if not opts.html:
|
|
if big:
|
|
separator = "==================================================="
|
|
else:
|
|
separator = "---------------------------------------------------"
|
|
for baseline in baselines:
|
|
if big:
|
|
separator += "+==========+========"
|
|
else:
|
|
separator += "+----------+--------"
|
|
separator += "+"
|
|
print(separator)
|
|
|
|
def ProcessResults(opts, results, baselines):
|
|
for suite in suite_names:
|
|
if suite in results:
|
|
for result in results[suite]:
|
|
ProcessOneResultLine(opts, suite, result[0], result[1], result[2],
|
|
result[3], baselines);
|
|
PrintSeparator(opts, baselines, False)
|
|
|
|
def ProcessFile(file_path):
|
|
file_reader = csv.reader(open(file_path, 'rb'), delimiter=',')
|
|
benchmark_results = {}
|
|
current_rows = []
|
|
for row in file_reader:
|
|
if len(row) > 1:
|
|
current_rows.append(row)
|
|
for suite in suite_names:
|
|
if row[0] == suite:
|
|
benchmark_results[row[0]] = current_rows
|
|
current_rows = []
|
|
return benchmark_results
|
|
|
|
def ProcessStdIn():
|
|
benchmark_results = {}
|
|
current_rows = []
|
|
for line_in in sys.stdin:
|
|
line_in = line_in.rstrip()
|
|
row = line_in.split(",")
|
|
if len(row) > 1:
|
|
current_rows.append(row)
|
|
for suite in suite_names:
|
|
if row[0] == suite:
|
|
benchmark_results[row[0]] = current_rows
|
|
current_rows = []
|
|
return benchmark_results
|
|
|
|
def CompareFiles(opts, args):
|
|
results = []
|
|
baselines = []
|
|
for file_path in args:
|
|
baseline = ProcessFile(file_path)
|
|
baselines.append((os.path.basename(file_path), baseline))
|
|
if opts.html:
|
|
header = "<tr><th>benchmark</th><th>score</th>"
|
|
else:
|
|
header = "%40s: %8s " % ("benchmark", "score")
|
|
for baseline in baselines:
|
|
(baseline_name, baseline_results) = baseline
|
|
if opts.html:
|
|
header += ("<th>%s</th><th>%s</th>") % (baseline_name[0:7], "%")
|
|
else:
|
|
header += "| %8s | %6s " % (baseline_name[0:7], "%")
|
|
if opts.html:
|
|
header += "</tr>\n"
|
|
else:
|
|
header += "|"
|
|
print(header)
|
|
PrintSeparator(opts, baselines, True)
|
|
if opts.filename:
|
|
file_reader = csv.reader(open(opts.filename, 'rb'), delimiter=',')
|
|
results = ProcessFile(opts.filename)
|
|
else:
|
|
results = ProcessStdIn()
|
|
ProcessResults(opts, results, baselines)
|
|
|
|
if __name__ == '__main__':
|
|
parser = OptionParser(usage=__doc__)
|
|
parser.add_option("-f", "--filename", dest="filename",
|
|
help="Specifies the filename for the results to "\
|
|
"compare to the baselines rather than reading from stdin.")
|
|
parser.add_option("-b", "--baselines", dest="baselines",
|
|
help="Specifies a directory of baseline files to "\
|
|
"compare against.")
|
|
parser.add_option("-n", "--no-color", action="store_true",
|
|
dest="no_color", default=False,
|
|
help="Generates output without escape codes that "\
|
|
"add color highlights.")
|
|
parser.add_option("--html", action="store_true",
|
|
dest="html", default=False,
|
|
help="Generates output as a HTML table ")
|
|
(opts, args) = parser.parse_args()
|
|
if opts.baselines:
|
|
args.extend(map(lambda x: (opts.baselines + "/" + x),
|
|
(os.listdir(opts.baselines))))
|
|
args = reversed(sorted(args))
|
|
CompareFiles(opts, args)
|