v8/test/benchmarks/csuite/compare-baseline.py
Vadim Gorbachev (bmsdave) 631213f6f6 Reland "Preparing v8 to use with python3 /test"
This is a reland of f8962ae1a2

Original change's description:
> Preparing v8 to use with python3 /test
>
> There are now less than 400 days until the end of life
> of Python 2 (aka _legacy_ Python): https://pythonclock.org/.
> The code compatibility check for python2 and python3
> used the following tools: futurize, flake8
> You can see the reports here: https://travis-ci.com/bmsdave/v8/builds
>
> This CL was uploaded by git cl split.
>
> Bug: v8:8594
> Change-Id: Idbf467daf629a4e808345a6a88036c2a3f259138
> Reviewed-on: https://chromium-review.googlesource.com/c/1470121
> Commit-Queue: Michael Achenbach <machenbach@chromium.org>
> Reviewed-by: Michael Achenbach <machenbach@chromium.org>
> Reviewed-by: Sergiy Belozorov <sergiyb@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#59679}

Bug: v8:8594
Change-Id: I8c1a8d6593a4a927d56d37dada2c704062e842cc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1484300
Reviewed-by: Tamer Tas <tmrts@chromium.org>
Reviewed-by: Sergiy Belozorov <sergiyb@chromium.org>
Commit-Queue: Tamer Tas <tmrts@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60354}
2019-03-20 09:56:06 +00:00

#!/usr/bin/python
# Copyright 2018 the V8 project authors. All rights reserved.
'''
python %prog [options] [baseline_files]
Compare benchmark results from the benchmark runner against one or
more baselines. You can either pipe the result of the benchmark
runner directly into this script or specify the results file with
the -f option.
'''
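
# Example invocations (file and directory names below are illustrative):
#   <benchmark-runner output> | ./compare-baseline.py baseline-master.csv
#   ./compare-baseline.py -f results.csv -b baselines/
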
# for py2/py3 compatibility
from __future__ import print_function
import csv
import math
from optparse import OptionParser
import os
import sys

# Percentage change at or above which a result is reported as significant.
PERCENT_CONSIDERED_SIGNIFICANT = 0.5
# p-values below this are highlighted as significant; the same value is also
# used as the relative-sigma cutoff for the noise marker ("S").
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
# p-values above this are treated as too flaky to color at all.
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05

# ANSI escape sequences for terminal color output.
RESET_SEQ = "\033[0m"
RED_SEQ = "\033[31m"
GREEN_SEQ = "\033[32m"
BLUE_SEQ = "\033[34m"
BOLD_SEQ = "\033[1m"

v8_benchmarks = ["V8", "Octane", "Richards", "DeltaBlue", "Crypto",
                 "EarleyBoyer", "RayTrace", "RegExp", "Splay", "SplayLatency",
                 "NavierStokes", "PdfJS", "Mandreel", "MandreelLatency",
                 "Gameboy", "CodeLoad", "Box2D", "zlib", "Typescript"]

suite_names = ["V8", "Octane", "Kraken-Orig", "Kraken-Once", "Kraken",
               "SunSpider", "SunSpider-Once", "SunSpider-Orig"]

def ColorText(opts, text):
  if opts.no_color:
    result = text.replace("$RESET", "")
    result = result.replace("$BLUE", "")
    result = result.replace("$RED", "")
    result = result.replace("$GREEN", "")
    result = result.replace("$BOLD", "")
  else:
    if opts.html:
      result = text.replace("$RESET", "</font></b>")
      result = result.replace("$BLUE", "<font COLOR=\"0000DD\">")
      result = result.replace("$RED", "<font COLOR=\"DD0000\">")
      result = result.replace("$GREEN", "<font COLOR=\"00DD00\">")
      result = result.replace("$BOLD", "<b>")
    else:
      result = text.replace("$RESET", RESET_SEQ)
      result = result.replace("$BLUE", BLUE_SEQ)
      result = result.replace("$RED", RED_SEQ)
      result = result.replace("$GREEN", GREEN_SEQ)
      result = result.replace("$BOLD", BOLD_SEQ)
  return result

def NormalizedSigmaToString(normalized_sigma):
  assert normalized_sigma >= 0
  if normalized_sigma < PROBABILITY_CONSIDERED_SIGNIFICANT:
    return "|"
  return "S"

def ComputeZ(baseline_avg, baseline_sigma, mean, n):
  if baseline_sigma == 0:
    return 1000.0
  return abs((mean - baseline_avg) / (baseline_sigma / math.sqrt(n)))

# Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
def ComputeProbability(z):
  if z > 2.575829:   # p 0.005: two sided < 0.01
    return 0
  if z > 2.326348:   # p 0.010
    return 0.01
  if z > 2.170091:   # p 0.015
    return 0.02
  if z > 2.053749:   # p 0.020
    return 0.03
  if z > 1.959964:   # p 0.025: two sided < 0.05
    return 0.04
  if z > 1.880793:   # p 0.030
    return 0.05
  if z > 1.811910:   # p 0.035
    return 0.06
  if z > 1.750686:   # p 0.040
    return 0.07
  if z > 1.695397:   # p 0.045
    return 0.08
  if z > 1.644853:   # p 0.050: two sided < 0.10
    return 0.09
  if z > 1.281551:   # p 0.100: two sided < 0.20
    return 0.10
  return 0.20  # two sided p >= 0.20
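
# Worked example (illustrative numbers, not from a real run): for a baseline
# mean of 1000.0 with sigma 10.0 and a new mean of 1015.0 over n=10 runs,
# ComputeZ(1000.0, 10.0, 1015.0, 10) = 15 / (10 / sqrt(10)) ~= 4.74, and
# ComputeProbability(4.74) returns 0, i.e. the difference is well past the
# two-sided p < 0.01 threshold and will be highlighted as significant.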

def PercentColor(change_percent, flakyness):
  result = ""
  if change_percent >= PERCENT_CONSIDERED_SIGNIFICANT:
    result = "$GREEN"
  elif change_percent <= -PERCENT_CONSIDERED_SIGNIFICANT:
    result = "$RED"
  else:
    return ""
  if flakyness < PROBABILITY_CONSIDERED_SIGNIFICANT:
    result += "$BOLD"
  elif flakyness > PROBABILITY_CONSIDERED_MEANINGLESS:
    result = ""
  return result

def ProcessOneResultLine(opts, suite, testname, time, sigma, num, baselines):
  time = float(time)
  sigma = float(sigma)
  num = int(num)
  if testname in suite_names:
    base_color = "$BOLD"
  else:
    base_color = ""
  if opts.html:
    line_out = ("<tr><td>%s%s$RESET</td><td>%s%8.1f$RESET</td>" %
                (base_color, testname, base_color, time))
  else:
    sigma_string = NormalizedSigmaToString(sigma / time)
    line_out = ("%s%40s$RESET: %s%8.1f$RESET %s" %
                (base_color, testname, base_color, time, sigma_string))
  for baseline in baselines:
    raw_score = ""
    compare_score = ""
    found = False
    if suite in baseline[1]:
      baseline_results = baseline[1][suite]
      for item in baseline_results:
        if testname == item[0]:
          found = True
          raw_score_num = float(item[1])
          raw_sigma_num = float(item[2])
          raw_score = "%7.1f" % raw_score_num
          compare_num = 0
          compare_score = ""
          percent_color = ""
          if testname in v8_benchmarks:
            compare_num = 100*time/raw_score_num - 100
          else:
            compare_num = 100*raw_score_num/time - 100
          if abs(compare_num) > 0.1:
            compare_score = "%3.1f" % (compare_num)
            z = ComputeZ(raw_score_num, raw_sigma_num, time, num)
            p = ComputeProbability(z)
            percent_color = PercentColor(compare_num, p)
          sigma_string = NormalizedSigmaToString(raw_sigma_num / raw_score_num)
          if opts.html:
            format_string = "<td>%s%8s$RESET</td><td>%s%6s$RESET</td>"
          else:
            format_string = " %s%8s$RESET %s %s%6s$RESET |"
          line_out += (format_string %
                       (base_color, raw_score, sigma_string,
                        percent_color, compare_score))
    if not found:
      if opts.html:
        line_out += "<td></td><td></td>"
      else:
        line_out += "|          |        "
  if opts.html:
    line_out += "</tr>"
  print(ColorText(opts, line_out))

def PrintSeparator(opts, baselines, big):
  if not opts.html:
    if big:
      separator = "==================================================="
    else:
      separator = "---------------------------------------------------"
    for baseline in baselines:
      if big:
        separator += "+==========+========"
      else:
        separator += "+----------+--------"
    separator += "+"
    print(separator)

def ProcessResults(opts, results, baselines):
  for suite in suite_names:
    if suite in results:
      for result in results[suite]:
        ProcessOneResultLine(opts, suite, result[0], result[1], result[2],
                             result[3], baselines)
      PrintSeparator(opts, baselines, False)

def ProcessFile(file_path):
  # Open in text mode so csv.reader also works under Python 3.
  file_reader = csv.reader(open(file_path, 'r'), delimiter=',')
  benchmark_results = {}
  current_rows = []
  for row in file_reader:
    if len(row) > 1:
      current_rows.append(row)
      for suite in suite_names:
        if row[0] == suite:
          benchmark_results[row[0]] = current_rows
          current_rows = []
  return benchmark_results

def ProcessStdIn():
  benchmark_results = {}
  current_rows = []
  for line_in in sys.stdin:
    line_in = line_in.rstrip()
    row = line_in.split(",")
    if len(row) > 1:
      current_rows.append(row)
      for suite in suite_names:
        if row[0] == suite:
          benchmark_results[row[0]] = current_rows
          current_rows = []
  return benchmark_results

def CompareFiles(opts, args):
  results = []
  baselines = []
  for file_path in args:
    baseline = ProcessFile(file_path)
    baselines.append((os.path.basename(file_path), baseline))
  if opts.html:
    header = "<tr><th>benchmark</th><th>score</th>"
  else:
    header = "%40s: %8s " % ("benchmark", "score")
  for baseline in baselines:
    (baseline_name, baseline_results) = baseline
    if opts.html:
      header += ("<th>%s</th><th>%s</th>") % (baseline_name[0:7], "%")
    else:
      header += "| %8s | %6s " % (baseline_name[0:7], "%")
  if opts.html:
    header += "</tr>\n"
  else:
    header += "|"
  print(header)
  PrintSeparator(opts, baselines, True)
  if opts.filename:
    # ProcessFile opens and parses the results file itself.
    results = ProcessFile(opts.filename)
  else:
    results = ProcessStdIn()
  ProcessResults(opts, results, baselines)

if __name__ == '__main__':
  parser = OptionParser(usage=__doc__)
  parser.add_option("-f", "--filename", dest="filename",
                    help="Specifies the filename for the results to "
                         "compare to the baselines rather than reading "
                         "from stdin.")
  parser.add_option("-b", "--baselines", dest="baselines",
                    help="Specifies a directory of baseline files to "
                         "compare against.")
  parser.add_option("-n", "--no-color", action="store_true",
                    dest="no_color", default=False,
                    help="Generates output without escape codes that "
                         "add color highlights.")
  parser.add_option("--html", action="store_true",
                    dest="html", default=False,
                    help="Generates output as an HTML table.")
  (opts, args) = parser.parse_args()
  if opts.baselines:
    args.extend(map(lambda x: (opts.baselines + "/" + x),
                    os.listdir(opts.baselines)))
  args = reversed(sorted(args))
  CompareFiles(opts, args)