#!/usr/bin/env python
# Copyright 2017 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''
python %prog

Compare perf trybot JSON files and output the results into a pleasing HTML page.
Examples:
  %prog -t "ia32 results" Result,../result.json Master,/path-to/master.json -o results.html
  %prog -t "x64 results" ../result.json master.json -o results.html
'''

from collections import OrderedDict
try:
    import commands  # Python 2 only: the module no longer exists in Python 3.
except ImportError:
    commands = None
import json
import math
from argparse import ArgumentParser
import os
import shutil
import sys
import tempfile

PERCENT_CONSIDERED_SIGNIFICANT = 0.5
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05


class Statistics:
    """Stateless statistics helpers used when comparing benchmark runs."""

    # (z threshold, returned probability) pairs, ordered from the largest
    # threshold down; the first threshold that z exceeds wins.
    # Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
    _Z_THRESHOLDS = (
        (2.575829, 0),     # p 0.005: two sided < 0.01
        (2.326348, 0.01),  # p 0.010
        (2.170091, 0.02),  # p 0.015
        (2.053749, 0.03),  # p 0.020
        (1.959964, 0.04),  # p 0.025: two sided < 0.05
        (1.880793, 0.05),  # p 0.030
        (1.811910, 0.06),  # p 0.035
        (1.750686, 0.07),  # p 0.040
        (1.695397, 0.08),  # p 0.045
        (1.644853, 0.09),  # p 0.050: two sided < 0.10
        (1.281551, 0.10),  # p 0.100: two sided < 0.20
    )

    @staticmethod
    def Mean(values):
        """Return the arithmetic mean of `values` as a float.

        `values` must be a sized, non-empty collection; an empty one raises
        ZeroDivisionError.
        """
        return float(sum(values)) / len(values)

    @staticmethod
    def Variance(values, average):
        """Return the list of squared deviations of `values` from `average`.

        Despite the name this is the per-element list, not its mean; callers
        average it themselves (see StandardDeviation).  A real list is
        returned so that Mean() can call len() on it under Python 3 as well,
        where map() yields an iterator.
        """
        return [(x - average) ** 2 for x in values]

    @staticmethod
    def StandardDeviation(values, average):
        """Return the population standard deviation of `values` around `average`."""
        return math.sqrt(Statistics.Mean(Statistics.Variance(values, average)))

    @staticmethod
    def ComputeZ(baseline_avg, baseline_sigma, mean, n):
        """Return the absolute z-score of `mean` (over `n` samples) vs. a baseline.

        A baseline with zero sigma cannot be divided by, so a large sentinel
        (1000.0) is returned instead.
        """
        if baseline_sigma == 0:
            return 1000.0
        return abs((mean - baseline_avg) / (baseline_sigma / math.sqrt(n)))

    @staticmethod
    def ComputeProbability(z):
        """Map a z-score onto the coarse probability buckets of _Z_THRESHOLDS.

        Returns 0.20 when `z` does not exceed any threshold
        (two sided p >= 0.20).
        """
        for threshold, probability in Statistics._Z_THRESHOLDS:
            if z > threshold:
                return probability
        return 0.20  # two sided p >= 0.20


class ResultsDiff:
    """Outcome of comparing two benchmark results."""

    def __init__(self, significant, notable, percentage_string):
        # significant: truthy when the difference passed the probability test.
        # notable: > 0 for a notable improvement, < 0 for a notable
        #          regression, 0 otherwise.
        # percentage_string: preformatted percentage ("" when negligible).
        self.significant_ = significant
        self.notable_ = notable
        self.percentage_string_ = percentage_string

    def percentage_string(self):
        return self.percentage_string_

    def isSignificant(self):
        return self.significant_

    def isNotablyPositive(self):
        return self.notable_ > 0

    def isNotablyNegative(self):
        return self.notable_ < 0


class BenchmarkResult:
    """Aggregated measurement (mean and sigma over `count` samples) of one run."""

    def __init__(self, units, count, result, sigma):
        self.units_ = units
        self.count_ = float(count)
        self.result_ = float(result)
        self.sigma_ = float(sigma)

    def Compare(self, other):
        """Compare this result against the baseline `other`.

        For "score" units the percentage is 100*self/other - 100; for any
        other unit it is 100*other/self - 100.  Prints a message and exits
        the process with status 1 when the units differ.  Raises
        ZeroDivisionError when the divisor result is zero.

        Returns a ResultsDiff.
        """
        if self.units_ != other.units_:
            print ("Incompatible units: %s and %s" % (self.units_, other.units_))
            sys.exit(1)

        significant = False
        notable = 0
        percentage_string = ""
        # compute notability and significance.
        if self.units_ == "score":
            compare_num = 100*self.result_/other.result_ - 100
        else:
            compare_num = 100*other.result_/self.result_ - 100
        # NOTE(review): nesting reconstructed from statement order.  Because
        # PERCENT_CONSIDERED_SIGNIFICANT (0.5) exceeds the 0.1 cut-off, the
        # notable checks give the same answer inside or outside this guard.
        if abs(compare_num) > 0.1:
            percentage_string = "%3.1f" % (compare_num)
            z = Statistics.ComputeZ(other.result_, other.sigma_,
                                    self.result_, self.count_)
            p = Statistics.ComputeProbability(z)
            if p < PROBABILITY_CONSIDERED_SIGNIFICANT:
                significant = True

            if compare_num >= PERCENT_CONSIDERED_SIGNIFICANT:
                notable = 1
            elif compare_num <= -PERCENT_CONSIDERED_SIGNIFICANT:
                notable = -1

        return ResultsDiff(significant, notable, percentage_string)

    def result(self):
        return self.result_

    def sigma(self):
        return self.sigma_


class Benchmark:
    """A named benchmark holding one BenchmarkResult per run name."""

    def __init__(self, name):
        self.name_ = name
        self.runs_ = {}

    def name(self):
        return self.name_

    def getResult(self, run_name):
        """Return the BenchmarkResult stored for `run_name`, or None."""
        return self.runs_.get(run_name)

    def appendResult(self, run_name, trace):
        """Store the result described by `trace` under `run_name`.

        `trace` is read as a mapping with a 'results' sequence (each item
        convertible to float), a "units" entry and an optional 'stddev'
        entry.  When 'stddev' is missing or falsy the standard deviation is
        computed from the samples.
        """
        # Materialize the samples: len() below needs a real list on Python 3.
        values = [float(value) for value in trace['results']]
        count = len(values)
        mean = Statistics.Mean(values)
        stddev = float(trace.get('stddev') or
                       Statistics.StandardDeviation(values, mean))
        units = trace["units"]
        # print run_name, units, count, mean, stddev
        self.runs_[run_name] = BenchmarkResult(units, count, mean, stddev)


class BenchmarkSuite:
    """A named collection of Benchmark objects, keyed by benchmark name."""

    def __init__(self, name):
        self.name_ = name
        self.benchmarks_ = {}

    def SortedTestKeys(self):
        """Return the benchmark names sorted, with "Total" (if any) moved last."""
        # sorted() works on Python 2 lists and Python 3 key views alike.
        keys = sorted(self.benchmarks_.keys())
        t = "Total"
        if t in keys:
            keys.remove(t)
            keys.append(t)
        return keys

    def name(self):
        return self.name_

    def getBenchmark(self, benchmark_name):
        """Return the Benchmark called `benchmark_name`, creating it on first use."""
        benchmark_object = self.benchmarks_.get(benchmark_name)
        if benchmark_object is None:
            benchmark_object = Benchmark(benchmark_name)
            self.benchmarks_[benchmark_name] = benchmark_object
        return benchmark_object


class ResultTableRenderer:
    """Buffers output lines and writes them to a file or to stdout."""

    def __init__(self, output_file):
        self.benchmarks_ = []
        self.print_output_ = []
        self.output_file_ = output_file

    def Print(self, str_data):
        """Queue `str_data` as one output line."""
        self.print_output_.append(str_data)

    def FlushOutput(self):
        """Write all queued lines, newline-joined, to the output file or stdout.

        NOTE(review): the queue is not cleared afterwards (the original only
        assigned an unused local here), so a second call emits everything
        again.
        """
        string_data = "\n".join(self.print_output_)
        if self.output_file_:
            # create a file
            with open(self.output_file_, "w") as text_file:
                text_file.write(string_data)
        else:
            print(string_data)

    # NOTE(review): in the reviewed text the three helpers below use a bare
    # "%s" format string; any surrounding markup is not visible here --
    # confirm against the full file.
    def bold(self, data):
        return "%s" % data

    def red(self, data):
        return "%s" % data

    def green(self, data):
        return "%s" % data

    # NOTE(review): a PrintHeader() method starts at this point in the file,
    # but its template string and everything after it are truncated/garbled
    # in the reviewed text, so it is not reproduced here.
Test | ") main_run = None for run_name in run_names: self.Print("%s | " % run_name) if main_run == None: main_run = run_name else: self.Print("% | ") self.Print("") self.Print("") def FinishSuite(self): self.Print("") self.Print("
---|