Remove obsolete bench analysis scripts

R=bungeman@google.com
BUG=skia:5459
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2125953002

Review-Url: https://codereview.chromium.org/2125953002
tomhudson 2016-07-11 10:26:56 -07:00 committed by Commit bot
parent 5608e2ed22
commit 63d14413be
4 changed files with 0 additions and 1051 deletions


@@ -1,193 +0,0 @@
#!/usr/bin/env python
'''
Created on May 16, 2011
@author: bungeman
'''
import sys
import getopt
import bench_util
def usage():
    """Prints simple usage information."""
    print '-o <file> the old bench output file.'
    print '-n <file> the new bench output file.'
    print '-h causes headers to be output.'
    print '-s <stat> the type of statistical analysis used'
    print ' Not specifying is the same as -s "avg".'
    print ' avg: average of all data points'
    print ' min: minimum of all data points'
    print ' med: median of all data points'
    print ' 25th: twenty-fifth percentile for all data points'
    print '-f <fieldSpec> which fields to output and in what order.'
    print ' Not specifying is the same as -f "bctondp".'
    print ' b: bench'
    print ' c: config'
    print ' t: time type'
    print ' o: old time'
    print ' n: new time'
    print ' d: diff'
    print ' p: percent diff'
    print '-t use tab delimited format for output.'
    print '--match <bench> only matches benches which begin with <bench>.'

class BenchDiff:
    """A comparison between two data points produced by bench.
    (BenchDataPoint, BenchDataPoint)"""
    def __init__(self, old, new):
        self.old = old
        self.new = new
        self.diff = old.time - new.time
        diffp = 0
        if old.time != 0:
            diffp = self.diff / old.time
        self.diffp = diffp

    def __repr__(self):
        return "BenchDiff(%s, %s)" % (
            str(self.old),
            str(self.new),
        )
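
# A worked example of the diff arithmetic above (illustrative only; defined
# but never called, with a stand-in for bench_util.BenchDataPoint):
def _benchdiff_example():
    class FakePoint(object):
        def __init__(self, time):
            self.time = time
    d = BenchDiff(FakePoint(10.0), FakePoint(8.0))
    assert d.diff == 2.0   # old - new: positive means the new run is faster.
    assert d.diffp == 0.2  # Rendered as +20.0% by the 'p' column format.
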
def main():
    """Parses the command line and writes output."""
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "f:o:n:s:ht", ['match='])
    except getopt.GetoptError, err:
        print str(err)
        usage()
        sys.exit(2)

    old = None
    new = None
    column_format = ""
    header_format = ""
    columns = 'bctondp'
    header = False
    stat_type = "avg"
    use_tabs = False
    match_bench = None

    for option, value in opts:
        if option == "-o":
            old = value
        elif option == "-n":
            new = value
        elif option == "-h":
            header = True
        elif option == "-f":
            columns = value
        elif option == "-s":
            stat_type = value
        elif option == "-t":
            use_tabs = True
        elif option == "--match":
            match_bench = value
        else:
            usage()
            assert False, "unhandled option"

    if old is None or new is None:
        usage()
        sys.exit(2)

    old_benches = bench_util.parse({}, open(old, 'r'), stat_type)
    new_benches = bench_util.parse({}, open(new, 'r'), stat_type)

    bench_diffs = []
    for old_bench in old_benches:
        # Filter benches by the match criteria.
        if match_bench and not old_bench.bench.startswith(match_bench):
            continue

        # Filter new_benches for benches that match old_bench.
        new_bench_match = [bench for bench in new_benches
                           if old_bench.bench == bench.bench and
                              old_bench.config == bench.config and
                              old_bench.time_type == bench.time_type
                          ]
        if (len(new_bench_match) < 1):
            continue
        bench_diffs.append(BenchDiff(old_bench, new_bench_match[0]))

    if use_tabs:
        column_formats = {
            'b' : '{bench}\t',
            'c' : '{config}\t',
            't' : '{time_type}\t',
            'o' : '{old_time: 0.2f}\t',
            'n' : '{new_time: 0.2f}\t',
            'd' : '{diff: 0.2f}\t',
            'p' : '{diffp: 0.1%}\t',
        }
        header_formats = {
            'b' : '{bench}\t',
            'c' : '{config}\t',
            't' : '{time_type}\t',
            'o' : '{old_time}\t',
            'n' : '{new_time}\t',
            'd' : '{diff}\t',
            'p' : '{diffp}\t',
        }
    else:
        bench_max_len = max(map(lambda b: len(b.old.bench), bench_diffs))
        config_max_len = max(map(lambda b: len(b.old.config), bench_diffs))
        column_formats = {
            'b' : '{bench: >%d} ' % (bench_max_len),
            'c' : '{config: <%d} ' % (config_max_len),
            't' : '{time_type: <4} ',
            'o' : '{old_time: >10.2f} ',
            'n' : '{new_time: >10.2f} ',
            'd' : '{diff: >+10.2f} ',
            'p' : '{diffp: >+8.1%} ',
        }
        header_formats = {
            'b' : '{bench: >%d} ' % (bench_max_len),
            'c' : '{config: <%d} ' % (config_max_len),
            't' : '{time_type: <4} ',
            'o' : '{old_time: >10} ',
            'n' : '{new_time: >10} ',
            'd' : '{diff: >10} ',
            'p' : '{diffp: >8} ',
        }

    for column_char in columns:
        if column_char in column_formats:
            column_format += column_formats[column_char]
            header_format += header_formats[column_char]
        else:
            usage()
            sys.exit(2)

    if header:
        print header_format.format(
            bench='bench'
            , config='conf'
            , time_type='time'
            , old_time='old'
            , new_time='new'
            , diff='diff'
            , diffp='diffP'
        )

    bench_diffs.sort(key=lambda d : [d.diffp,
                                     d.old.bench,
                                     d.old.config,
                                     d.old.time_type,
                                    ])
    for bench_diff in bench_diffs:
        print column_format.format(
            bench=bench_diff.old.bench.strip()
            , config=bench_diff.old.config.strip()
            , time_type=bench_diff.old.time_type
            , old_time=bench_diff.old.time
            , new_time=bench_diff.new.time
            , diff=bench_diff.diff
            , diffp=bench_diff.diffp
        )

if __name__ == "__main__":
    main()
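
# Hypothetical invocation (the script and file names here are placeholders):
#   python bench_compare.py -o old_bench.txt -n new_bench.txt -h -s 25th --match draw
# would print a header plus one aligned row per bench/config/time type whose
# name begins with 'draw', sorted by percent diff.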


@@ -1,356 +0,0 @@
'''
Created on May 19, 2011
@author: bungeman
'''
import os
import re
import math
# bench representation algorithm constant names
ALGORITHM_AVERAGE = 'avg'
ALGORITHM_MEDIAN = 'med'
ALGORITHM_MINIMUM = 'min'
ALGORITHM_25TH_PERCENTILE = '25th'
# Regular expressions used throughout.
PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?'
SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)'
BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
TIME_RE = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)'
# non-per-tile benches have configs that don't end with ']' or '>'
CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)'
# per-tile bench lines are in the following format. Note that there are
# non-averaged bench numbers in separate lines, which we ignore now due to
# their inaccuracy.
TILE_RE = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
           ' ((?:' + TIME_RE + '\s+)+)')
# for extracting tile layout
TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: '
PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
BENCH_RE_COMPILED = re.compile(BENCH_RE)
TIME_RE_COMPILED = re.compile(TIME_RE)
CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
TILE_RE_COMPILED = re.compile(TILE_RE)
TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)
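
# Synthetic sample lines (invented for illustration) in the shape the regexes
# above expect:
#   settings: 'skia bench: alpha=0.500000 scale=1.100000 dither'
#   bench:    'running bench [640 480] desk_gmail.skp'
#   config:   '  8888: msecs = 37.05 cmsecs = 36.10'
#   per-tile: ' tile_256x256: tile [0,0] out of [5,3] <averaged>: msecs = 14.17'
# An empty time type ('') denotes walltime; 'c' and 'g' denote cpu and gpu.
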
class BenchDataPoint:
    """A single data point produced by bench.
    """
    def __init__(self, bench, config, time_type, time, settings,
                 tile_layout='', per_tile_values=[], per_iter_time=[]):
        # string name of the benchmark to measure
        self.bench = bench
        # string name of the configuration to run
        self.config = config
        # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)
        self.time_type = time_type
        # float number of the bench time value
        self.time = time
        # dictionary of the run settings
        self.settings = settings
        # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows
        self.tile_layout = tile_layout
        # list of floats for per-tile bench values, if applicable
        self.per_tile_values = per_tile_values
        # list of floats for per-iteration bench times, if applicable
        self.per_iter_time = per_iter_time

    def __repr__(self):
        return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
            str(self.bench),
            str(self.config),
            str(self.time_type),
            str(self.time),
            str(self.settings),
        )


class _ExtremeType(object):
    """Instances of this class compare greater or less than other objects."""
    def __init__(self, cmpr, rep):
        object.__init__(self)
        self._cmpr = cmpr
        self._rep = rep

    def __cmp__(self, other):
        if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
            return 0
        return self._cmpr

    def __repr__(self):
        return self._rep

Max = _ExtremeType(1, "Max")
Min = _ExtremeType(-1, "Min")


class _ListAlgorithm(object):
    """Algorithm for selecting the representation value from a given list.
    representation is one of the ALGORITHM_XXX representation types."""
    def __init__(self, data, representation=None):
        if not representation:
            representation = ALGORITHM_AVERAGE  # default algorithm
        self._data = data
        self._len = len(data)
        if representation == ALGORITHM_AVERAGE:
            self._rep = sum(self._data) / self._len
        else:
            self._data.sort()
            if representation == ALGORITHM_MINIMUM:
                self._rep = self._data[0]
            else:
                # for percentiles, we use the value below which x% of values are
                # found, which allows for better detection of quantum behaviors.
                if representation == ALGORITHM_MEDIAN:
                    x = int(round(0.5 * self._len + 0.5))
                elif representation == ALGORITHM_25TH_PERCENTILE:
                    x = int(round(0.25 * self._len + 0.5))
                else:
                    raise Exception("invalid representation algorithm %s!" %
                                    representation)
                self._rep = self._data[x - 1]

    def compute(self):
        return self._rep
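
# A worked example of the index arithmetic above (illustrative only; relies on
# Python 2's round-half-away-from-zero semantics):
def _list_algorithm_example():
    data = [5.0, 1.0, 3.0, 2.0]  # sorted: [1.0, 2.0, 3.0, 5.0]
    assert _ListAlgorithm(list(data), ALGORITHM_AVERAGE).compute() == 2.75
    assert _ListAlgorithm(list(data), ALGORITHM_MINIMUM).compute() == 1.0
    # med:  x = int(round(0.5 * 4 + 0.5)) = 3, the 3rd smallest value.
    assert _ListAlgorithm(list(data), ALGORITHM_MEDIAN).compute() == 3.0
    # 25th: x = int(round(0.25 * 4 + 0.5)) = 2, the 2nd smallest value.
    assert _ListAlgorithm(list(data), ALGORITHM_25TH_PERCENTILE).compute() == 2.0
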
def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
                        value_dic, layout_dic):
    """Parses given bench time line with regex and adds data to value_dic.

    config_re_compiled: precompiled regular expression for parsing the config
        line.
    is_per_tile: boolean indicating whether this is a per-tile bench.
        If so, we add tile layout into layout_dic as well.
    line: input string line to parse.
    bench: name of bench for the time values.
    value_dic: dictionary to store bench values. See bench_dic in parse() below.
    layout_dic: dictionary to store tile layouts. See parse() for descriptions.
    """
    for config in config_re_compiled.finditer(line):
        current_config = config.group(1)
        tile_layout = ''
        if is_per_tile:  # per-tile bench, add name prefix
            current_config = 'tile_' + current_config
            layouts = TILE_LAYOUT_RE_COMPILED.search(line)
            if layouts and len(layouts.groups()) == 2:
                tile_layout = '%sx%s' % layouts.groups()
        times = config.group(2)
        for new_time in TIME_RE_COMPILED.finditer(times):
            current_time_type = new_time.group(1)
            iters = [float(i) for i in
                     new_time.group(2).strip().split(',')]
            value_dic.setdefault(bench, {}).setdefault(
                current_config, {}).setdefault(current_time_type, []).append(
                    iters)
            layout_dic.setdefault(bench, {}).setdefault(
                current_config, {}).setdefault(current_time_type, tile_layout)

def parse_skp_bench_data(directory, revision, rep, default_settings=None):
    """Parses all the skp bench data in the given directory.

    Args:
      directory: string of path to input data directory.
      revision: git hash revision that matches the data to process.
      rep: bench representation algorithm, see bench_util.py.
      default_settings: dictionary of other run settings. See writer.option() in
          bench/benchmain.cpp.

    Returns:
      A list of BenchDataPoint objects.
    """
    revision_data_points = []
    file_list = os.listdir(directory)
    file_list.sort()
    for bench_file in file_list:
        scalar_type = None
        # Scalar type, if any, is in the bench filename after 'scalar_'.
        if (bench_file.startswith('bench_' + revision + '_data_')):
            if bench_file.find('scalar_') > 0:
                components = bench_file.split('_')
                scalar_type = components[components.index('scalar') + 1]
        else:  # Skips non skp bench files.
            continue
        with open('/'.join([directory, bench_file]), 'r') as file_handle:
            settings = dict(default_settings or {})
            settings['scalar'] = scalar_type
            revision_data_points.extend(parse(settings, file_handle, rep))
    return revision_data_points

# TODO(bensong): switch to reading JSON output when available. This way we don't
# need the RE complexities.
def parse(settings, lines, representation=None):
    """Parses bench output into a useful data structure.

    ({str:str}, __iter__ -> str) -> [BenchDataPoint]
    representation is one of the ALGORITHM_XXX types."""
    benches = []
    current_bench = None
    # [bench][config][time_type] -> [[per-iter values]] where per-tile config
    # has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...],
    # [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only
    # contains one list of iterations [[iter1, iter2, ...]].
    bench_dic = {}
    # [bench][config][time_type] -> tile_layout
    layout_dic = {}
    for line in lines:
        # see if this line is a settings line
        settingsMatch = SETTINGS_RE_COMPILED.search(line)
        if (settingsMatch):
            settings = dict(settings)
            for settingMatch in PER_SETTING_RE_COMPILED.finditer(
                    settingsMatch.group(1)):
                if (settingMatch.group(2)):
                    settings[settingMatch.group(1)] = settingMatch.group(2)
                else:
                    settings[settingMatch.group(1)] = True

        # see if this line starts a new bench
        new_bench = BENCH_RE_COMPILED.search(line)
        if new_bench:
            current_bench = new_bench.group(1)

        # add configs on this line to the bench_dic
        if current_bench:
            if line.startswith(' tile_'):
                _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,
                                    bench_dic, layout_dic)
            else:
                _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
                                    current_bench, bench_dic, layout_dic)

    # append benches to list
    for bench in bench_dic:
        for config in bench_dic[bench]:
            for time_type in bench_dic[bench][config]:
                tile_layout = ''
                per_tile_values = []  # empty for non-per-tile configs
                per_iter_time = []    # empty for per-tile configs
                bench_summary = None  # a single final bench value
                if len(bench_dic[bench][config][time_type]) > 1:
                    # per-tile config; compute representation for each tile
                    per_tile_values = [
                        _ListAlgorithm(iters, representation).compute()
                        for iters in bench_dic[bench][config][time_type]]
                    # use sum of each tile representation for total bench value
                    bench_summary = sum(per_tile_values)
                    # extract tile layout
                    tile_layout = layout_dic[bench][config][time_type]
                else:
                    # get the list of per-iteration values
                    per_iter_time = bench_dic[bench][config][time_type][0]
                    bench_summary = _ListAlgorithm(
                        per_iter_time, representation).compute()
                benches.append(BenchDataPoint(
                    bench,
                    config,
                    time_type,
                    bench_summary,
                    settings,
                    tile_layout,
                    per_tile_values,
                    per_iter_time))
    return benches

class LinearRegression:
    """Linear regression data based on a set of data points.

    ([(Number,Number)])
    There must be at least two points for this to make sense."""
    def __init__(self, points):
        n = len(points)
        max_x = Min
        min_x = Max

        Sx = 0.0
        Sy = 0.0
        Sxx = 0.0
        Sxy = 0.0
        Syy = 0.0
        for point in points:
            x = point[0]
            y = point[1]
            max_x = max(max_x, x)
            min_x = min(min_x, x)

            Sx += x
            Sy += y
            Sxx += x*x
            Sxy += x*y
            Syy += y*y

        denom = n*Sxx - Sx*Sx
        if (denom != 0.0):
            B = (n*Sxy - Sx*Sy) / denom
        else:
            B = 0.0
        a = (1.0/n)*(Sy - B*Sx)

        se2 = 0
        sB2 = 0
        sa2 = 0
        if (n >= 3 and denom != 0.0):
            se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
            sB2 = (n*se2) / denom
            sa2 = sB2 * (1.0/n) * Sxx

        self.slope = B
        self.intercept = a
        self.serror = math.sqrt(max(0, se2))
        self.serror_slope = math.sqrt(max(0, sB2))
        self.serror_intercept = math.sqrt(max(0, sa2))
        self.max_x = max_x
        self.min_x = min_x

    def __repr__(self):
        return "LinearRegression(%s, %s, %s, %s, %s)" % (
            str(self.slope),
            str(self.intercept),
            str(self.serror),
            str(self.serror_slope),
            str(self.serror_intercept),
        )

    def find_min_slope(self):
        """Finds the minimal slope given one standard deviation."""
        slope = self.slope
        intercept = self.intercept
        error = self.serror
        regr_start = self.min_x
        regr_end = self.max_x
        regr_width = regr_end - regr_start

        if slope < 0:
            lower_left_y = slope*regr_start + intercept - error
            upper_right_y = slope*regr_end + intercept + error
            return min(0, (upper_right_y - lower_left_y) / regr_width)
        elif slope > 0:
            upper_left_y = slope*regr_start + intercept + error
            lower_right_y = slope*regr_end + intercept - error
            return max(0, (lower_right_y - upper_left_y) / regr_width)

        return 0
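
# A worked example of the least-squares sums above (illustrative only). For
# points lying exactly on y = 2x + 1: denom = n*Sxx - Sx*Sx = 6,
# B = (n*Sxy - Sx*Sy) / denom = 2, a = (1/n)*(Sy - B*Sx) = 1, zero error.
def _linear_regression_example():
    regr = LinearRegression([(0.0, 1.0), (1.0, 3.0), (2.0, 5.0)])
    assert regr.slope == 2.0
    assert regr.intercept == 1.0
    assert regr.serror == 0.0
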
def CreateRevisionLink(revision_number):
    """Returns HTML displaying the given revision number and linking to
    that revision's change page at code.google.com, e.g.
    http://code.google.com/p/skia/source/detail?r=2056
    """
    return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>' % (
        revision_number, revision_number)


def main():
    foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
    LinearRegression(foo)

if __name__ == "__main__":
    main()


@@ -1,223 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
""" Generate bench_expectations file from a given set of bench data files. """
import argparse
import bench_util
import json
import os
import re
import sys
import urllib2
# Parameters for calculating bench ranges.
RANGE_RATIO_UPPER = 1.5 # Ratio of range for upper bounds.
RANGE_RATIO_LOWER = 2.0 # Ratio of range for lower bounds.
ERR_RATIO = 0.08 # Further widens the range by the ratio of average value.
ERR_UB = 1.0 # Adds an absolute upper error to cope with small benches.
ERR_LB = 1.5
# List of bench configs to monitor. Ignore all other configs.
CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',
                      'simple_viewport_1000x1000_angle',
                      'simple_viewport_1000x1000_gpu',
                      'simple_viewport_1000x1000_scalar_1.100000',
                      'simple_viewport_1000x1000_scalar_1.100000_gpu',
                     ]
# List of flaky entries that should be excluded. Each entry is defined by a list
# of 3 strings, corresponding to the substrings of [bench, config, builder] to
# search for. A bench expectations line is excluded when each of the 3 strings
# in the list is a substring of the corresponding element of the given line. For
# instance, ['desk_yahooanswers', 'gpu', 'Ubuntu'] will skip expectation entries
of SKP benches whose name contains 'desk_yahooanswers' on all gpu-related
# configs of all Ubuntu builders.
ENTRIES_TO_EXCLUDE = [
]
_GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'

def compute_ranges(benches, more_benches=None):
    """Given a list of bench numbers, calculate the alert range.

    Args:
      benches: a list of float bench values.
      more_benches: a list of (idx, values) tuples of additional bench values.
          idx is the number of commits before the current one at which that set
          of values was recorded, and values is a list of bench results.
          (Note: accepted but not currently used in the range computation.)

    Returns:
      a list of float [lower_bound, upper_bound].
    """
    avg = sum(benches)/len(benches)
    minimum = min(benches)
    maximum = max(benches)
    diff = maximum - minimum

    return [minimum - diff*RANGE_RATIO_LOWER - avg*ERR_RATIO - ERR_LB,
            maximum + diff*RANGE_RATIO_UPPER + avg*ERR_RATIO + ERR_UB]
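
# A worked example of the range arithmetic above (illustrative only):
def _compute_ranges_example():
    # For [10.0, 11.0, 12.0]: avg = 11.0, min = 10.0, max = 12.0, diff = 2.0.
    lower, upper = compute_ranges([10.0, 11.0, 12.0])
    assert round(lower, 2) == 3.62   # 10 - 2*2.0 - 11*0.08 - 1.5
    assert round(upper, 2) == 16.88  # 12 + 2*1.5 + 11*0.08 + 1.0
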
def create_expectations_dict(revision_data_points, builder, extra_data=None):
    """Convert list of bench data points into a dictionary of expectations data.

    Args:
      revision_data_points: a list of BenchDataPoint objects.
      builder: string of the corresponding buildbot builder name.
      extra_data: a list of (idx, data_points) tuples of bench data from nearby
          commits; matching per-iteration times are forwarded to compute_ranges().

    Returns:
      a dictionary of this form:
          keys = tuple of (config, bench) strings.
          values = list of float [expected, lower_bound, upper_bound] for the key.
    """
    bench_dict = {}
    for point in revision_data_points:
        if (point.time_type or  # Not walltime, which has time_type ''
            not point.config in CONFIGS_TO_INCLUDE):
            continue
        to_skip = False
        for bench_substr, config_substr, builder_substr in ENTRIES_TO_EXCLUDE:
            if (bench_substr in point.bench and config_substr in point.config and
                builder_substr in builder):
                to_skip = True
                break
        if to_skip:
            continue
        key = (point.config, point.bench)
        extras = []
        for idx, dataset in extra_data:
            for data in dataset:
                if (data.bench == point.bench and data.config == point.config and
                    data.time_type == point.time_type and data.per_iter_time):
                    extras.append((idx, data.per_iter_time))
        if key in bench_dict:
            raise Exception('Duplicate bench entry: ' + str(key))
        bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time,
                                                        extras)

    return bench_dict

def get_parent_commits(start_hash, num_back):
    """Returns a list of commits that are the parents of the commit passed in."""
    list_commits = urllib2.urlopen(
        'https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %
        (start_hash, num_back))
    # NOTE: Very brittle. Strips the four extraneous leading characters (the
    # gitiles anti-XSSI prefix ")]}'") so the JSON can be read successfully.
    trunc_list = list_commits.read()[4:]
    json_data = json.loads(trunc_list)
    return [revision['commit'] for revision in json_data['log']]

def get_file_suffixes(commit_hash, directory):
    """Gets all the suffixes available in the directory."""
    possible_files = os.listdir(directory)
    prefix = 'bench_' + commit_hash + '_data_'
    return [name[len(prefix):] for name in possible_files
            if name.startswith(prefix)]


def download_bench_data(builder, commit_hash, suffixes, directory):
    """Downloads data, returns the number successfully downloaded."""
    cur_files = os.listdir(directory)
    count = 0
    for suffix in suffixes:
        file_name = 'bench_' + commit_hash + '_data_' + suffix
        if file_name in cur_files:
            continue
        try:
            src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))
            with open(os.path.join(directory, file_name), 'w') as dest:
                dest.writelines(src)
            count += 1
        except urllib2.HTTPError:
            pass
    return count

def main():
    """Reads bench data points, then calculates and exports expectations."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-a', '--representation_alg', default='25th',
        help='bench representation algorithm to use, see bench_util.py.')
    parser.add_argument(
        '-b', '--builder', required=True,
        help='name of the builder whose bench ranges we are computing.')
    parser.add_argument(
        '-d', '--input_dir', required=True,
        help='a directory containing bench data files.')
    parser.add_argument(
        '-o', '--output_file', required=True,
        help='file path and name for storing the output bench expectations.')
    parser.add_argument(
        '-r', '--git_revision', required=True,
        help='the git hash to indicate the revision of input data to use.')
    parser.add_argument(
        '-t', '--back_track', required=False, default=10, type=int,
        help='the number of commit hashes backwards to look to include '
             'in the calculations.')
    parser.add_argument(
        '-m', '--max_commits', required=False, default=1, type=int,
        help='the number of commit hashes to include in the calculations.')
    args = parser.parse_args()

    builder = args.builder

    data_points = bench_util.parse_skp_bench_data(
        args.input_dir, args.git_revision, args.representation_alg)

    parent_commits = get_parent_commits(args.git_revision, args.back_track)
    print "Using commits: {}".format(parent_commits)
    suffixes = get_file_suffixes(args.git_revision, args.input_dir)
    print "Using suffixes: {}".format(suffixes)

    # TODO(kelvinly): Find a better approach than directly copying from
    # the GS server?
    downloaded_commits = []
    for idx, commit in enumerate(parent_commits):
        num_downloaded = download_bench_data(
            builder, commit, suffixes, args.input_dir)
        if num_downloaded > 0:
            downloaded_commits.append((num_downloaded, idx, commit))

    if len(downloaded_commits) < args.max_commits:
        print ('Fewer than the desired number of commits found. Please increase '
               '--back_track in later runs.')
    trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]
    extra_data = []
    for _, idx, commit in trunc_commits:
        extra_data.append((idx, bench_util.parse_skp_bench_data(
            args.input_dir, commit, args.representation_alg)))

    expectations_dict = create_expectations_dict(data_points, builder,
                                                 extra_data)

    out_lines = []
    keys = expectations_dict.keys()
    keys.sort()
    for (config, bench) in keys:
        (expected, lower_bound, upper_bound) = expectations_dict[(config, bench)]
        out_lines.append('%(bench)s_%(config)s_,%(builder)s-%(representation)s,'
                         '%(expected)s,%(lower_bound)s,%(upper_bound)s' % {
                             'bench': bench,
                             'config': config,
                             'builder': builder,
                             'representation': args.representation_alg,
                             'expected': expected,
                             'lower_bound': lower_bound,
                             'upper_bound': upper_bound})

    with open(args.output_file, 'w') as file_handle:
        file_handle.write('\n'.join(out_lines))

if __name__ == "__main__":
    main()


@@ -1,279 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE file.
""" Analyze per-tile and viewport bench data, and output visualized results.
"""
__author__ = 'bensong@google.com (Ben Chen)'
import bench_util
import boto
import math
import optparse
import os
import re
import shutil
from oauth2_plugin import oauth2_plugin
# The default platform to analyze. Used when OPTION_PLATFORM flag is not set.
DEFAULT_PLATFORM = 'Nexus10_4-1_Float_Bench_32'
# Template for gsutil uri.
GOOGLE_STORAGE_URI_SCHEME = 'gs'
URI_BUCKET = 'chromium-skia-gm'
# Maximum number of rows of tiles to track for viewport covering.
MAX_TILE_ROWS = 8
# Constants for optparse.
USAGE_STRING = 'USAGE: %s [options]'
HOWTO_STRING = """
Note: to read bench data stored in Google Storage, you will need to set up the
corresponding Python library.
See http://developers.google.com/storage/docs/gspythonlibrary for details.
"""
HELP_STRING = """
For the given platform and revision number, find corresponding viewport and
tile benchmarks for each available picture bench, and output visualization and
analysis in HTML. By default it reads from Skia's Google Storage location where
bot data are stored, but if --dir is given, will read from local directory
instead.
""" + HOWTO_STRING
OPTION_DIR = '--dir'
OPTION_DIR_SHORT = '-d'
OPTION_REVISION = '--rev'
OPTION_REVISION_SHORT = '-r'
OPTION_PLATFORM = '--platform'
OPTION_PLATFORM_SHORT = '-p'
# Bench representation algorithm flag.
OPTION_REPRESENTATION_ALG = '--algorithm'
OPTION_REPRESENTATION_ALG_SHORT = '-a'
# Bench representation algorithm. See trunk/bench/bench_util.py.
REPRESENTATION_ALG = bench_util.ALGORITHM_25TH_PERCENTILE
# Constants for bench file matching.
GOOGLE_STORAGE_OBJECT_NAME_PREFIX = 'perfdata/Skia_'
BENCH_FILE_PREFIX_TEMPLATE = 'bench_r%s_'
TILING_FILE_NAME_INDICATOR = '_tile_'
VIEWPORT_FILE_NAME_INDICATOR = '_viewport_'
# Regular expression for matching format '<integer>x<integer>'.
DIMENSIONS_RE = '(\d+)x(\d+)'
# HTML and JS output templates.
HTML_PREFIX = """
<html><head><script type="text/javascript" src="https://www.google.com/jsapi">
</script><script type="text/javascript">google.load("visualization", "1.1",
{packages:["table"]});google.load("prototype", "1.6");</script>
<script type="text/javascript" src="https://systemsbiology-visualizations.googlecode.com/svn/trunk/src/main/js/load.js"></script><script
type="text/javascript"> systemsbiology.load("visualization", "1.0",
{packages:["bioheatmap"]});</script><script type="text/javascript">
google.setOnLoadCallback(drawVisualization); function drawVisualization() {
"""
HTML_SUFFIX = '</body></html>'
BAR_CHART_TEMPLATE = ('<img src="https://chart.googleapis.com/chart?chxr=0,0,'
                      '300&chxt=x&chbh=15,0&chs=600x150&cht=bhg&chco=80C65A,'
                      '224499,FF0000,0A8C8A,EBB671,DE091A,000000,00ffff&chds=a'
                      '&chdl=%s&chd=t:%s" /><br>\n')
DRAW_OPTIONS = ('{passThroughBlack:false,useRowLabels:false,cellWidth:30,'
                'cellHeight:30}')
TABLE_OPTIONS = '{showRowNumber:true,firstRowNumber:" ",sort:"disable"}'

def GetFiles(rev, bench_dir, platform):
    """Reads in bench files of interest into a dictionary.

    If bench_dir is not empty, tries to read in local bench files; otherwise
    checks Google Storage. Filters files by revision (rev) and platform, and
    ignores non-tile, non-viewport bench files.
    Outputs dictionary [filename] -> [file content].
    """
    file_dic = {}
    if not bench_dir:
        uri = boto.storage_uri(URI_BUCKET, GOOGLE_STORAGE_URI_SCHEME)
        # The boto API does not allow prefix/wildcard matching of Google Storage
        # objects. And Google Storage has a flat structure instead of being
        # organized in directories. Therefore, we have to scan all objects in
        # the Google Storage bucket to find the files we need, which is slow.
        # The option of implementing prefix matching as in gsutil seems to be
        # overkill, but gsutil does not provide an API ready for use. If speed
        # is a big concern, we suggest copying bot bench data from Google
        # Storage using gsutil and using --dir for fast local data reading.
        for obj in uri.get_bucket():
            # Filters out files of no interest.
            if (not obj.name.startswith(GOOGLE_STORAGE_OBJECT_NAME_PREFIX) or
                (obj.name.find(TILING_FILE_NAME_INDICATOR) < 0 and
                 obj.name.find(VIEWPORT_FILE_NAME_INDICATOR) < 0) or
                obj.name.find(platform) < 0 or
                obj.name.find(BENCH_FILE_PREFIX_TEMPLATE % rev) < 0):
                continue
            file_dic[
                obj.name[obj.name.rfind('/') + 1:]] = obj.get_contents_as_string()
    else:
        for f in os.listdir(bench_dir):
            if (not os.path.isfile(os.path.join(bench_dir, f)) or
                (f.find(TILING_FILE_NAME_INDICATOR) < 0 and
                 f.find(VIEWPORT_FILE_NAME_INDICATOR) < 0) or
                not f.startswith(BENCH_FILE_PREFIX_TEMPLATE % rev)):
                continue
            file_dic[f] = open(os.path.join(bench_dir, f)).read()
    if not file_dic:
        raise Exception('No bench file found in "%s" or Google Storage.' %
                        bench_dir)
    return file_dic

def GetTileMatrix(layout, tile_size, values, viewport):
    """For the given tile layout and per-tile bench values, returns a matrix of
    bench values with tiles outside the given viewport set to 0.

    layout, tile_size and viewport are given as strings of format <w>x<h>, where
    <w> is the viewport width or number of tile columns, and <h> is the viewport
    height or number of tile rows. We truncate tile rows to MAX_TILE_ROWS to
    adjust for very long skp's.
    values: per-tile benches ordered row-by-row, starting from the top-left tile.

    Returns [sum, matrix] where sum is the total bench tile time that covers the
    viewport, and matrix is used for visualizing the tiles.
    """
    [tile_cols, tile_rows] = [int(i) for i in layout.split('x')]
    [tile_x, tile_y] = [int(i) for i in tile_size.split('x')]
    [viewport_x, viewport_y] = [int(i) for i in viewport.split('x')]
    viewport_cols = int(math.ceil(viewport_x * 1.0 / tile_x))
    viewport_rows = int(math.ceil(viewport_y * 1.0 / tile_y))
    truncated_tile_rows = min(tile_rows, MAX_TILE_ROWS)

    viewport_tile_sum = 0
    matrix = [[0 for y in range(tile_cols)] for x in range(truncated_tile_rows)]
    for y in range(min(viewport_cols, tile_cols)):
        for x in range(min(truncated_tile_rows, viewport_rows)):
            matrix[x][y] = values[x * tile_cols + y]
            viewport_tile_sum += values[x * tile_cols + y]

    return [viewport_tile_sum, matrix]
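
# A worked example of the tile/viewport arithmetic above (illustrative only):
def _get_tile_matrix_example():
    # A 4x2 grid of 256x256 tiles under a 600x300 viewport: ceil(600/256) = 3
    # columns and ceil(300/256) = 2 rows of tiles cover the viewport.
    values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]  # row-major, top-left first
    [total, matrix] = GetTileMatrix('4x2', '256x256', values, '600x300')
    assert total == 24.0  # (1+2+3) from the top row + (5+6+7) from the bottom.
    assert matrix == [[1.0, 2.0, 3.0, 0], [5.0, 6.0, 7.0, 0]]
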
def GetTileVisCodes(suffix, matrix):
    """Generates and returns strings of [js_codes, row1, row2] which are codes
    for visualizing the benches from the given tile config and matrix data.

    row1 is used for the first row of heatmaps; row2 is for corresponding tables.
    suffix is only used to avoid name conflicts in the whole html output.
    """
    this_js = 'var data_%s=new google.visualization.DataTable();' % suffix
    for i in range(len(matrix[0])):
        this_js += 'data_%s.addColumn("number","%s");' % (suffix, i)
    this_js += 'data_%s.addRows(%s);' % (suffix, str(matrix))

    # Adds heatmap chart.
    this_js += ('var heat_%s=new org.systemsbiology.visualization' % suffix +
                '.BioHeatMap(document.getElementById("%s"));' % suffix +
                'heat_%s.draw(data_%s,%s);' % (suffix, suffix, DRAW_OPTIONS))

    # Adds data table chart.
    this_js += ('var table_%s=new google.visualization.Table(document.' % suffix +
                'getElementById("t%s"));table_%s.draw(data_%s,%s);\n' % (
                    suffix, suffix, suffix, TABLE_OPTIONS))

    table_row1 = '<td>%s<div id="%s"></div></td>' % (suffix, suffix)
    table_row2 = '<td><div id="t%s"></div></td>' % suffix

    return [this_js, table_row1, table_row2]

def OutputTileAnalysis(rev, representation_alg, bench_dir, platform):
    """Reads skp bench data and outputs tile vs. viewport analysis for the given
    platform.

    Ignores data with revisions other than rev. If bench_dir is not empty, reads
    from the local directory instead of Google Storage.
    Uses the provided representation_alg for calculating bench representations.

    Returns (js_codes, body_codes): strings of js/html codes for stats and
    visualization.
    """
    js_codes = ''
    body_codes = ('}</script></head><body>'
                  '<h3>PLATFORM: %s REVISION: %s</h3><br>' % (platform, rev))
    bench_dic = {}  # [bench][config] -> [layout, [values]]
    file_dic = GetFiles(rev, bench_dir, platform)
    for f in file_dic:
        for point in bench_util.parse('', file_dic[f].split('\n'),
                                      representation_alg):
            if point.time_type:  # Ignores non-walltime time_type.
                continue
            bench = point.bench.replace('.skp', '')
            config = point.config.replace('simple_', '')
            components = config.split('_')
            if components[0] == 'viewport':
                bench_dic.setdefault(bench, {})[config] = [components[1],
                                                           [point.time]]
            else:  # Stores per-tile benches.
                bench_dic.setdefault(bench, {})[config] = [
                    point.tile_layout, point.per_tile_values]
    benches = bench_dic.keys()
    benches.sort()
    for bench in benches:
        body_codes += '<h4>%s</h4><br><table><tr>' % bench
        heat_plots = ''   # For table row of heatmap plots.
        table_plots = ''  # For table row of data table plots.
        # For bar plot legends and values in URL string.
        legends = ''
        values = ''
        keys = bench_dic[bench].keys()
        keys.sort()
        if not keys[-1].startswith('viewport'):  # No viewport to analyze; skip.
            continue
        else:
            # Extracts viewport size, which for all viewport configs is the same.
            viewport = bench_dic[bench][keys[-1]][0]
        for config in keys:
            [layout, value_li] = bench_dic[bench][config]
            if config.startswith('tile_'):  # For per-tile data, visualize tiles.
                tile_size = config.split('_')[1]
                if (not re.search(DIMENSIONS_RE, layout) or
                    not re.search(DIMENSIONS_RE, tile_size) or
                    not re.search(DIMENSIONS_RE, viewport)):
                    continue  # Skip unrecognized formats.
                [viewport_tile_sum, matrix] = GetTileMatrix(
                    layout, tile_size, value_li, viewport)
                values += '%s|' % viewport_tile_sum
                [this_js, row1, row2] = GetTileVisCodes(config + '_' + bench,
                                                        matrix)
                heat_plots += row1
                table_plots += row2
                js_codes += this_js
            else:  # For viewport data, there is only one element in value_li.
                values += '%s|' % sum(value_li)
                legends += '%s:%s|' % (config, sum(value_li))
        body_codes += (heat_plots + '</tr><tr>' + table_plots + '</tr></table>' +
                       '<br>' + BAR_CHART_TEMPLATE % (legends[:-1], values[:-1]))

    return (js_codes, body_codes)

def main():
    """Parses flags and outputs expected Skia picture bench results."""
    parser = optparse.OptionParser(USAGE_STRING % '%prog' + HELP_STRING)
    parser.add_option(OPTION_PLATFORM_SHORT, OPTION_PLATFORM,
                      dest='plat', default=DEFAULT_PLATFORM,
                      help='Platform to analyze. Defaults to DEFAULT_PLATFORM '
                           'if not given.')
    parser.add_option(OPTION_REVISION_SHORT, OPTION_REVISION,
                      dest='rev',
                      help='(Mandatory) revision number to analyze.')
    parser.add_option(OPTION_DIR_SHORT, OPTION_DIR,
                      dest='log_dir', default='',
                      help=('(Optional) local directory where bench log files '
                            'reside. If left empty (the default), will try to '
                            'read from Google Storage.'))
    parser.add_option(OPTION_REPRESENTATION_ALG_SHORT, OPTION_REPRESENTATION_ALG,
                      dest='alg', default=REPRESENTATION_ALG,
                      help=('Bench representation algorithm. '
                            'Defaults to "%s".' % REPRESENTATION_ALG))
    (options, args) = parser.parse_args()
    if not (options.rev and options.rev.isdigit()):
        parser.error('Please provide the mandatory flag %s' % OPTION_REVISION)
        return
    rev = int(options.rev)
    (js_codes, body_codes) = OutputTileAnalysis(
        rev, options.alg, options.log_dir, options.plat)
    print HTML_PREFIX + js_codes + body_codes + HTML_SUFFIX

if '__main__' == __name__:
    main()