#!/usr/bin/python2
#
# Copyright 2019 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Helper script that takes as input 2 CSVs downloaded from perf.skia.org and
# outputs a CSV with test_name, avg_value1 (from CSV1), avg_value2 (from CSV2),
# and perc_diff, the percentage difference between avg_value1 and avg_value2.
# Before averaging, this script discards the NUM_OUTLIERS_TO_REMOVE smallest
# and the NUM_OUTLIERS_TO_REMOVE largest values of each test.
|
||
|
|
||
|
|
||
|
import csv
|
||
|
import optparse
|
||
|
import sys
|
||
|
import re
|
||
|
|
||
|
|
||
|
# Placeholder stored in the combined results (and therefore written to the
# output CSV) when a test is present in only one of the two inputs.
MISSING_STR = 'N/A'
# How many of the smallest and how many of the largest samples are dropped
# from each test's values before the average is computed.
NUM_OUTLIERS_TO_REMOVE = 2
|
||
|
|
||
|
|
||
|
def _open_csv_for_read(csv_file):
    """Open csv_file in the mode the running interpreter's csv module needs."""
    if sys.version_info[0] >= 3:
        # Python 3's csv module wants text mode with newline translation off.
        return open(csv_file, 'r', newline='')
    # Python 2's csv module wants a binary-mode file object.
    return open(csv_file, 'rb')


def read_from_csv(csv_file, num_outliers=None):
    """Read a CSV of per-test samples and return each test's trimmed average.

    The first row must be a header whose first cell is 'id'. Every following
    row holds a key string in its first cell, from which the test name is
    extracted with the pattern '^.*,test=(.*),$', and numeric samples in the
    remaining cells.

    Args:
        csv_file: Path of the CSV file to read.
        num_outliers: Number of smallest and of largest samples to drop from
            each row before averaging. Defaults to NUM_OUTLIERS_TO_REMOVE.

    Returns:
        A dict mapping test name to the average (float) of the samples that
        remain after the outliers are dropped.

    Raises:
        Exception: If the file is empty or its header does not start with
            'id'.
        ValueError: If num_outliers is negative, a row's first cell carries no
            test name, or a row has no samples left after dropping outliers.
    """
    if num_outliers is None:
        num_outliers = NUM_OUTLIERS_TO_REMOVE
    if num_outliers < 0:
        raise ValueError('num_outliers must be >= 0, got %r' % (num_outliers,))

    test_name_re = re.compile(r'^.*,test=(.*),$')
    test_to_avg = {}
    with _open_csv_for_read(csv_file) as f:
        csv_reader = csv.reader(f, delimiter=',')
        # First row should contain headers. Validate that it does; an empty
        # file yields None here instead of leaking StopIteration.
        header_row = next(csv_reader, None)
        if not header_row or header_row[0] != 'id':
            raise Exception('%s in unexpected format' % csv_file)

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue

            # Extract the test name.
            match = test_name_re.search(row[0])
            if match is None:
                raise ValueError(
                    '%s: cannot find a test name in %r' % (csv_file, row[0]))
            test_name = match.group(1)

            # Blank cells carry no sample, so they are skipped rather than
            # passed to float(), which would reject them.
            vals = sorted(float(cell) for cell in row[1:] if cell.strip())
            # Discard outliers. The explicit end index keeps this correct for
            # num_outliers == 0, where vals[0:-0] would be empty.
            kept = vals[num_outliers:len(vals) - num_outliers]
            if not kept:
                raise ValueError(
                    '%s: test %r has %d value(s); need more than %d to drop '
                    '%d outlier(s) from each end'
                    % (csv_file, test_name, len(vals), 2 * num_outliers,
                       num_outliers))

            # Find the avg val.
            test_to_avg[test_name] = sum(kept) / float(len(kept))
    return test_to_avg
|
||
|
|
||
|
|
||
|
def combine_results(d1, d2):
    """Merge two {test_name: average} dicts into one comparison record each.

    For a test present in both inputs, perc_diff is the difference
    (v2 - v1) expressed as a percentage of the mean of the two values, or 0
    when that mean is 0. For a test present in only one input, the absent
    value and perc_diff are both MISSING_STR.

    Args:
        d1: Dict mapping test name to its average from the first CSV.
        d2: Dict mapping test name to its average from the second CSV.

    Returns:
        A dict mapping test name to a dict with the keys 'test_name', 'csv1',
        'csv2' and 'perc_diff'.
    """
    test_to_result = {}
    for test_name, v1 in d1.items():
        if test_name in d2:
            v2 = d2[test_name]
            diff = v2 - v1
            # Dividing by 2.0 forces float arithmetic, so integer inputs are
            # not truncated by Python 2's integer division here or below.
            avg = (v2 + v1) / 2.0
            perc_diff = 0 if avg == 0 else diff / avg * 100
        else:
            v2 = MISSING_STR
            perc_diff = MISSING_STR
        test_to_result[test_name] = {
            'test_name': test_name,
            'csv1': v1,
            'csv2': v2,
            'perc_diff': perc_diff,
        }

    # Also add keys in d2 and not d1.
    for test_name, v2 in d2.items():
        if test_name in d1:
            continue
        test_to_result[test_name] = {
            'test_name': test_name,
            'csv1': MISSING_STR,
            'csv2': v2,
            'perc_diff': MISSING_STR,
        }

    return test_to_result
|
||
|
|
||
|
|
||
|
def write_to_csv(output_dict, output_csv):
    """Write the combined results to output_csv, one row per test.

    The file starts with the header row test_name,csv1,csv2,perc_diff and the
    data rows follow in ascending order of the keys of output_dict.

    Args:
        output_dict: Dict mapping test name to a row dict with the keys
            'test_name', 'csv1', 'csv2' and 'perc_diff'.
        output_csv: Path of the CSV file to create or overwrite.
    """
    fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']
    # The csv module emits its own line terminators, so the file must not
    # translate newlines: newline='' on Python 3, binary mode on Python 2.
    if sys.version_info[0] >= 3:
        f = open(output_csv, 'w', newline='')
    else:
        f = open(output_csv, 'wb')
    with f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # sorted() works on both Python 2 and 3; dict.keys() returns a view
        # without a sort() method on Python 3.
        for test in sorted(output_dict):
            writer.writerow(output_dict[test])
|
||
|
|
||
|
|
||
|
def parse_and_output(csv1, csv2, output_csv):
    """Average both input CSVs, combine them and write the comparison CSV.

    Args:
        csv1: Path of the first input CSV.
        csv2: Path of the second input CSV.
        output_csv: Path the combined CSV is written to.
    """
    # Read csv1 first, then csv2, each into a {test_name: average} dict.
    averages = [read_from_csv(path) for path in (csv1, csv2)]
    write_to_csv(combine_results(averages[0], averages[1]), output_csv)
|
||
|
|
||
|
|
||
|
def main(argv=None):
    """Parse the command line and run the CSV comparison.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, optparse falls back to sys.argv[1:].

    Exits with status 2 and a usage message when --csv1, --csv2 or
    --output_csv is missing; otherwise exits with the value returned by
    parse_and_output.
    """
    option_parser = optparse.OptionParser()
    option_parser.add_option(
        '--csv1', type='string',
        help='The first CSV to parse.')
    option_parser.add_option(
        '--csv2', type='string',
        help='The second CSV to parse.')
    option_parser.add_option(
        '--output_csv', type='string',
        help='The file to write the output CSV to.')
    options, _ = option_parser.parse_args(argv)

    # All three paths are needed; report an omission as a usage error rather
    # than letting open(None) fail further down.
    required = (
        ('--csv1', options.csv1),
        ('--csv2', options.csv2),
        ('--output_csv', options.output_csv),
    )
    missing = [flag for flag, value in required if not value]
    if missing:
        option_parser.error(
            'missing required option(s): %s' % ', '.join(missing))

    sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))
|
||
|
|
||
|
|
||
|
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
|