skia2/tools/skottie-wasm-perf/parse_perf_csvs.py

#!/usr/bin/python2
#
# Copyright 2019 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Helper script that takes as input 2 CSVs downloaded from perf.skia.org and
# outputs a CSV with test_name, avg_value1 (from CSV1), avg_value2 (from CSV2),
# perc_diff between avg_value1 and avg_value2.
# This script also discards NUM_OUTLIERS_TO_REMOVE min values and
# NUM_OUTLIERS_TO_REMOVE max values.


import csv
import optparse
import sys
import re


# Placeholder written for a value that is present in only one of the two CSVs.
MISSING_STR = 'N/A'
# Number of smallest and, separately, largest samples dropped per test before
# the remaining samples are averaged.
NUM_OUTLIERS_TO_REMOVE = 2


def read_from_csv(csv_file):
  """Parses a CSV of perf samples into a dict of per-test trimmed averages.

  The first row must be a header whose first cell is 'id'. In every later row
  the first cell is an id that ends with ',test=<name>,' and the remaining
  cells are numeric samples. For each row the samples are sorted, the
  NUM_OUTLIERS_TO_REMOVE smallest and NUM_OUTLIERS_TO_REMOVE largest are
  dropped, and the rest are averaged. Blank rows are skipped.

  Args:
    csv_file: Path of the CSV file to read.

  Returns:
    A dict mapping test name to the average (float) of its remaining samples.

  Raises:
    Exception: If the header row is missing or unexpected, if a row's id does
        not contain a test name, or if a row has no samples left to average
        after the outliers are discarded.
    ValueError: If a sample cell cannot be converted to a float.
  """
  # The csv module wants a binary-mode file on Python 2 and a text-mode file
  # on Python 3.
  mode = 'rb' if sys.version_info[0] < 3 else 'r'
  test_pattern = re.compile(r'^.*,test=(.*),$')
  test_to_avg = {}
  with open(csv_file, mode) as f:
    csv_reader = csv.reader(f, delimiter=',')
    # First row should contain headers. Validate that it does. An empty file
    # yields no row at all and is reported the same way.
    header_row = next(csv_reader, None)
    if not header_row or header_row[0] != 'id':
      raise Exception('%s in unexpected format' % csv_file)
    for row in csv_reader:
      if not row:
        # csv.reader yields an empty list for a blank line.
        continue
      # Extract the test name.
      match = test_pattern.search(row[0])
      if match is None:
        raise Exception(
            'Could not find a test name in %r in %s' % (row[0], csv_file))
      test_name = match.group(1)

      vals = sorted(float(i) for i in row[1:])
      num_samples = len(vals)
      # Discard outliers. The end index is computed explicitly because
      # vals[n:-n] would be empty when n is 0.
      vals = vals[NUM_OUTLIERS_TO_REMOVE:num_samples - NUM_OUTLIERS_TO_REMOVE]
      if not vals:
        raise Exception(
            'Test %s in %s has %d samples; need more than %d to average after '
            'discarding outliers' % (test_name, csv_file, num_samples,
                                     2 * NUM_OUTLIERS_TO_REMOVE))
      # Find the avg val.
      test_to_avg[test_name] = sum(vals) / float(len(vals))
  return test_to_avg


def combine_results(d1, d2):
  """Merges two {test name: average} dicts into one row dict per test.

  Args:
    d1: Dict of test name to average value from the first CSV.
    d2: Dict of test name to average value from the second CSV.

  Returns:
    A dict mapping every test name found in either input to a dict with the
    keys 'test_name', 'csv1', 'csv2' and 'perc_diff'. A value absent from one
    input is reported as MISSING_STR, and so is perc_diff in that case.
    Otherwise perc_diff is (csv2 - csv1) as a percentage of the mean of the
    two values, or 0 when that mean is 0.
  """
  combined = {}

  # Tests present in d1, with or without a counterpart in d2.
  for name in d1:
    first = d1[name]
    second = d2.get(name, MISSING_STR)
    if second != MISSING_STR:
      delta = second - first
      mean = (second + first)/2
      if mean == 0:
        change = 0
      else:
        change = delta/mean * 100
    else:
      change = MISSING_STR
    combined[name] = {
        'test_name': name,
        'csv1': first,
        'csv2': second,
        'perc_diff': change,
    }

  # Tests that only exist in d2.
  for name, second in d2.items():
    if name not in combined:
      combined[name] = {
          'test_name': name,
          'csv1': MISSING_STR,
          'csv2': second,
          'perc_diff': MISSING_STR,
      }

  return combined


def write_to_csv(output_dict, output_csv):
  """Writes one CSV row per test, ordered by test name, after a header row.

  Args:
    output_dict: Dict mapping test name to a row dict with the keys
        'test_name', 'csv1', 'csv2' and 'perc_diff'.
    output_csv: Path of the CSV file to create or overwrite.
  """
  fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']
  with open(output_csv, 'w') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    # sorted() works on both Python 2 and 3, whereas dict.keys() has no sort()
    # method on Python 3.
    for test in sorted(output_dict):
      writer.writerow(output_dict[test])


def parse_and_output(csv1, csv2, output_csv):
  """Reads both input CSVs, combines their averages and writes the result.

  Args:
    csv1: Path of the first CSV to parse.
    csv2: Path of the second CSV to parse.
    output_csv: Path of the CSV file to write the combined rows to.
  """
  averages_by_file = [read_from_csv(path) for path in (csv1, csv2)]
  write_to_csv(combine_results(*averages_by_file), output_csv)


def main():
  """Parses the command-line flags and writes the combined CSV.

  --csv1, --csv2 and --output_csv are all required. If any of them is missing,
  a usage error is printed and the script exits with status 2.
  """
  option_parser = optparse.OptionParser()
  option_parser.add_option(
      '', '--csv1', type=str,
      help='The first CSV to parse.')
  option_parser.add_option(
      '', '--csv2', type=str,
      help='The second CSV to parse.')
  option_parser.add_option(
      '', '--output_csv', type=str,
      help='The file to write the output CSV to.')
  options, _ = option_parser.parse_args()
  # optparse has no notion of required options, so check explicitly instead of
  # failing later inside open() with an unhelpful TypeError.
  missing = ['--' + name for name in ('csv1', 'csv2', 'output_csv')
             if not getattr(options, name)]
  if missing:
    option_parser.error('Missing required option(s): %s' % ', '.join(missing))
  sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))


# Only run main() when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
[perf_skottiewasm_lottieweb] Script to parse and combine skottie/lottieweb perf CSVs NoTry: true Bug: skia:9237 Change-Id: I663a8b5ebdc789e3ba02291b5f801fb6fbbf5894 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/229763 Commit-Queue: Ravi Mistry <rmistry@google.com> Reviewed-by: Joe Gregorio <jcgregorio@google.com> 2019-07-25 17:47:36 +00:00			`#!/usr/bin/python2`
			`#`
			`# Copyright 2019 Google Inc.`
			`#`
			`# Use of this source code is governed by a BSD-style license that can be`
			`# found in the LICENSE file.`
			`#`
			`# Helper script that takes as input 2 CSVs downloaded from perf.skia.org and`
			`# outputs a CSV with test_name, avg_value1 (from CSV1), avg_value2 (from CSV2),`
			`# perc_diff between avg_value1 and avg_value2.`
			`# This script also discards NUM_OUTLIERS_TO_REMOVE min values and`
			`# NUM_OUTLIERS_TO_REMOVE max values.`


			`import csv`
			`import optparse`
			`import sys`
			`import re`


			`MISSING_STR = 'N/A'`
			`NUM_OUTLIERS_TO_REMOVE = 2`


			`def read_from_csv(csv_file):`
			`test_to_avg = {}`
			`with open(csv_file, 'rb') as f:`
			`csv_reader = csv.reader(f, delimiter=',')`
			`# First row should contain headers. Validate that it does.`
			`header_row = csv_reader.next()`
			`if header_row[0] != 'id':`
			`raise Exception('%s in unexpected format' % csv_file)`
			`p = re.compile('^.*,test=(.*),$')`
			`for v in csv_reader:`
			`# Extract the test name.`
			`result = p.search(v[0])`
			`test_name = result.group(1)`

			`vals = [float(i) for i in v[1:]]`
			`vals.sort()`
			`# Discard outliers.`
			`vals = vals[NUM_OUTLIERS_TO_REMOVE:-NUM_OUTLIERS_TO_REMOVE]`
			`# Find the avg val.`
			`avg_val = reduce(lambda x, y: x+y, vals) / float(len(vals))`
			`test_to_avg[test_name] = avg_val`
			`return test_to_avg`


			`def combine_results(d1, d2):`
			`test_to_result = {}`
			`for test1, v1 in d1.items():`
			`v2 = d2.get(test1, MISSING_STR)`
			`perc_diff = MISSING_STR`
			`if v2 != MISSING_STR:`
			`diff = v2 - v1`
			`avg = (v2 + v1)/2`
			`perc_diff = 0 if avg == 0 else diff/avg * 100`
			`result = {`
			`'test_name': test1,`
			`'csv1': v1,`
			`'csv2': v2,`
			`'perc_diff': perc_diff,`
			`}`
			`test_to_result[test1] = result`

			`# Also add keys in d2 and not d1.`
			`for test2, v2 in d2.items():`
			`if test2 in test_to_result:`
			`continue`
			`test_to_result[test2] = {`
			`'test_name': test2,`
			`'csv1': MISSING_STR,`
			`'csv2': v2,`
			`'perc_diff': MISSING_STR,`
			`}`

			`return test_to_result`


			`def write_to_csv(output_dict, output_csv):`
			`with open(output_csv, 'w') as f:`
			`fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']`
			`writer = csv.DictWriter(f, fieldnames=fieldnames)`
			`writer.writeheader()`
			`tests = output_dict.keys()`
			`tests.sort()`
			`for test in tests:`
			`writer.writerow(output_dict[test])`


			`def parse_and_output(csv1, csv2, output_csv):`
			`test_to_avg1 = read_from_csv(csv1)`
			`test_to_avg2 = read_from_csv(csv2)`
			`output_dict = combine_results(test_to_avg1, test_to_avg2)`
			`write_to_csv(output_dict, output_csv)`


			`def main():`
			`option_parser = optparse.OptionParser()`
			`option_parser.add_option(`
			`'', '--csv1', type=str,`
			`help='The first CSV to parse.')`
			`option_parser.add_option(`
			`'', '--csv2', type=str,`
			`help='The second CSV to parse.')`
			`option_parser.add_option(`
			`'', '--output_csv', type=str,`
			`help='The file to write the output CSV to.')`
			`options, _ = option_parser.parse_args()`
			`sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))`


			`if __name__ == '__main__':`
			`main()`