v8/tools/callstats.py

#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: callstats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run         run chrome with --runtime-call-stats and generate logs
  stats       process logs and print statistics
  json        process logs from several versions and generate JSON
  help        help information

For each command, you can try ./callstats.py help <command>.
'''

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

# URLs benchmarked by the `run` command when neither positional URLs nor a
# --sites-file are given (see do_run).
DEFAULT_SITES = [
    # Popular websites (taken from http://alexa.com/topsites).
    "https://www.google.de/search?q=v8",
    "https://www.youtube.com",
    "https://www.facebook.com/shakira",
    "http://www.baidu.com/s?wd=v8",
    "http://www.yahoo.co.jp",
    "http://www.amazon.com/s/?field-keywords=v8",
    # One URL split over two literals; the path is a percent-encoded
    # (UTF-8) non-ASCII page title.
    "http://hi.wikipedia.org/wiki/" \
     "%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",
    "http://www.qq.com",
    "http://www.twitter.com/taylorswift13",
    "http://www.reddit.com",
    "http://www.ebay.fr/sch/i.html?_nkw=v8",
    "http://edition.cnn.com",
    "http://world.taobao.com",
    "http://www.instagram.com/archdigest",
    "https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
    "http://www.msn.com/ar-ae",
    "http://www.bing.com/search?q=v8+engine",
    "http://www.pinterest.com/categories/popular",
    "http://www.sina.com.cn",
    "http://weibo.com",
    "http://yandex.ru/search/?text=v8",
    # Sites chosen for the JavaScript framework they are built with.
    # wikipedia content + angularjs
    "http://www.wikiwand.com/en/hill",
    # ember website
    "http://meta.discourse.org",
    # backbone js
    "http://reddit.musicplayer.io",
    # gwt application
    "http://inbox.google.com",
    # webgl / algorithmic case
    "http://maps.google.co.jp/maps/search/restaurant+tokyo",
    # whatever framework adwords uses
    "https://adwords.google.com",
]


def print_command(cmd_args):
  """Print a command line so that it can be read (and pasted) as one line.

  Arguments containing a space are wrapped in single quotes.  For options
  of the form --name=value only the value is quoted, and it is also quoted
  when it starts with a dash (e.g. --js-flags=--foo).  Quote characters
  already present in an argument are not escaped, so the output is meant
  for display rather than for exact shell round-tripping.

  Args:
    cmd_args: list of argument strings, as passed to subprocess.
  """
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print(" ".join(fix_for_printing(arg) for arg in cmd_args))


def start_replay_server(args):
  """Start the web page replay server in the background.

  The server is launched from args.replay_bin with the archive
  args.replay_wpr, using port 4080 for HTTP and 4443 for SSL.  Its stdout
  and stderr are discarded.

  Args:
    args: parsed command-line arguments; reads replay_bin and replay_wpr.

  Returns:
    The subprocess.Popen object of the running server.
  """
  cmd_args = [
      args.replay_bin,
      "--port=4080",
      "--ssl_port=4443",
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      args.replay_wpr,
  ]
  print("=" * 80)
  print_command(cmd_args)
  # The child keeps its own copies of the descriptors, so the null device
  # can be closed on our side as soon as the process has been spawned.
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print("RUNNING REPLAY SERVER: %s with PID=%s" %
        (args.replay_bin, server.pid))
  print("=" * 80)
  return server


def stop_replay_server(server):
  """Announce the shutdown of the replay server and terminate it.

  Args:
    server: process handle providing `pid` and `terminate()`, such as the
      subprocess.Popen object returned by start_replay_server().
  """
  announcement = "SHUTTING DOWN REPLAY SERVER %s" % server.pid
  print(announcement)
  server.terminate()


def run_site(site, domain, args, timeout=None):
  """Run chrome on one site and save its --runtime-call-stats output.

  Chrome is started through the external `timeout` command and its stdout
  is written to "<domain>.txt", or to "<domain>#<count>.txt" when
  args.repeat is set.  A run is retried (at most args.retries times, or
  without limit when that is None) if chrome exits with an unexpected
  status or leaves an empty result file.

  Args:
    site: URL to load.
    domain: name used to build the result file name.
    args: parsed command-line arguments of the `run` command.
    timeout: seconds before chrome is killed; defaults to args.timeout.
  """
  print("=" * 80)
  print("RUNNING DOMAIN %s" % domain)
  print("=" * 80)
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  if timeout is None: timeout = args.timeout
  js_flags = "--runtime-call-stats"
  if args.js_flags: js_flags += " " + args.js_flags
  count = 0
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      # Every attempt gets a fresh temporary profile unless the user
      # supplied one.  The directory is chosen before entering the try
      # block so that the cleanup below never sees an unbound name.
      temp_user_data_dir = args.user_data_dir is None
      if temp_user_data_dir:
        user_data_dir = tempfile.mkdtemp(prefix="chr_")
      else:
        user_data_dir = args.user_data_dir
      try:
        chrome_flags = [
            "--no-default-browser-check",
            "--disable-translate",
            "--single-process",
            "--no-sandbox",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          # Route all traffic to the local replay server.
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:4080, "
                                    "MAP *:443 localhost:4443, "
                                    "EXCLUDE localhost",
              "--ignore-certificate-errors",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print("- " * 40)
        print_command(cmd_args)
        print("- " * 40)
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If none of these two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN")
          continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            # Blank line followed by the URL footer.
            with open(result, "at") as f:
              f.write("\nURL: {}\n".format(site))
          break
        print("EMPTY RESULT, REPEATING RUN")
      finally:
        # Only remove profiles that were created here.
        if temp_user_data_dir:
          shutil.rmtree(user_data_dir)


def read_sites_file(args):
  """Load the benchmark sites listed in args.sites_file.

  The file is first parsed as JSON: a sequence of dictionaries.  An entry
  without 'timeout' gets one derived from its 'timeline' value, and every
  timeout is capped at args.timeout.  If the content is not valid JSON the
  file is read as plain text with one URL per line; blank lines and lines
  starting with '#' are ignored and each URL gets args.timeout.

  Args:
    args: parsed command-line arguments; reads sites_file and timeout and
      calls error() when the file cannot be read.

  Returns:
    A list of site dictionaries.
  """
  try:
    entries = []
    try:
      with open(args.sites_file, "rt") as json_file:
        for entry in json.load(json_file):
          if 'timeout' not in entry:
            # This is more-or-less arbitrary.
            entry['timeout'] = int(2.5 * entry['timeline'] + 3)
          if entry['timeout'] > args.timeout:
            entry['timeout'] = args.timeout
          entries.append(entry)
    except ValueError:
      # Not JSON: fall back to the one-URL-per-line format.
      with open(args.sites_file, "rt") as text_file:
        for raw_line in text_file:
          url = raw_line.strip()
          if url and not url.startswith('#'):
            entries.append({'url': url, 'timeout': args.timeout})
    return entries
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)


def do_run(args):
  """Implement the `run` command: benchmark every requested site.

  Sites come from args.sites_file, from the positional args.sites, or from
  DEFAULT_SITES, in that order of preference.  When several sites share a
  domain, their result names are made unique by appending "%<n>".  If
  args.replay_wpr is set, a replay server runs for the duration of the
  benchmark and is shut down afterwards, even if a run fails.

  Args:
    args: parsed command-line arguments of the `run` command.
  """
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  elif args.sites:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in DEFAULT_SITES]
  # Disambiguate domains, if needed.  domains[domain] holds the first entry
  # seen for that domain until a second one shows up; from then on it holds
  # the running counter for the domain.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        # Second occurrence: retroactively number the first entry.
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  # Must be bound even without --replay-wpr, because the cleanup in the
  # finally clause below inspects it.
  replay_server = None
  if args.replay_wpr:
    replay_server = start_replay_server(args)
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print("%s %s %s" % (site, domain, timeout))
      run_site(site, domain, args, timeout)
  finally:
    if replay_server is not None:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  """Summarize a list of samples.

  Args:
    data: non-empty sequence of numbers.

  Returns:
    A dictionary with 'samples', 'average', 'median', 'stddev' (sample
    standard deviation, 0 for a single sample), 'min', 'max' and 'ci'.
    'ci' describes the 95% confidence interval of the mean, based on
    Student's t distribution: 'abs' is its half-width, 'low' and 'high' its
    bounds and 'perc' the half-width as a percentage of the average (0 when
    the deviation or the average is negligible).
  """
  samples = len(data)
  mean = numpy.average(data)
  middle = numpy.median(data)
  smallest = numpy.min(data)
  largest = numpy.max(data)
  # Values for the degenerate case of a single sample.
  stddev = 0
  ci = { 'abs': 0, 'low': mean, 'high': mean, 'perc': 0 }
  if samples > 1:
    # ddof=1 selects the sample (N - 1) rather than the population variance.
    stddev = numpy.std(data, ddof=1)
    # Endpoints of the range holding 95% of the t distribution.
    lower_t, upper_t = scipy.stats.t.interval(0.95, samples - 1)
    root_n = sqrt(samples)
    ci['abs'] = upper_t * stddev / root_n
    ci['low'] = mean + lower_t * stddev / root_n
    ci['high'] = mean + upper_t * stddev / root_n
    if abs(stddev) > 0.0001 and abs(mean) > 0.0001:
      ci['perc'] = upper_t * stddev / root_n / mean * 100
  return { 'samples': samples, 'average': mean, 'median': middle,
           'stddev': stddev, 'min': smallest, 'max': largest, 'ci': ci }


def read_stats(path, S):
  """Parse one runtime-call-stats log file and append its totals to S.

  Header, separator, blank and "URL:" lines are skipped.  Every other line
  is split on whitespace: the first field is the entry name, the second the
  time (with an optional "ms" suffix) and the fourth the count.  Entries
  repeated within the file are added up.  An additional "Sum" entry holds
  the total over all entries except the one named "Total".

  Args:
    path: log file to read.
    S: dictionary updated in place; maps an entry name to a dictionary with
      'time_list' and 'count_list', each of which receives one new element
      per entry found in this file.
  """
  ignored_prefixes = ("Runtime Function", "====", "----", "URL:")
  per_file = { 'Sum': {'time': 0, 'count': 0} }
  with open(path, "rt") as log:
    for raw_line in log:
      text = raw_line.strip()
      # Discard blank lines, headers and footers.
      if not text or text.startswith(ignored_prefixes):
        continue
      columns = text.split()
      name = columns[0]
      elapsed = float(columns[1].replace("ms", ""))
      calls = int(columns[3])
      bucket = per_file.setdefault(name, { 'time': 0, 'count': 0 })
      bucket['time'] += elapsed
      bucket['count'] += calls
      # The "Total" line is not part of the calculated sum.
      if name != "Total":
        per_file['Sum']['time'] += elapsed
        per_file['Sum']['count'] += calls
  # Record this file's totals as one more sample for each entry.
  for name, totals in per_file.items():
    samples = S.setdefault(name, { 'time_list': [], 'count_list': [] })
    samples['time_list'].append(totals['time'])
    samples['count_list'].append(totals['count'])


def print_stats(S, args):
  """Print a table of the per-entry statistics in S.

  Entries are sorted by average time, then by average count, then by name.
  With a non-zero args.limit only the args.limit entries with the largest
  average time are shown.  When args.totals is set, a separator and the
  "Sum" and "Total" entries follow, preceded by a "Partial" line (the sum
  of the entries actually shown) if the output was limited.

  Args:
    S: dictionary mapping an entry name to a dictionary with 'time_stat'
      and 'count_stat' (as returned by statistics()) and the underlying
      'time_list' and 'count_list'.  Must contain "Sum" and "Total" when
      args.totals is set.
    args: parsed command-line arguments; reads sort, limit and totals.
  """
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  else:
    # Clamp the limit to the available entries: an oversized limit would
    # otherwise index past the end (descending) or wrap around to negative
    # indices and print entries twice (ascending).
    limit = max(0, min(args.limit, N))
    if args.sort == "desc":
      low, high = 0, limit
    else:
      low, high = N - limit, N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print("{:>50s}  {}  {}".format(
      key,
      stats(value['time_stat'], units="ms"),
      stats(value['count_stat'])
    ))
  # Print and calculate partial sums, if necessary.
  want_partial = args.totals and args.limit != 0
  partial = None
  for i in range(low, high):
    key, value = L[i]
    print_entry(key, value)
    if want_partial:
      if partial is None:
        partial = { 'time_list': [0] * len(value['time_list']),
                    'count_list': [0] * len(value['count_list']) }
      # The element-wise sums only make sense if all entries have the same
      # number of samples.
      assert len(partial['time_list']) == len(value['time_list'])
      assert len(partial['count_list']) == len(value['count_list'])
      for j, v in enumerate(value['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(value['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print('-' * 80)
    # partial stays None when no entry was printed; there is nothing to
    # summarize in that case.
    if partial is not None:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
  """Implement the `stats` command: print statistics for the log files.

  Log files are grouped by the part of their file name in front of the
  first '#', so the repeated runs "<domain>#<count>.txt" of one site are
  evaluated together.  Each group is printed as its own table, with a
  heading when there is more than one group.

  Args:
    args: parsed command-line arguments; reads logfiles and is passed on
      to print_stats().
  """
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      # Blank line between consecutive tables.
      if i > 0: print("")
      print("{}:".format(domain))
      print('=' * 80)
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.

def do_json(args):
  """Implement the `json` command: dump statistics of several versions.

  Every directory found while walking args.logdirs is treated as one
  version, named after the directory.  Its "*.txt" log files are grouped
  by the part of the file name in front of the first '#'.  The result is
  printed as compact JSON of the form
    {version: {domain: [[name, time average, time ci, time ci %,
                         count average, count ci, count ci %], ...]}}
  Arguments that are not directories are ignored.

  Args:
    args: parsed command-line arguments; reads logdirs.
  """
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, _dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  for T in J.values():
    # Iterate over a snapshot, since the values of T are replaced below.
    for domain, S in list(T.items()):
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(value[x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print(json.dumps(J, separators=(',', ':')))


# Help.

def do_help(parser, subparsers, args):
  """Implement the `help` command.

  Prints the general help when no command was named, the help of the named
  command when it exists, and reports an error through args.error()
  otherwise.

  Args:
    parser: the top-level argument parser.
    subparsers: dictionary mapping command names to their parsers.
    args: parsed command-line arguments; reads help_cmd.
  """
  if not args.help_cmd:
    parser.print_help()
    return
  if args.help_cmd not in subparsers:
    args.error("Unknown command '{}'".format(args.help_cmd))
    return
  subparsers[args.help_cmd].print_help()


# Main program, parse command line and execute.

def main():
  """Parse the command line and execute the selected command.

  Every sub-command stores its handler as `func` and the error reporter of
  its own parser as `error` in the parsed arguments, so that handlers can
  report usage errors in the context of their command.
  """
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  # Python 3's argparse treats the sub-command as optional unless told
  # otherwise; without it there would be no `func` to call below.
  subparser_adder.required = True
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.  args.error() prints the message together with
  # the usage of the command and terminates the program.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLS must be given")
  elif args.command == "run" and args.replay_wpr and not args.replay_bin:
    args.error("if --replay-wpr is used, --replay-bin must be given")
  else:
    args.func(args)

# Run main() only when this file is executed as a script; the value returned
# by main() is handed to sys.exit() as the exit status.
if __name__ == "__main__":
  sys.exit(main())
Add script for benchmarking with --runtime-call-stats R=cbruni@chromium.org BUG= LOG=N Review-Url: https://codereview.chromium.org/1922873004 Cr-Commit-Position: refs/heads/master@{#36070} 2016-05-06 10:17:15 +00:00			`#!/usr/bin/env python`
			`# Copyright 2016 the V8 project authors. All rights reserved.`
			`# Use of this source code is governed by a BSD-style license that can be`
			`# found in the LICENSE file.`
			`'''`
			`Usage: runtime-call-stats.py [-h] <command> ...`

			`Optional arguments:`
			`-h, --help show this help message and exit`

			`Commands:`
			`run run chrome with --runtime-call-stats and generate logs`
			`stats process logs and print statistics`
			`json process logs from several versions and generate JSON`
			`help help information`

			`For each command, you can try ./runtime-call-stats.py help command.`
			`'''`

			`import argparse`
			`import json`
			`import os`
			`import re`
			`import shutil`
			`import subprocess`
			`import sys`
			`import tempfile`

			`import numpy`
			`import scipy`
			`import scipy.stats`
			`from math import sqrt`


			`# Run benchmarks.`

			`DEFAULT_SITES = [`
			`# top websites (http://alexa.com/topsites): --------------------`
			`"https://www.google.de/search?q=v8",`
			`"https://www.youtube.com",`
			`"https://www.facebook.com/shakira",`
			`"http://www.baidu.com/s?wd=v8",`
			`"http://www.yahoo.co.jp",`
			`"http://www.amazon.com/s/?field-keywords=v8",`
			`"http://hi.wikipedia.org/wiki/" \`
			`"%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",`
			`"http://www.qq.com",`
			`"http://www.twitter.com/taylorswift13",`
			`"http://www.reddit.com",`
			`"http://www.ebay.fr/sch/i.html?_nkw=v8",`
			`"http://edition.cnn.com",`
			`"http://world.taobao.com",`
			`"http://www.instagram.com/archdigest",`
			`"https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",`
			`"http://www.msn.com/ar-ae",`
			`"http://www.bing.com/search?q=v8+engine",`
			`"http://www.pinterest.com/categories/popular",`
			`"http://www.sina.com.cn",`
			`"http://weibo.com",`
			`"http://yandex.ru/search/?text=v8",`
			`# framework driven decisions: -----------------------------------`
			`# wikipedia content + angularjs`
			`"http://www.wikiwand.com/en/hill",`
			`# ember website`
			`"http://meta.discourse.org",`
			`# backbone js`
			`"http://reddit.musicplayer.io",`
			`# gwt application`
			`"http://inbox.google.com",`
			`# webgl / algorithmic case`
			`"http://maps.google.co.jp/maps/search/restaurant+tokyo",`
			`# whatever framework adwords uses`
			`"https://adwords.google.com",`
			`]`


			`def print_command(cmd_args):`
			`def fix_for_printing(arg):`
			`m = re.match(r'^--([^=]+)=(.*)$', arg)`
			`if m and (' ' in m.group(2) or m.group(2).startswith('-')):`
			`arg = "--{}='{}'".format(m.group(1), m.group(2))`
			`elif ' ' in arg:`
			`arg = "'{}'".format(arg)`
			`return arg`
			`print " ".join(map(fix_for_printing, cmd_args))`


			`def start_replay_server(args):`
			`cmd_args = [`
			`args.replay_bin,`
			`"--port=4080",`
			`"--ssl_port=4443",`
			`"--no-dns_forwarding",`
			`"--use_closest_match",`
			`"--no-diff_unknown_requests",`
			`args.replay_wpr,`
			`]`
			`print "=" * 80`
			`print_command(cmd_args)`
			`with open(os.devnull, 'w') as null:`
			`server = subprocess.Popen(cmd_args, stdout=null, stderr=null)`
			`print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)`
			`print "=" * 80`
			`return server`


			`def stop_replay_server(server):`
			`print("SHUTTING DOWN REPLAY SERVER %s" % server.pid)`
			`server.terminate()`


			`def run_site(site, domain, args, timeout=None):`
			`print "="*80`
			`print "RUNNING DOMAIN %s" % domain`
			`print "="*80`
			`result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"`
			`count = 0`
			`while count == 0 or args.repeat is not None and count < args.repeat:`
			`count += 1`
			`result = result_template.format(domain=domain, count=count)`
			`retries = 0`
			`while args.retries is None or retries < args.retries:`
			`retries += 1`
			`try:`
			`temp_user_data_dir = args.user_data_dir is None`
			`if temp_user_data_dir:`
			`user_data_dir = tempfile.mkdtemp(prefix="chr_")`
			`js_flags = "--runtime-call-stats"`
			`if args.js_flags: js_flags += " " + args.js_flags`
			`chrome_flags = [`
			`"--no-default-browser-check",`
			`"--disable-translate",`
			`"--single-process",`
			`"--no-sandbox",`
			`"--js-flags={}".format(js_flags),`
			`"--no-first-run",`
			`"--user-data-dir={}".format(user_data_dir),`
			`]`
			`if args.replay_wpr:`
			`chrome_flags += [`
			`"--host-resolver-rules=MAP *:80 localhost:4080, " \`
			`"MAP *:443 localhost:4443, " \`
			`"EXCLUDE localhost",`
			`"--ignore-certificate-errors",`
			`"--disable-web-security",`
			`"--reduce-security-for-testing",`
			`"--allow-insecure-localhost",`
			`]`
			`if args.chrome_flags:`
			`chrome_flags += args.chrome_flags.split()`
			`if timeout is None: timeout = args.timeout`
			`cmd_args = [`
			`"timeout", str(timeout),`
			`args.with_chrome`
			`] + chrome_flags + [ site ]`
			`print "- " * 40`
			`print_command(cmd_args)`
			`print "- " * 40`
			`with open(result, "wt") as f:`
			`status = subprocess.call(cmd_args, stdout=f)`
			`# 124 means timeout killed chrome, 0 means the user was bored first!`
			`# If none of these two happened, then chrome apparently crashed, so`
			`# it must be called again.`
			`if status != 124 and status != 0:`
			`print("CHROME CRASHED, REPEATING RUN");`
			`continue`
			`# If the stats file is empty, chrome must be called again.`
			`if os.path.isfile(result) and os.path.getsize(result) > 0:`
			`if args.print_url:`
			`with open(result, "at") as f:`
			`print >> f`
			`print >> f, "URL: {}".format(site)`
			`break`
			`print("EMPTY RESULT, REPEATING RUN");`
			`finally:`
			`if temp_user_data_dir:`
			`shutil.rmtree(user_data_dir)`


			`def read_sites_file(args):`
			`try:`
			`sites = []`
			`try:`
			`with open(args.sites_file, "rt") as f:`
			`for item in json.load(f):`
			`if 'timeout' not in item:`
			`# This is more-or-less arbitrary.`
			`item['timeout'] = int(2.5 * item['timeline'] + 3)`
			`if item['timeout'] > args.timeout: item['timeout'] = args.timeout`
			`sites.append(item)`
			`except ValueError:`
			`with open(args.sites_file, "rt") as f:`
			`for line in f:`
			`line = line.strip()`
			`if not line or line.startswith('#'): continue`
			`sites.append({'url': line, 'timeout': args.timeout})`
			`return sites`
			`except IOError as e:`
			`args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))`
			`sys.exit(1)`


			`def do_run(args):`
			`# Determine the websites to benchmark.`
			`if args.sites_file:`
			`sites = read_sites_file(args)`
			`elif args.sites:`
			`sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]`
			`else:`
			`sites = [{'url': site, 'timeout': args.timeout} for site in DEFAULT_SITES]`
			`# Disambiguate domains, if needed.`
			`L = []`
			`domains = {}`
			`for item in sites:`
			`site = item['url']`
			`m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)`
			`if not m:`
			`args.error("Invalid URL {}.".format(site))`
			`continue`
			`domain = m.group(2)`
			`entry = [site, domain, None, item['timeout']]`
			`if domain not in domains:`
			`domains[domain] = entry`
			`else:`
			`if not isinstance(domains[domain], int):`
			`domains[domain][2] = 1`
			`domains[domain] = 1`
			`domains[domain] += 1`
			`entry[2] = domains[domain]`
			`L.append(entry)`
			`if args.replay_wpr:`
			`replay_server = start_replay_server(args);`
			`try:`
			`# Run them.`
			`for site, domain, count, timeout in L:`
			`if count is not None: domain = "{}%{}".format(domain, count)`
			`print site, domain, timeout`
			`run_site(site, domain, args, timeout)`
			`finally:`
			`if replay_server:`
			`stop_replay_server(replay_server)`


			`# Calculate statistics.`

			`def statistics(data):`
			`N = len(data)`
			`average = numpy.average(data)`
			`median = numpy.median(data)`
			`low = numpy.min(data)`
			`high= numpy.max(data)`
			`if N > 1:`
			`# evaluate sample variance by setting delta degrees of freedom (ddof) to`
			`# 1. The degree used in calculations is N - ddof`
			`stddev = numpy.std(data, ddof=1)`
			`# Get the endpoints of the range that contains 95% of the distribution`
			`t_bounds = scipy.stats.t.interval(0.95, N-1)`
			`#assert abs(t_bounds[0] + t_bounds[1]) < 1e-6`
			`# sum mean to the confidence interval`
			`ci = {`
			`'abs': t_bounds[1] * stddev / sqrt(N),`
			`'low': average + t_bounds[0] * stddev / sqrt(N),`
			`'high': average + t_bounds[1] * stddev / sqrt(N)`
			`}`
			`else:`
			`stddev = 0`
			`ci = { 'abs': 0, 'low': average, 'high': average }`
			`if abs(stddev) > 0.0001 and abs(average) > 0.0001:`
			`ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100`
			`else:`
			`ci['perc'] = 0`
			`return { 'samples': N, 'average': average, 'median': median,`
			`'stddev': stddev, 'min': low, 'max': high, 'ci': ci }`


			`def read_stats(path, S):`
			`with open(path, "rt") as f:`
			`# Process the whole file and sum repeating entries.`
			`D = { 'Sum': {'time': 0, 'count': 0} }`
			`for line in f:`
			`line = line.strip()`
			`# Discard headers and footers.`
			`if not line: continue`
			`if line.startswith("Runtime Function"): continue`
			`if line.startswith("===="): continue`
			`if line.startswith("----"): continue`
			`if line.startswith("URL:"): continue`
			`# We have a regular line.`
			`fields = line.split()`
			`key = fields[0]`
			`time = float(fields[1].replace("ms", ""))`
			`count = int(fields[3])`
			`if key not in D: D[key] = { 'time': 0, 'count': 0 }`
			`D[key]['time'] += time`
			`D[key]['count'] += count`
			`# We calculate the sum, if it's not the "total" line.`
			`if key != "Total":`
			`D['Sum']['time'] += time`
			`D['Sum']['count'] += count`
			`# Append the sums as single entries to S.`
			`for key in D:`
			`if key not in S: S[key] = { 'time_list': [], 'count_list': [] }`
			`S[key]['time_list'].append(D[key]['time'])`
			`S[key]['count_list'].append(D[key]['count'])`


			`def print_stats(S, args):`
			`# Sort by ascending/descending time average, then by ascending/descending`
			`# count average, then by ascending name.`
			`def sort_asc_func(item):`
			`return (item[1]['time_stat']['average'],`
			`item[1]['count_stat']['average'],`
			`item[0])`
			`def sort_desc_func(item):`
			`return (-item[1]['time_stat']['average'],`
			`-item[1]['count_stat']['average'],`
			`item[0])`
			`# Sorting order is in the command-line arguments.`
			`sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func`
			`# Possibly limit how many elements to print.`
			`L = [item for item in sorted(S.items(), key=sort_func)`
			`if item[0] not in ["Total", "Sum"]]`
			`N = len(L)`
			`if args.limit == 0:`
			`low, high = 0, N`
			`elif args.sort == "desc":`
			`low, high = 0, args.limit`
			`else:`
			`low, high = N-args.limit, N`
			`# How to print entries.`
			`def print_entry(key, value):`
			`def stats(s, units=""):`
			`conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])`
			`return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)`
			`print "{:>50s} {} {}".format(`
			`key,`
			`stats(value['time_stat'], units="ms"),`
			`stats(value['count_stat'])`
			`)`
			`# Print and calculate partial sums, if necessary.`
			`for i in range(low, high):`
			`print_entry(*L[i])`
			`if args.totals and args.limit != 0:`
			`if i == low:`
			`partial = { 'time_list': [0] * len(L[i][1]['time_list']),`
			`'count_list': [0] * len(L[i][1]['count_list']) }`
			`assert len(partial['time_list']) == len(L[i][1]['time_list'])`
			`assert len(partial['count_list']) == len(L[i][1]['count_list'])`
			`for j, v in enumerate(L[i][1]['time_list']):`
			`partial['time_list'][j] += v`
			`for j, v in enumerate(L[i][1]['count_list']):`
			`partial['count_list'][j] += v`
			`# Print totals, if necessary.`
			`if args.totals:`
			`print '-' * 80`
			`if args.limit != 0:`
			`partial['time_stat'] = statistics(partial['time_list'])`
			`partial['count_stat'] = statistics(partial['count_list'])`
			`print_entry("Partial", partial)`
			`print_entry("Sum", S["Sum"])`
			`print_entry("Total", S["Total"])`


			`def do_stats(args):`
			`T = {}`
			`for path in args.logfiles:`
			`filename = os.path.basename(path)`
			`m = re.match(r'^([^#]+)(#.*)?$', filename)`
			`domain = m.group(1)`
			`if domain not in T: T[domain] = {}`
			`read_stats(path, T[domain])`
			`for i, domain in enumerate(sorted(T)):`
			`if len(T) > 1:`
			`if i > 0: print`
			`print "{}:".format(domain)`
			`print '=' * 80`
			`S = T[domain]`
			`for key in S:`
			`S[key]['time_stat'] = statistics(S[key]['time_list'])`
			`S[key]['count_stat'] = statistics(S[key]['count_list'])`
			`print_stats(S, args)`


			`# Generate JSON file.`

			def do_json(args):
			  """Print one compact JSON document for the logs below args.logdirs.

			  Every entry of args.logdirs that is a directory is walked recursively.
			  The base name of each walked directory is used as the version, and each
			  '.txt' file in it is loaded with read_stats() into the domain given by
			  the part of the file name in front of the first '#'.

			  The output maps version -> domain -> list of rows. A row has the form
			  [name, time average, time ci abs, time ci perc,
			   count average, count ci abs, count ci perc],
			  with averages and absolute intervals rounded to one decimal and
			  percentages rounded to two. The "Sum" entry is left out.
			  """
			  versions = {}
			  for logdir in args.logdirs:
			    if not os.path.isdir(logdir):
			      continue
			    for root, _, filenames in os.walk(logdir):
			      # Each walked directory is registered as a version, even when it
			      # turns out to contain no log files.
			      domains = versions.setdefault(os.path.basename(root), {})
			      for filename in filenames:
			        if not filename.endswith(".txt"):
			          continue
			        match = re.match(r'^([^#]+)(#.*)?$', filename)
			        domain_stats = domains.setdefault(match.group(1), {})
			        read_stats(os.path.join(root, filename), domain_stats)
			  for domains in versions.values():
			    for domain, stats in domains.items():
			      rows = []
			      for name, value in stats.items():
			        # We don't want the calculated sum in the JSON file.
			        if name == "Sum":
			          continue
			        row = [name]
			        for list_key in ('time_list', 'count_list'):
			          stat = statistics(value[list_key])
			          row.append(round(stat['average'], 1))
			          row.append(round(stat['ci']['abs'], 1))
			          row.append(round(stat['ci']['perc'], 2))
			        rows.append(row)
			      # Replace the raw per-domain statistics with the compact rows.
			      domains[domain] = rows
			  print(json.dumps(versions, separators=(',', ':')))


			`# Help.`

			def do_help(parser, subparsers, args):
			  """Print general help, or the help of the command in args.help_cmd.

			  Args:
			    parser: top-level parser; its help is printed when args.help_cmd is
			      empty or None.
			    subparsers: dict mapping command names to their parsers.
			    args: parsed arguments; provides help_cmd and an error() callable that
			      is invoked when help_cmd names an unknown command.
			  """
			  command = args.help_cmd
			  if not command:
			    parser.print_help()
			  elif command in subparsers:
			    subparsers[command].print_help()
			  else:
			    args.error("Unknown command '{}'".format(command))


			`# Main program, parse command line and execute.`

			def main():
			  """Parse the command line and execute the selected command.

			  Four sub-commands are registered: "run", "stats", "json" and "help".
			  Each sub-parser stores two defaults in the parsed arguments: `func`, the
			  handler called with the parsed arguments, and `error`, the sub-parser's
			  own error method, so that problems are reported with the usage text of
			  the matching command.

			  For "run", two option combinations are rejected before dispatching:
			  --sites-file together with explicit site URLs, and --replay-wpr without
			  --replay-bin.
			  """
			  parser = argparse.ArgumentParser()
			  adder = parser.add_subparsers(title="commands", dest="command",
			                                metavar="<command>")
			  subparsers = {}

			  def add_command(name, help_text, handler):
			    # Create the sub-parser for `name`, register it and attach its handler
			    # and error reporter as defaults.
			    command_parser = adder.add_parser(name, help=help_text)
			    subparsers[name] = command_parser
			    command_parser.set_defaults(func=handler, error=command_parser.error)
			    return command_parser

			  def show_help(help_args):
			    # Handler for "help"; needs access to the parsers built in this function.
			    return do_help(parser, subparsers, help_args)

			  # Command: run.
			  run = add_command("run", "run --help", do_run)
			  run.add_argument("--chrome-flags", type=str, default="",
			                   help="specify additional chrome flags")
			  run.add_argument("--js-flags", type=str, default="",
			                   help="specify additional V8 flags")
			  run.add_argument("--no-url", dest="print_url", action="store_false",
			                   default=True,
			                   help="do not include url in statistics file")
			  run.add_argument("-n", "--repeat", type=int, metavar="<num>",
			                   help="specify iterations for each website (default: once)")
			  run.add_argument("--replay-wpr", type=str, metavar="<path>",
			                   help="use the specified web page replay (.wpr) archive")
			  run.add_argument("--replay-bin", type=str, metavar="<path>",
			                   help=("specify the replay.py script typically located in "
			                         "$CHROMIUM/src/third_party/webpagereplay/replay.py"))
			  run.add_argument("-r", "--retries", type=int, metavar="<num>",
			                   help="specify retries if website is down (default: forever)")
			  run.add_argument("-f", "--sites-file", type=str, metavar="<path>",
			                   help="specify file containing benchmark websites")
			  run.add_argument("-t", "--timeout", type=int, metavar="<seconds>",
			                   default=60,
			                   help="specify seconds before chrome is killed")
			  run.add_argument("-u", "--user-data-dir", type=str, metavar="<path>",
			                   help="specify user data dir (default is temporary)")
			  run.add_argument("-c", "--with-chrome", type=str, metavar="<path>",
			                   default="/usr/bin/google-chrome",
			                   help="specify chrome executable to use")
			  run.add_argument("sites", type=str, metavar="<URL>", nargs="*",
			                   help="specify benchmark website")

			  # Command: stats.
			  stats = add_command("stats", "stats --help", do_stats)
			  stats.add_argument("-l", "--limit", type=int, metavar="<num>", default=0,
			                     help="limit how many items to print (default: none)")
			  stats.add_argument("-s", "--sort", choices=["asc", "desc"], default="asc",
			                     help="specify sorting order (default: ascending)")
			  stats.add_argument("-n", "--no-total", dest="totals",
			                     action="store_false", default=True,
			                     help="do not print totals")
			  stats.add_argument("logfiles", type=str, metavar="<logfile>", nargs="*",
			                     help="specify log files to parse")

			  # Command: json.
			  json_cmd = add_command("json", "json --help", do_json)
			  json_cmd.add_argument("logdirs", type=str, metavar="<logdir>", nargs="*",
			                        help="specify directories with log files to parse")

			  # Command: help.
			  help_cmd = add_command("help", "help information", show_help)
			  help_cmd.add_argument("help_cmd", type=str, metavar="<command>", nargs="?",
			                        help="command for which to display help")

			  # Execute the command.
			  args = parser.parse_args()
			  if args.command == "run":
			    # These options exist only on the "run" sub-parser, so they are checked
			    # only for that command. args.error is that sub-parser's error method;
			    # the sys.exit(1) calls are a fallback should it ever return.
			    if args.sites_file and args.sites:
			      args.error("if --sites-file is used, no site URLS must be given")
			      sys.exit(1)
			    if args.replay_wpr and not args.replay_bin:
			      args.error("if --replay-wpr is used, --replay-bin must be given")
			      sys.exit(1)
			  args.func(args)

			# Run the tool only when this file is executed as a script; whatever main()
			# returns is passed on as the process exit status.
			if __name__ == "__main__":
			  exit_status = main()
			  sys.exit(exit_status)