Add script for benchmarking with --runtime-call-stats
R=cbruni@chromium.org BUG= LOG=N Review-Url: https://codereview.chromium.org/1922873004 Cr-Commit-Position: refs/heads/master@{#36070}
This commit is contained in:
parent
85572b0189
commit
1d941a4466
524
tools/callstats.py
Normal file
524
tools/callstats.py
Normal file
@ -0,0 +1,524 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2016 the V8 project authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
'''
|
||||
Usage: runtime-call-stats.py [-h] <command> ...
|
||||
|
||||
Optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
|
||||
Commands:
|
||||
run run chrome with --runtime-call-stats and generate logs
|
||||
stats process logs and print statistics
|
||||
json process logs from several versions and generate JSON
|
||||
help help information
|
||||
|
||||
For each command, you can try ./runtime-call-stats.py help command.
|
||||
'''
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import numpy
|
||||
import scipy
|
||||
import scipy.stats
|
||||
from math import sqrt
|
||||
|
||||
|
||||
# Run benchmarks.
|
||||
|
||||
DEFAULT_SITES = [
|
||||
# top websites (http://alexa.com/topsites): --------------------
|
||||
"https://www.google.de/search?q=v8",
|
||||
"https://www.youtube.com",
|
||||
"https://www.facebook.com/shakira",
|
||||
"http://www.baidu.com/s?wd=v8",
|
||||
"http://www.yahoo.co.jp",
|
||||
"http://www.amazon.com/s/?field-keywords=v8",
|
||||
"http://hi.wikipedia.org/wiki/" \
|
||||
"%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",
|
||||
"http://www.qq.com",
|
||||
"http://www.twitter.com/taylorswift13",
|
||||
"http://www.reddit.com",
|
||||
"http://www.ebay.fr/sch/i.html?_nkw=v8",
|
||||
"http://edition.cnn.com",
|
||||
"http://world.taobao.com",
|
||||
"http://www.instagram.com/archdigest",
|
||||
"https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
|
||||
"http://www.msn.com/ar-ae",
|
||||
"http://www.bing.com/search?q=v8+engine",
|
||||
"http://www.pinterest.com/categories/popular",
|
||||
"http://www.sina.com.cn",
|
||||
"http://weibo.com",
|
||||
"http://yandex.ru/search/?text=v8",
|
||||
# framework driven decisions: -----------------------------------
|
||||
# wikipedia content + angularjs
|
||||
"http://www.wikiwand.com/en/hill",
|
||||
# ember website
|
||||
"http://meta.discourse.org",
|
||||
# backbone js
|
||||
"http://reddit.musicplayer.io",
|
||||
# gwt application
|
||||
"http://inbox.google.com",
|
||||
# webgl / algorithmic case
|
||||
"http://maps.google.co.jp/maps/search/restaurant+tokyo",
|
||||
# whatever framework adwords uses
|
||||
"https://adwords.google.com",
|
||||
]
|
||||
|
||||
|
||||
def print_command(cmd_args):
|
||||
def fix_for_printing(arg):
|
||||
m = re.match(r'^--([^=]+)=(.*)$', arg)
|
||||
if m and (' ' in m.group(2) or m.group(2).startswith('-')):
|
||||
arg = "--{}='{}'".format(m.group(1), m.group(2))
|
||||
elif ' ' in arg:
|
||||
arg = "'{}'".format(arg)
|
||||
return arg
|
||||
print " ".join(map(fix_for_printing, cmd_args))
|
||||
|
||||
|
||||
def start_replay_server(args):
|
||||
cmd_args = [
|
||||
args.replay_bin,
|
||||
"--port=4080",
|
||||
"--ssl_port=4443",
|
||||
"--no-dns_forwarding",
|
||||
"--use_closest_match",
|
||||
"--no-diff_unknown_requests",
|
||||
args.replay_wpr,
|
||||
]
|
||||
print "=" * 80
|
||||
print_command(cmd_args)
|
||||
with open(os.devnull, 'w') as null:
|
||||
server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
|
||||
print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
|
||||
print "=" * 80
|
||||
return server
|
||||
|
||||
|
||||
def stop_replay_server(server):
|
||||
print("SHUTTING DOWN REPLAY SERVER %s" % server.pid)
|
||||
server.terminate()
|
||||
|
||||
|
||||
def run_site(site, domain, args, timeout=None):
|
||||
print "="*80
|
||||
print "RUNNING DOMAIN %s" % domain
|
||||
print "="*80
|
||||
result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
|
||||
count = 0
|
||||
while count == 0 or args.repeat is not None and count < args.repeat:
|
||||
count += 1
|
||||
result = result_template.format(domain=domain, count=count)
|
||||
retries = 0
|
||||
while args.retries is None or retries < args.retries:
|
||||
retries += 1
|
||||
try:
|
||||
temp_user_data_dir = args.user_data_dir is None
|
||||
if temp_user_data_dir:
|
||||
user_data_dir = tempfile.mkdtemp(prefix="chr_")
|
||||
js_flags = "--runtime-call-stats"
|
||||
if args.js_flags: js_flags += " " + args.js_flags
|
||||
chrome_flags = [
|
||||
"--no-default-browser-check",
|
||||
"--disable-translate",
|
||||
"--single-process",
|
||||
"--no-sandbox",
|
||||
"--js-flags={}".format(js_flags),
|
||||
"--no-first-run",
|
||||
"--user-data-dir={}".format(user_data_dir),
|
||||
]
|
||||
if args.replay_wpr:
|
||||
chrome_flags += [
|
||||
"--host-resolver-rules=MAP *:80 localhost:4080, " \
|
||||
"MAP *:443 localhost:4443, " \
|
||||
"EXCLUDE localhost",
|
||||
"--ignore-certificate-errors",
|
||||
"--disable-web-security",
|
||||
"--reduce-security-for-testing",
|
||||
"--allow-insecure-localhost",
|
||||
]
|
||||
if args.chrome_flags:
|
||||
chrome_flags += args.chrome_flags.split()
|
||||
if timeout is None: timeout = args.timeout
|
||||
cmd_args = [
|
||||
"timeout", str(timeout),
|
||||
args.with_chrome
|
||||
] + chrome_flags + [ site ]
|
||||
print "- " * 40
|
||||
print_command(cmd_args)
|
||||
print "- " * 40
|
||||
with open(result, "wt") as f:
|
||||
status = subprocess.call(cmd_args, stdout=f)
|
||||
# 124 means timeout killed chrome, 0 means the user was bored first!
|
||||
# If none of these two happened, then chrome apparently crashed, so
|
||||
# it must be called again.
|
||||
if status != 124 and status != 0:
|
||||
print("CHROME CRASHED, REPEATING RUN");
|
||||
continue
|
||||
# If the stats file is empty, chrome must be called again.
|
||||
if os.path.isfile(result) and os.path.getsize(result) > 0:
|
||||
if args.print_url:
|
||||
with open(result, "at") as f:
|
||||
print >> f
|
||||
print >> f, "URL: {}".format(site)
|
||||
break
|
||||
print("EMPTY RESULT, REPEATING RUN");
|
||||
finally:
|
||||
if temp_user_data_dir:
|
||||
shutil.rmtree(user_data_dir)
|
||||
|
||||
|
||||
def read_sites_file(args):
|
||||
try:
|
||||
sites = []
|
||||
try:
|
||||
with open(args.sites_file, "rt") as f:
|
||||
for item in json.load(f):
|
||||
if 'timeout' not in item:
|
||||
# This is more-or-less arbitrary.
|
||||
item['timeout'] = int(2.5 * item['timeline'] + 3)
|
||||
if item['timeout'] > args.timeout: item['timeout'] = args.timeout
|
||||
sites.append(item)
|
||||
except ValueError:
|
||||
with open(args.sites_file, "rt") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'): continue
|
||||
sites.append({'url': line, 'timeout': args.timeout})
|
||||
return sites
|
||||
except IOError as e:
|
||||
args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def do_run(args):
|
||||
# Determine the websites to benchmark.
|
||||
if args.sites_file:
|
||||
sites = read_sites_file(args)
|
||||
elif args.sites:
|
||||
sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
|
||||
else:
|
||||
sites = [{'url': site, 'timeout': args.timeout} for site in DEFAULT_SITES]
|
||||
# Disambiguate domains, if needed.
|
||||
L = []
|
||||
domains = {}
|
||||
for item in sites:
|
||||
site = item['url']
|
||||
m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
|
||||
if not m:
|
||||
args.error("Invalid URL {}.".format(site))
|
||||
continue
|
||||
domain = m.group(2)
|
||||
entry = [site, domain, None, item['timeout']]
|
||||
if domain not in domains:
|
||||
domains[domain] = entry
|
||||
else:
|
||||
if not isinstance(domains[domain], int):
|
||||
domains[domain][2] = 1
|
||||
domains[domain] = 1
|
||||
domains[domain] += 1
|
||||
entry[2] = domains[domain]
|
||||
L.append(entry)
|
||||
if args.replay_wpr:
|
||||
replay_server = start_replay_server(args);
|
||||
try:
|
||||
# Run them.
|
||||
for site, domain, count, timeout in L:
|
||||
if count is not None: domain = "{}%{}".format(domain, count)
|
||||
print site, domain, timeout
|
||||
run_site(site, domain, args, timeout)
|
||||
finally:
|
||||
if replay_server:
|
||||
stop_replay_server(replay_server)
|
||||
|
||||
|
||||
# Calculate statistics.
|
||||
|
||||
def statistics(data):
|
||||
N = len(data)
|
||||
average = numpy.average(data)
|
||||
median = numpy.median(data)
|
||||
low = numpy.min(data)
|
||||
high= numpy.max(data)
|
||||
if N > 1:
|
||||
# evaluate sample variance by setting delta degrees of freedom (ddof) to
|
||||
# 1. The degree used in calculations is N - ddof
|
||||
stddev = numpy.std(data, ddof=1)
|
||||
# Get the endpoints of the range that contains 95% of the distribution
|
||||
t_bounds = scipy.stats.t.interval(0.95, N-1)
|
||||
#assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
|
||||
# sum mean to the confidence interval
|
||||
ci = {
|
||||
'abs': t_bounds[1] * stddev / sqrt(N),
|
||||
'low': average + t_bounds[0] * stddev / sqrt(N),
|
||||
'high': average + t_bounds[1] * stddev / sqrt(N)
|
||||
}
|
||||
else:
|
||||
stddev = 0
|
||||
ci = { 'abs': 0, 'low': average, 'high': average }
|
||||
if abs(stddev) > 0.0001 and abs(average) > 0.0001:
|
||||
ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
|
||||
else:
|
||||
ci['perc'] = 0
|
||||
return { 'samples': N, 'average': average, 'median': median,
|
||||
'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
|
||||
|
||||
|
||||
def read_stats(path, S):
|
||||
with open(path, "rt") as f:
|
||||
# Process the whole file and sum repeating entries.
|
||||
D = { 'Sum': {'time': 0, 'count': 0} }
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
# Discard headers and footers.
|
||||
if not line: continue
|
||||
if line.startswith("Runtime Function"): continue
|
||||
if line.startswith("===="): continue
|
||||
if line.startswith("----"): continue
|
||||
if line.startswith("URL:"): continue
|
||||
# We have a regular line.
|
||||
fields = line.split()
|
||||
key = fields[0]
|
||||
time = float(fields[1].replace("ms", ""))
|
||||
count = int(fields[3])
|
||||
if key not in D: D[key] = { 'time': 0, 'count': 0 }
|
||||
D[key]['time'] += time
|
||||
D[key]['count'] += count
|
||||
# We calculate the sum, if it's not the "total" line.
|
||||
if key != "Total":
|
||||
D['Sum']['time'] += time
|
||||
D['Sum']['count'] += count
|
||||
# Append the sums as single entries to S.
|
||||
for key in D:
|
||||
if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
|
||||
S[key]['time_list'].append(D[key]['time'])
|
||||
S[key]['count_list'].append(D[key]['count'])
|
||||
|
||||
|
||||
def print_stats(S, args):
|
||||
# Sort by ascending/descending time average, then by ascending/descending
|
||||
# count average, then by ascending name.
|
||||
def sort_asc_func(item):
|
||||
return (item[1]['time_stat']['average'],
|
||||
item[1]['count_stat']['average'],
|
||||
item[0])
|
||||
def sort_desc_func(item):
|
||||
return (-item[1]['time_stat']['average'],
|
||||
-item[1]['count_stat']['average'],
|
||||
item[0])
|
||||
# Sorting order is in the commend-line arguments.
|
||||
sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
|
||||
# Possibly limit how many elements to print.
|
||||
L = [item for item in sorted(S.items(), key=sort_func)
|
||||
if item[0] not in ["Total", "Sum"]]
|
||||
N = len(L)
|
||||
if args.limit == 0:
|
||||
low, high = 0, N
|
||||
elif args.sort == "desc":
|
||||
low, high = 0, args.limit
|
||||
else:
|
||||
low, high = N-args.limit, N
|
||||
# How to print entries.
|
||||
def print_entry(key, value):
|
||||
def stats(s, units=""):
|
||||
conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
|
||||
return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
|
||||
print "{:>50s} {} {}".format(
|
||||
key,
|
||||
stats(value['time_stat'], units="ms"),
|
||||
stats(value['count_stat'])
|
||||
)
|
||||
# Print and calculate partial sums, if necessary.
|
||||
for i in range(low, high):
|
||||
print_entry(*L[i])
|
||||
if args.totals and args.limit != 0:
|
||||
if i == low:
|
||||
partial = { 'time_list': [0] * len(L[i][1]['time_list']),
|
||||
'count_list': [0] * len(L[i][1]['count_list']) }
|
||||
assert len(partial['time_list']) == len(L[i][1]['time_list'])
|
||||
assert len(partial['count_list']) == len(L[i][1]['count_list'])
|
||||
for j, v in enumerate(L[i][1]['time_list']):
|
||||
partial['time_list'][j] += v
|
||||
for j, v in enumerate(L[i][1]['count_list']):
|
||||
partial['count_list'][j] += v
|
||||
# Print totals, if necessary.
|
||||
if args.totals:
|
||||
print '-' * 80
|
||||
if args.limit != 0:
|
||||
partial['time_stat'] = statistics(partial['time_list'])
|
||||
partial['count_stat'] = statistics(partial['count_list'])
|
||||
print_entry("Partial", partial)
|
||||
print_entry("Sum", S["Sum"])
|
||||
print_entry("Total", S["Total"])
|
||||
|
||||
|
||||
def do_stats(args):
|
||||
T = {}
|
||||
for path in args.logfiles:
|
||||
filename = os.path.basename(path)
|
||||
m = re.match(r'^([^#]+)(#.*)?$', filename)
|
||||
domain = m.group(1)
|
||||
if domain not in T: T[domain] = {}
|
||||
read_stats(path, T[domain])
|
||||
for i, domain in enumerate(sorted(T)):
|
||||
if len(T) > 1:
|
||||
if i > 0: print
|
||||
print "{}:".format(domain)
|
||||
print '=' * 80
|
||||
S = T[domain]
|
||||
for key in S:
|
||||
S[key]['time_stat'] = statistics(S[key]['time_list'])
|
||||
S[key]['count_stat'] = statistics(S[key]['count_list'])
|
||||
print_stats(S, args)
|
||||
|
||||
|
||||
# Generate JSON file.
|
||||
|
||||
def do_json(args):
|
||||
J = {}
|
||||
for path in args.logdirs:
|
||||
if os.path.isdir(path):
|
||||
for root, dirs, files in os.walk(path):
|
||||
version = os.path.basename(root)
|
||||
if version not in J: J[version] = {}
|
||||
for filename in files:
|
||||
if filename.endswith(".txt"):
|
||||
m = re.match(r'^([^#]+)(#.*)?$', filename)
|
||||
domain = m.group(1)
|
||||
if domain not in J[version]: J[version][domain] = {}
|
||||
read_stats(os.path.join(root, filename), J[version][domain])
|
||||
for version, T in J.items():
|
||||
for domain, S in T.items():
|
||||
A = []
|
||||
for name, value in S.items():
|
||||
# We don't want the calculated sum in the JSON file.
|
||||
if name == "Sum": continue
|
||||
entry = [name]
|
||||
for x in ['time_list', 'count_list']:
|
||||
s = statistics(S[name][x])
|
||||
entry.append(round(s['average'], 1))
|
||||
entry.append(round(s['ci']['abs'], 1))
|
||||
entry.append(round(s['ci']['perc'], 2))
|
||||
A.append(entry)
|
||||
T[domain] = A
|
||||
print json.dumps(J, separators=(',', ':'))
|
||||
|
||||
|
||||
# Help.
|
||||
|
||||
def do_help(parser, subparsers, args):
|
||||
if args.help_cmd:
|
||||
if args.help_cmd in subparsers:
|
||||
subparsers[args.help_cmd].print_help()
|
||||
else:
|
||||
args.error("Unknown command '{}'".format(args.help_cmd))
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
|
||||
# Main program, parse command line and execute.
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
subparser_adder = parser.add_subparsers(title="commands", dest="command",
|
||||
metavar="<command>")
|
||||
subparsers = {}
|
||||
# Command: run.
|
||||
subparsers["run"] = subparser_adder.add_parser(
|
||||
"run", help="run --help")
|
||||
subparsers["run"].set_defaults(
|
||||
func=do_run, error=subparsers["run"].error)
|
||||
subparsers["run"].add_argument(
|
||||
"--chrome-flags", type=str, default="",
|
||||
help="specify additional chrome flags")
|
||||
subparsers["run"].add_argument(
|
||||
"--js-flags", type=str, default="",
|
||||
help="specify additional V8 flags")
|
||||
subparsers["run"].add_argument(
|
||||
"--no-url", dest="print_url", action="store_false", default=True,
|
||||
help="do not include url in statistics file")
|
||||
subparsers["run"].add_argument(
|
||||
"-n", "--repeat", type=int, metavar="<num>",
|
||||
help="specify iterations for each website (default: once)")
|
||||
subparsers["run"].add_argument(
|
||||
"--replay-wpr", type=str, metavar="<path>",
|
||||
help="use the specified web page replay (.wpr) archive")
|
||||
subparsers["run"].add_argument(
|
||||
"--replay-bin", type=str, metavar="<path>",
|
||||
help="specify the replay.py script typically located in " \
|
||||
"$CHROMIUM/src/third_party/webpagereplay/replay.py")
|
||||
subparsers["run"].add_argument(
|
||||
"-r", "--retries", type=int, metavar="<num>",
|
||||
help="specify retries if website is down (default: forever)")
|
||||
subparsers["run"].add_argument(
|
||||
"-f", "--sites-file", type=str, metavar="<path>",
|
||||
help="specify file containing benchmark websites")
|
||||
subparsers["run"].add_argument(
|
||||
"-t", "--timeout", type=int, metavar="<seconds>", default=60,
|
||||
help="specify seconds before chrome is killed")
|
||||
subparsers["run"].add_argument(
|
||||
"-u", "--user-data-dir", type=str, metavar="<path>",
|
||||
help="specify user data dir (default is temporary)")
|
||||
subparsers["run"].add_argument(
|
||||
"-c", "--with-chrome", type=str, metavar="<path>",
|
||||
default="/usr/bin/google-chrome",
|
||||
help="specify chrome executable to use")
|
||||
subparsers["run"].add_argument(
|
||||
"sites", type=str, metavar="<URL>", nargs="*",
|
||||
help="specify benchmark website")
|
||||
# Command: stats.
|
||||
subparsers["stats"] = subparser_adder.add_parser(
|
||||
"stats", help="stats --help")
|
||||
subparsers["stats"].set_defaults(
|
||||
func=do_stats, error=subparsers["stats"].error)
|
||||
subparsers["stats"].add_argument(
|
||||
"-l", "--limit", type=int, metavar="<num>", default=0,
|
||||
help="limit how many items to print (default: none)")
|
||||
subparsers["stats"].add_argument(
|
||||
"-s", "--sort", choices=["asc", "desc"], default="asc",
|
||||
help="specify sorting order (default: ascending)")
|
||||
subparsers["stats"].add_argument(
|
||||
"-n", "--no-total", dest="totals", action="store_false", default=True,
|
||||
help="do not print totals")
|
||||
subparsers["stats"].add_argument(
|
||||
"logfiles", type=str, metavar="<logfile>", nargs="*",
|
||||
help="specify log files to parse")
|
||||
# Command: json.
|
||||
subparsers["json"] = subparser_adder.add_parser(
|
||||
"json", help="json --help")
|
||||
subparsers["json"].set_defaults(
|
||||
func=do_json, error=subparsers["json"].error)
|
||||
subparsers["json"].add_argument(
|
||||
"logdirs", type=str, metavar="<logdir>", nargs="*",
|
||||
help="specify directories with log files to parse")
|
||||
# Command: help.
|
||||
subparsers["help"] = subparser_adder.add_parser(
|
||||
"help", help="help information")
|
||||
subparsers["help"].set_defaults(
|
||||
func=lambda args: do_help(parser, subparsers, args),
|
||||
error=subparsers["help"].error)
|
||||
subparsers["help"].add_argument(
|
||||
"help_cmd", type=str, metavar="<command>", nargs="?",
|
||||
help="command for which to display help")
|
||||
# Execute the command.
|
||||
args = parser.parse_args()
|
||||
if args.command == "run" and args.sites_file and args.sites:
|
||||
args.error("if --sites-file is used, no site URLS must be given")
|
||||
sys.exit(1)
|
||||
elif args.command == "run" and args.replay_wpr and not args.replay_bin:
|
||||
args.error("if --replay-wpr is used, --replay-bin must be given")
|
||||
sys.exit(1)
|
||||
else:
|
||||
args.func(args)
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
Loading…
Reference in New Issue
Block a user