[Test] CSuite benchmark runner
It's nice to have a quick way to get stable results on the local machine
for the classic d8 benchmarks. CSuite revitalizes an old tool called
BSuite, which offers multiple averaged runs and a nice ASCII display of
results.

Sample usage:

  ./csuite.py octane baseline ~/src/v8/out/d8
  ./csuite.py octane compare ~/src/v8/out-mine/d8

     benchmark:    score |   master |      % |
  =======================+==========+========+
      Richards:  26201.0 |  26568.0 |   -1.4 |
     DeltaBlue:  55744.0 |  58309.0 |   -4.4 |
        Crypto:  31831.0 |  31815.0 |        |
      RayTrace:  73481.0 |  73185.0 |    0.4 |
  ............<snip>............................
        Octane:  36388.0 |  35741.0 |    1.8 |
  -----------------------+----------+--------+

See README.md for more documentation.

Change-Id: I182490506ca07fab1240bd485bd1d7a8920db893
Reviewed-on: https://chromium-review.googlesource.com/c/1387487
Commit-Queue: Michael Stanton <mvstanton@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#58434}
test/benchmarks/csuite/README.md (new file, 43 lines)
# CSuite: Local benchmarking help for V8 performance analysis

CSuite helps you make N averaged runs of a benchmark, then compare with
a different binary and/or different flags. It knows about the "classic"
benchmarks of SunSpider, Kraken and Octane, which are still useful for
investigating peak performance scenarios. It uses a default number of
runs for each suite:

  * SunSpider - 100 runs
  * Kraken - 80 runs
  * Octane - 10 runs

# Usage

Say you want to see how much optimization buys you:

    ./csuite.py kraken baseline ~/src/v8/out/d8 -x="--noopt"
    ./csuite.py kraken compare ~/src/v8/out/d8

Suppose you are comparing two binaries, and want a quick look at results.
Normally, Octane should have about 10 runs, but 3 will only take a few
minutes:

    ./csuite.py -r 3 octane baseline ~/src/v8/out-master/d8
    ./csuite.py -r 3 octane compare ~/src/v8/out-mine/d8

You can run from any place:

    ../../somewhere-strange/csuite.py sunspider baseline ./d8
    ../../somewhere-strange/csuite.py sunspider compare ./d8-better

Note that all output files are created in the directory where you run
from. A `_benchmark_runner_data` directory will be created to store run
output, and a `_results` directory as well for scores.

For more detailed documentation, see:

    ./csuite.py --help

Output from the runners is captured into files and cached, so you can cancel
and resume multi-hour benchmark runs with minimal loss of data/time. The -f
flag forces re-running even if these cached files still exist.
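For reference, the score lines that benchmark.py prints (and that end up in the
`_results/master` baseline file) are plain comma-separated values of the form
`name,score,sigma,runs`. Below is a minimal sketch, not part of the suite, of
reading such a baseline back into a dictionary; the helper name and the default
`_results/master` path are just illustrative:

    import csv

    def read_baseline(path="_results/master"):
      # Parse "name,score,sigma,runs" lines written by benchmark.py.
      scores = {}
      with open(path) as f:
        for row in csv.reader(f):
          if len(row) < 4:
            continue  # skip blank or malformed lines
          scores[row[0]] = (float(row[1]), float(row[2]), int(row[3]))
      return scores

    # Example: read_baseline().get("Octane") -> (36388.0, 112.23, 10) or similar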
test/benchmarks/csuite/benchmark.py (new executable file, 220 lines)
#!/usr/bin/python
# Copyright 2018 the V8 project authors. All rights reserved.

'''
python %prog -c <command> [options]

Local benchmark runner.
The -c option is mandatory.
'''

import math
from optparse import OptionParser
import os
import re
import subprocess
import sys
import time

def GeometricMean(numbers):
  log = sum([math.log(n) for n in numbers])
  return math.pow(math.e, log / len(numbers))


class BenchmarkSuite(object):

  def __init__(self, name):
    self.name = name
    self.results = {}
    self.tests = []
    self.avgresult = {}
    self.sigmaresult = {}
    self.numresult = {}
    self.kClassicScoreSuites = ["SunSpider", "Kraken"]
    self.kGeometricScoreSuites = ["Octane"]


  def RecordResult(self, test, result):
    if test not in self.tests:
      self.tests += [test]
      self.results[test] = []
    self.results[test] += [int(result)]

  def ThrowAwayWorstResult(self, results):
    if len(results) <= 1: return
    if self.name in self.kClassicScoreSuites:
      results.pop()
    elif self.name in self.kGeometricScoreSuites:
      del results[0]

  def ProcessResults(self, opts):
    for test in self.tests:
      results = self.results[test]
      results.sort()
      self.ThrowAwayWorstResult(results)
      mean = sum(results) * 1.0 / len(results)
      self.avgresult[test] = mean
      sigma_divisor = len(results) - 1
      if sigma_divisor == 0:
        sigma_divisor = 1
      self.sigmaresult[test] = math.sqrt(
          sum((x - mean) ** 2 for x in results) / sigma_divisor)
      self.numresult[test] = len(results)
      if opts.verbose:
        if not test in ["Octane"]:
          print("%s,%.1f,%.2f,%d" %
                (test, self.avgresult[test],
                 self.sigmaresult[test], self.numresult[test]))

  def ComputeScoreGeneric(self):
    self.score = 0
    self.sigma = 0
    for test in self.tests:
      self.score += self.avgresult[test]
      self.sigma += self.sigmaresult[test]
      self.num = self.numresult[test]

  def ComputeScoreV8Octane(self, name):
    # The score for the run is stored with the form
    # "Octane-octane2.1(Score): <score>"
    found_name = ''
    for s in self.avgresult.keys():
      if re.search("^Octane", s):
        found_name = s
        break

    self.score = self.avgresult[found_name]
    self.sigma = 0
    for test in self.tests:
      self.sigma += self.sigmaresult[test]
      self.num = self.numresult[test]
    self.sigma /= len(self.tests)

  def ComputeScore(self):
    if self.name in self.kClassicScoreSuites:
      self.ComputeScoreGeneric()
    elif self.name in self.kGeometricScoreSuites:
      self.ComputeScoreV8Octane(self.name)
    else:
      print "Don't know how to compute score for suite: '%s'" % self.name

  def IsBetterThan(self, other):
    if self.name in self.kClassicScoreSuites:
      return self.score < other.score
    elif self.name in self.kGeometricScoreSuites:
      return self.score > other.score
    else:
      print "Don't know how to compare score for suite: '%s'" % self.name


class BenchmarkRunner(object):
  def __init__(self, args, current_directory, opts):
    self.best = {}
    self.second_best = {}
    self.args = args
    self.opts = opts
    self.current_directory = current_directory
    self.outdir = os.path.join(opts.cachedir, "_benchmark_runner_data")

  def Run(self):
    if not os.path.exists(self.outdir):
      os.mkdir(self.outdir)

    self.RunCommand()
    # Figure out the suite from the command line (heuristic) or the current
    # working directory.
    teststr = self.opts.command.lower() + " " + self.current_directory.lower()
    if teststr.find('octane') >= 0:
      suite = 'Octane'
    elif teststr.find('sunspider') >= 0:
      suite = 'SunSpider'
    elif teststr.find('kraken') >= 0:
      suite = 'Kraken'
    else:
      suite = 'Generic'

    self.ProcessOutput(suite)

  def RunCommand(self):
    for i in range(self.opts.runs):
      outfile = "%s/out.%d.txt" % (self.outdir, i)
      if os.path.exists(outfile) and not self.opts.force:
        continue
      print "run #%d" % i
      cmdline = "%s > %s" % (self.opts.command, outfile)
      subprocess.call(cmdline, shell=True)
      time.sleep(self.opts.sleep)

  def ProcessLine(self, line):
    # Octane puts this line in before score.
    if line == "----":
      return (None, None)

    # Kraken or Sunspider?
    g = re.match("(?P<test_name>\w+(-\w+)*)\(RunTime\): (?P<score>\d+) ms\.", \
                 line)
    if g == None:
      # Octane?
      g = re.match("(?P<test_name>\w+): (?P<score>\d+)", line)
      if g == None:
        g = re.match("Score \(version [0-9]+\): (?P<score>\d+)", line)
        if g != None:
          return ('Octane', g.group('score'))
        else:
          # Generic?
          g = re.match("(?P<test_name>\w+)\W+(?P<score>\d+)", line)
          if g == None:
            return (None, None)
    return (g.group('test_name'), g.group('score'))

  def ProcessOutput(self, suitename):
    suite = BenchmarkSuite(suitename)
    for i in range(self.opts.runs):
      outfile = "%s/out.%d.txt" % (self.outdir, i)
      with open(outfile, 'r') as f:
        for line in f:
          (test, result) = self.ProcessLine(line)
          if test != None:
            suite.RecordResult(test, result)

    suite.ProcessResults(self.opts)
    suite.ComputeScore()
    print ("%s,%.1f,%.2f,%d " %
           (suite.name, suite.score, suite.sigma, suite.num)),
    if self.opts.verbose:
      print ""
    print ""


if __name__ == '__main__':
  parser = OptionParser(usage=__doc__)
  parser.add_option("-c", "--command", dest="command",
                    help="Command to run the test suite.")
  parser.add_option("-r", "--runs", dest="runs", default=4,
                    help="Number of runs")
  parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                    default=False, help="Print results for each test")
  parser.add_option("-f", "--force", dest="force", action="store_true",
                    default=False,
                    help="Force re-run even if output files exist")
  parser.add_option("-z", "--sleep", dest="sleep", default=0,
                    help="Number of seconds to sleep between runs")
  parser.add_option("-d", "--run-directory", dest="cachedir",
                    help="Directory where a cache directory will be created")
  (opts, args) = parser.parse_args()
  opts.runs = int(opts.runs)
  opts.sleep = int(opts.sleep)

  if not opts.command:
    print "You must specify the command to run (-c). Aborting."
    sys.exit(1)

  cachedir = os.path.abspath(os.getcwd())
  if not opts.cachedir:
    opts.cachedir = cachedir
  if not os.path.exists(opts.cachedir):
    print "Directory " + opts.cachedir + " is not valid. Aborting."
    sys.exit(1)

  br = BenchmarkRunner(args, os.getcwd(), opts)
  br.Run()
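The aggregation above is easy to misread at a glance: for each test,
ProcessResults sorts the raw numbers, ThrowAwayWorstResult drops the worst run
(the largest time for the SunSpider/Kraken style suites, the smallest score for
Octane), and the mean plus sample standard deviation are computed from what
remains. A minimal standalone sketch of the same aggregation for a single test,
purely illustrative:

    import math

    def aggregate(raw_results, higher_is_better):
      # Mirrors BenchmarkSuite.ProcessResults / ThrowAwayWorstResult for one test.
      results = sorted(raw_results)
      if len(results) > 1:
        if higher_is_better:
          del results[0]   # geometric-score suites (Octane): drop the lowest score
        else:
          results.pop()    # classic time suites (SunSpider, Kraken): drop the slowest run
      mean = sum(results) * 1.0 / len(results)
      divisor = max(len(results) - 1, 1)
      sigma = math.sqrt(sum((x - mean) ** 2 for x in results) / divisor)
      return mean, sigma, len(results)

    # e.g. aggregate([26100, 26200, 26500], True) -> (26350.0, 212.13..., 2)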
test/benchmarks/csuite/compare-baseline.py (new executable file, 264 lines)
#!/usr/bin/python
# Copyright 2018 the V8 project authors. All rights reserved.

'''
python %prog [options] [baseline_files]

Compare benchmark results from the benchmark runner against one or
more baselines. You can either pipe the result of the benchmark
runner directly into this script or specify the results file with
the -f option.
'''

import csv
import math
from optparse import OptionParser
import os
import sys

PERCENT_CONSIDERED_SIGNIFICANT = 0.5
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05

RESET_SEQ = "\033[0m"
RED_SEQ = "\033[31m"
GREEN_SEQ = "\033[32m"
BLUE_SEQ = "\033[34m"
BOLD_SEQ = "\033[1m"

v8_benchmarks = ["V8", "Octane", "Richards", "DeltaBlue", "Crypto",
                 "EarleyBoyer", "RayTrace", "RegExp", "Splay", "SplayLatency",
                 "NavierStokes", "PdfJS", "Mandreel", "MandreelLatency",
                 "Gameboy", "CodeLoad", "Box2D", "zlib", "Typescript"]

suite_names = ["V8", "Octane", "Kraken-Orig", "Kraken-Once", "Kraken",
               "SunSpider", "SunSpider-Once", "SunSpider-Orig"]

def ColorText(opts, text):
  if opts.no_color:
    result = text.replace("$RESET", "")
    result = result.replace("$BLUE", "")
    result = result.replace("$RED", "")
    result = result.replace("$GREEN", "")
    result = result.replace("$BOLD", "")
  else:
    if opts.html:
      result = text.replace("$RESET", "</font></b>")
      result = result.replace("$BLUE", "<font COLOR=\"0000DD\">")
      result = result.replace("$RED", "<font COLOR=\"DD0000\">")
      result = result.replace("$GREEN", "<font COLOR=\"00DD00\">")
      result = result.replace("$BOLD", "<b>")
    else:
      result = text.replace("$RESET", RESET_SEQ)
      result = result.replace("$BLUE", BLUE_SEQ)
      result = result.replace("$RED", RED_SEQ)
      result = result.replace("$GREEN", GREEN_SEQ)
      result = result.replace("$BOLD", BOLD_SEQ)
  return result

def NormalizedSigmaToString(normalized_sigma):
  assert normalized_sigma >= 0
  if normalized_sigma < PROBABILITY_CONSIDERED_SIGNIFICANT:
    return "|"
  return "S"

def ComputeZ(baseline_avg, baseline_sigma, mean, n):
  if baseline_sigma == 0:
    return 1000.0
  return abs((mean - baseline_avg) / (baseline_sigma / math.sqrt(n)))

# Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
def ComputeProbability(z):
  if z > 2.575829:   # p 0.005: two sided < 0.01
    return 0
  if z > 2.326348:   # p 0.010
    return 0.01
  if z > 2.170091:   # p 0.015
    return 0.02
  if z > 2.053749:   # p 0.020
    return 0.03
  if z > 1.959964:   # p 0.025: two sided < 0.05
    return 0.04
  if z > 1.880793:   # p 0.030
    return 0.05
  if z > 1.811910:   # p 0.035
    return 0.06
  if z > 1.750686:   # p 0.040
    return 0.07
  if z > 1.695397:   # p 0.045
    return 0.08
  if z > 1.644853:   # p 0.050: two sided < 0.10
    return 0.09
  if z > 1.281551:   # p 0.100: two sided < 0.20
    return 0.10
  return 0.20  # two sided p >= 0.20

def PercentColor(change_percent, flakiness):
  result = ""
  if change_percent >= PERCENT_CONSIDERED_SIGNIFICANT:
    result = "$GREEN"
  elif change_percent <= -PERCENT_CONSIDERED_SIGNIFICANT:
    result = "$RED"
  else:
    return ""
  if flakiness < PROBABILITY_CONSIDERED_SIGNIFICANT:
    result += "$BOLD"
  elif flakiness > PROBABILITY_CONSIDERED_MEANINGLESS:
    result = ""
  return result

def ProcessOneResultLine(opts, suite, testname, time, sigma, num, baselines):
  time = float(time)
  sigma = float(sigma)
  num = int(num)
  if testname in suite_names:
    base_color = "$BOLD"
  else:
    base_color = ""
  if opts.html:
    line_out = ("<tr><td>%s%s$RESET</td><td>%s%8.1f$RESET</td>" %
                (base_color, testname, base_color, time))
  else:
    sigma_string = NormalizedSigmaToString(sigma / time)
    line_out = ("%s%40s$RESET: %s%8.1f$RESET %s" %
                (base_color, testname, base_color, time, sigma_string))
  for baseline in baselines:
    raw_score = ""
    compare_score = ""
    found = False
    if suite in baseline[1]:
      baseline_results = baseline[1][suite]
      for item in baseline_results:
        if testname == item[0]:
          found = True
          raw_score_num = float(item[1])
          raw_sigma_num = float(item[2])
          raw_score = "%7.1f" % raw_score_num
          compare_num = 0
          compare_score = ""
          percent_color = ""
          if testname in v8_benchmarks:
            compare_num = 100*time/raw_score_num - 100
          else:
            compare_num = 100*raw_score_num/time - 100
          if abs(compare_num) > 0.1:
            compare_score = "%3.1f" % (compare_num)
            z = ComputeZ(raw_score_num, raw_sigma_num, time, num)
            p = ComputeProbability(z)
            percent_color = PercentColor(compare_num, p)
          sigma_string = NormalizedSigmaToString(raw_sigma_num / raw_score_num)
          if opts.html:
            format_string = "<td>%s%8s$RESET</td><td>%s%6s$RESET</td>"
          else:
            format_string = " %s%8s$RESET %s %s%6s$RESET |"
          line_out += (format_string %
                       (base_color, raw_score, sigma_string,
                        percent_color, compare_score))
    if not found:
      if opts.html:
        line_out += "<td></td><td></td>"
      else:
        line_out += "| | "
  if opts.html:
    line_out += "</tr>"
  print(ColorText(opts, line_out))

def PrintSeparator(opts, baselines, big):
  if not opts.html:
    if big:
      separator = "==================================================="
    else:
      separator = "---------------------------------------------------"
    for baseline in baselines:
      if big:
        separator += "+==========+========"
      else:
        separator += "+----------+--------"
    separator += "+"
    print(separator)

def ProcessResults(opts, results, baselines):
  for suite in suite_names:
    if suite in results:
      for result in results[suite]:
        ProcessOneResultLine(opts, suite, result[0], result[1], result[2],
                             result[3], baselines)
      PrintSeparator(opts, baselines, False)

def ProcessFile(file_path):
  file_reader = csv.reader(open(file_path, 'rb'), delimiter=',')
  benchmark_results = {}
  current_rows = []
  for row in file_reader:
    if len(row) > 1:
      current_rows.append(row)
      for suite in suite_names:
        if row[0] == suite:
          benchmark_results[row[0]] = current_rows
          current_rows = []
  return benchmark_results

def ProcessStdIn():
  benchmark_results = {}
  current_rows = []
  for line_in in sys.stdin:
    line_in = line_in.rstrip()
    row = line_in.split(",")
    if len(row) > 1:
      current_rows.append(row)
      for suite in suite_names:
        if row[0] == suite:
          benchmark_results[row[0]] = current_rows
          current_rows = []
  return benchmark_results

def CompareFiles(opts, args):
  results = []
  baselines = []
  for file_path in args:
    baseline = ProcessFile(file_path)
    baselines.append((os.path.basename(file_path), baseline))
  if opts.html:
    header = "<tr><th>benchmark</th><th>score</th>"
  else:
    header = "%40s: %8s " % ("benchmark", "score")
  for baseline in baselines:
    (baseline_name, baseline_results) = baseline
    if opts.html:
      header += ("<th>%s</th><th>%s</th>") % (baseline_name[0:7], "%")
    else:
      header += "| %8s | %6s " % (baseline_name[0:7], "%")
  if opts.html:
    header += "</tr>\n"
  else:
    header += "|"
  print(header)
  PrintSeparator(opts, baselines, True)
  if opts.filename:
    file_reader = csv.reader(open(opts.filename, 'rb'), delimiter=',')
    results = ProcessFile(opts.filename)
  else:
    results = ProcessStdIn()
  ProcessResults(opts, results, baselines)

if __name__ == '__main__':
  parser = OptionParser(usage=__doc__)
  parser.add_option("-f", "--filename", dest="filename",
                    help="Specifies the filename for the results to "\
                         "compare to the baselines rather than reading from stdin.")
  parser.add_option("-b", "--baselines", dest="baselines",
                    help="Specifies a directory of baseline files to "\
                         "compare against.")
  parser.add_option("-n", "--no-color", action="store_true",
                    dest="no_color", default=False,
                    help="Generates output without escape codes that "\
                         "add color highlights.")
  parser.add_option("--html", action="store_true",
                    dest="html", default=False,
                    help="Generates output as an HTML table.")
  (opts, args) = parser.parse_args()
  if opts.baselines:
    args.extend(map(lambda x: (opts.baselines + "/" + x),
                    (os.listdir(opts.baselines))))
  args = reversed(sorted(args))
  CompareFiles(opts, args)
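The highlighting above boils down to a one-sample z-test: ComputeZ measures how
far the new mean sits from the baseline mean in units of the baseline's standard
error, z = |mean - baseline_avg| / (baseline_sigma / sqrt(n)), and
ComputeProbability maps z to an approximate two-sided p-value via the hard-coded
table. A delta is then only emphasized when it is at least 0.5% and unlikely to
be noise. A minimal, self-contained sketch of that decision for a score-style
benchmark (higher is better); the function name and the coarse p lookup are
illustrative stand-ins, and the example sigma is assumed:

    import math

    # Same thresholds as compare-baseline.py above.
    PERCENT_CONSIDERED_SIGNIFICANT = 0.5
    PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02

    def delta_is_highlighted(baseline_avg, baseline_sigma, new_mean, runs):
      # One-sample z-test: distance from the baseline mean in standard errors.
      if baseline_sigma == 0:
        z = 1000.0  # ComputeZ's sentinel for a zero-variance baseline
      else:
        z = abs((new_mean - baseline_avg) / (baseline_sigma / math.sqrt(runs)))
      p = 0.01 if z > 2.326348 else 0.20   # coarse stand-in for ComputeProbability
      percent = 100.0 * new_mean / baseline_avg - 100.0   # score-style delta
      return abs(percent) >= PERCENT_CONSIDERED_SIGNIFICANT and \
          p < PROBABILITY_CONSIDERED_SIGNIFICANT

    # Richards from the sample table: 26568.0 baseline vs. 26201.0 now; with an
    # assumed sigma of 150 over 10 runs, this -1.4% change would be flagged.
    # delta_is_highlighted(26568.0, 150.0, 26201.0, 10) -> True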
test/benchmarks/csuite/csuite.py (new executable file, 154 lines)
#!/usr/bin/python
# Copyright 2018 the V8 project authors. All rights reserved.
'''
C S u i t e because who can remember?
-----------------------------------------------------------------------------
python csuite.py [options] <benchmark> <mode> <d8 path>

Arguments
  benchmark: one of octane, sunspider or kraken.
  mode: baseline or compare.
  d8 path: a valid path to the d8 executable you want to use.

CSuite is a wrapper around benchmark.py and compare-baseline.py, old
friends in the d8 benchmarking world. Unlike those tools, it can be
run in any directory. It's also opinionated about which benchmarks it
will run, currently SunSpider, Octane and Kraken. Furthermore, it
runs the versions we pull into ./test/benchmarks/data.

Examples:

Say you want to see how much optimization buys you:
  ./csuite.py kraken baseline ~/src/v8/out/d8 -x="--noopt"
  ./csuite.py kraken compare ~/src/v8/out/d8

Suppose you are comparing two binaries, quick n' dirty style:
  ./csuite.py -r 3 octane baseline ~/src/v8/out-master/d8
  ./csuite.py -r 3 octane compare ~/src/v8/out-mine/d8

You can run from any place:
  ../../somewhere-strange/csuite.py sunspider baseline ./d8
  ../../somewhere-strange/csuite.py sunspider compare ./d8-better
'''

import os
from optparse import OptionParser
import subprocess
import sys

if __name__ == '__main__':
  parser = OptionParser(usage=__doc__)
  parser.add_option("-r", "--runs", dest="runs",
                    help="Override the default number of runs for the benchmark.")
  parser.add_option("-x", "--extra-arguments", dest="extra_args",
                    help="Pass these extra arguments to d8.")
  parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                    help="See more output about what magic csuite is doing.")
  (opts, args) = parser.parse_args()

  if len(args) < 3:
    print 'Not enough arguments. Aborting.'
    sys.exit(1)

  suite = args[0]
  mode = args[1]

  if suite not in ['octane', 'sunspider', 'kraken']:
    print 'Suite must be octane, sunspider or kraken. Aborting.'
    sys.exit(1)

  if mode != 'baseline' and mode != 'compare':
    print 'Mode must be baseline or compare. Aborting.'
    sys.exit(1)

  # Set up paths.
  d8_path = os.path.abspath(args[2])
  if not os.path.exists(d8_path):
    print d8_path + " is not valid."
    sys.exit(1)

  csuite_path = os.path.dirname(os.path.abspath(__file__))
  if not os.path.exists(csuite_path):
    print "The csuite directory is invalid."
    sys.exit(1)

  benchmark_py_path = os.path.join(csuite_path, "benchmark.py")
  if not os.path.exists(benchmark_py_path):
    print "Unable to find benchmark.py in " + csuite_path \
        + ". Aborting."
    sys.exit(1)

  compare_baseline_py_path = os.path.join(csuite_path,
                                          "compare-baseline.py")

  if not os.path.exists(compare_baseline_py_path):
    print "Unable to find compare-baseline.py in " + csuite_path \
        + ". Aborting."
    sys.exit(1)

  benchmark_path = os.path.abspath(os.path.join(csuite_path, "../data"))
  if not os.path.exists(benchmark_path):
    print "I can't find the benchmark data directory. Aborting."
    sys.exit(1)

  # Gather the extra arguments to pass along to d8.
  extra_args = ""
  if opts.extra_args:
    extra_args = opts.extra_args

  if suite == "octane":
    runs = 10
    suite_path = os.path.join(benchmark_path, "octane")
    cmd = "run.js"
  elif suite == "kraken":
    runs = 80
    suite_path = os.path.join(benchmark_path, "kraken")
    cmd = os.path.join(csuite_path, "run-kraken.js")
  else:
    runs = 100
    suite_path = os.path.join(benchmark_path, "sunspider")
    cmd = os.path.join(csuite_path, "sunspider-standalone-driver.js")

  if opts.runs:
    if (float(opts.runs) / runs) < 0.6:
      print "Normally, %s requires %d runs to get stable results." \
          % (suite, runs)
    runs = int(opts.runs)

  if opts.verbose:
    print "Running and averaging %s %d times." % (suite, runs)

  # Ensure the output directory is set up.
  output_path_base = os.path.abspath(os.getcwd())
  output_path = os.path.join(output_path_base, "_results")
  output_file = os.path.join(output_path, "master")
  if not os.path.exists(output_path):
    if opts.verbose:
      print "Creating directory %s." % output_path
    os.mkdir(output_path)

  if opts.verbose:
    print "Working directory for runs is %s." % suite_path

  inner_command = " -c \"%s --expose-gc %s %s \"" \
      % (d8_path, extra_args, cmd)
  if opts.verbose:
    print "Calling d8 like so: %s." % inner_command

  cmdline_base = "python %s %s -fv -r %d -d %s" \
      % (benchmark_py_path, inner_command, runs, output_path_base)

  if mode == "baseline":
    cmdline = "%s > %s" % (cmdline_base, output_file)
  else:
    cmdline = "%s | %s %s" \
        % (cmdline_base, compare_baseline_py_path, output_file)

  if opts.verbose:
    print "Spawning subprocess: %s." % cmdline
  return_code = subprocess.call(cmdline, shell=True, cwd=suite_path)
  if return_code < 0:
    print "Error return code: %d." % return_code
  if mode == "baseline":
    print "Wrote %s." % output_file
    print "Run %s again with compare mode to see results." % suite
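To make the plumbing concrete: for an Octane compare run, the string that
csuite.py hands to subprocess.call is equivalent to the command below (paths are
placeholders, the extra-arguments slot is empty, and the line breaks are only for
display). Baseline mode is identical except that the pipe into
compare-baseline.py is replaced by a redirect into `_results/master`:

    python /path/to/csuite/benchmark.py \
        -c "/path/to/d8 --expose-gc  run.js " -fv -r 10 -d /where/you/ran/csuite \
        | /path/to/csuite/compare-baseline.py /where/you/ran/csuite/_results/master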
test/benchmarks/csuite/run-kraken.js (new file, 63 lines)
// Copyright 2018 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file was copied from the output of
// http://hg.mozilla.org/projects/kraken/sunspider script.

var benchmarkPath = "";
var tests = [ "ai-astar", "audio-beat-detection", "audio-dft", "audio-fft", "audio-oscillator", "imaging-gaussian-blur", "imaging-darkroom", "imaging-desaturate", "json-parse-financial", "json-stringify-tinderbox", "stanford-crypto-aes", "stanford-crypto-ccm", "stanford-crypto-pbkdf2", "stanford-crypto-sha256-iterative" ];
var categories = [ "ai", "audio", "imaging", "json", "stanford" ];
var results = new Array();

var time = 0;
var times = [];

times.length = tests.length;

for (var krakenCounter = 0; krakenCounter < tests.length; krakenCounter++) {
  var testBase = benchmarkPath + tests[krakenCounter];
  var testName = testBase + ".js";
  var testData = testBase + "-data.js";
  // load test data
  load(testData);
  var startTime = new Date;
  load(testName);
  times[krakenCounter] = new Date() - startTime;
  gc();
}

function recordResults(tests, times)
{
  var output = "";

  for (j = 0; j < tests.length; j++) {
    output += tests[j] + '-orig(RunTime): ' + times[j] + ' ms.\n';
  }
  print(output);
}

recordResults(tests, times);
test/benchmarks/csuite/sunspider-standalone-driver.js (new file, 75 lines)
// Copyright 2018 the V8 project authors. All rights reserved.
/*
 * Copyright (C) 2007 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

var suitePath = "sunspider-0.9.1";
var tests = [ "3d-cube", "3d-morph", "3d-raytrace",
              "access-binary-trees", "access-fannkuch",
              "access-nbody", "access-nsieve",
              "bitops-3bit-bits-in-byte", "bitops-bits-in-byte",
              "bitops-bitwise-and", "bitops-nsieve-bits",
              "controlflow-recursive", "crypto-aes",
              "crypto-md5", "crypto-sha1", "date-format-tofte",
              "date-format-xparb", "math-cordic", "math-partial-sums",
              "math-spectral-norm", "regexp-dna", "string-base64",
              "string-fasta", "string-tagcloud", "string-unpack-code",
              "string-validate-input" ];
var categories = [ "3d", "access", "bitops", "controlflow", "crypto",
                   "date", "math", "regexp", "string" ];

var results = new Array();

(function(){

  var time = 0;
  var times = [];
  times.length = tests.length;

  for (var j = 0; j < tests.length; j++) {
    var testName = tests[j] + ".js";
    var startTime = new Date;
    if (testName.indexOf('parse-only') >= 0)
      checkSyntax(testName);
    else
      load(testName);
    times[j] = new Date() - startTime;
    gc();
  }

  function recordResults(tests, times)
  {
    var output = "";
    // Changed original output to match test infrastructure.
    for (j = 0; j < tests.length; j++) {
      output += tests[j] + '-sunspider(RunTime): ' +
          Math.max(times[j], 1) + ' ms.\n';
    }

    print(output);
  }

  recordResults(tests, times);

})();
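Both driver scripts above report one line per test, of the form
`<test>-orig(RunTime): <ms> ms.` (Kraken) or `<test>-sunspider(RunTime): <ms> ms.`
(SunSpider), which is exactly the shape that benchmark.py's ProcessLine tries
first. A minimal sketch of that match against a sample driver line; the timing
value is made up:

    import re

    line = "ai-astar-orig(RunTime): 243 ms."  # sample driver output, value assumed
    m = re.match(r"(?P<test_name>\w+(-\w+)*)\(RunTime\): (?P<score>\d+) ms\.", line)
    print(m.group("test_name") + " " + m.group("score"))  # -> ai-astar-orig 243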