[coverage] Enable sanitizer coverage.

This adds sanitizer-coverage compilation, test-runner
features and post-processing scripts.

Sanitizer coverage is expected to be used together with
ASan.

During test runner execution, the produced sancov files
are disambiguated and match the pattern:
<executable name>.test.<test id>.sancov.

Two additional scripts are added: one for merging raw
sancov files and one for generating JSON data containing
all instrumented lines plus all covered lines from merged
sancov files. Both scripts use multiprocessing for speed.

The JSON data will later be uploaded to Google Storage
for further use, e.g. to show coverage data in Rietveld.

Sancov documentation:
http://clang.llvm.org/docs/SanitizerCoverage.html
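
For illustration, the intended end-to-end flow could be driven roughly
like this. This is only a sketch: the scratch directory and output file
names are assumptions, and the build must first be compiled with asan
and sanitizer coverage enabled; only the flags shown appear in the
scripts added by this CL.

import subprocess

SANCOV_DIR = '/tmp/sancov'  # assumed scratch directory

# 1. Run tests with sancov collection; the runner writes
#    <executable name>.test.<test id>.sancov files into --sancov-dir.
subprocess.check_call(
    ['tools/run-tests.py', '--asan', '--sancov-dir', SANCOV_DIR])

# 2. Merge the raw sancov files per executable into
#    <executable name>.result.sancov files.
subprocess.check_call(
    ['tools/sanitizers/sancov_merger.py', '--coverage-dir', SANCOV_DIR])

# 3. Export all instrumented lines, then merge in the covered lines.
subprocess.check_call(
    ['tools/sanitizers/sancov_formatter.py', 'all',
     '--json-output', 'instrumented.json'])
subprocess.check_call(
    ['tools/sanitizers/sancov_formatter.py', 'merge',
     '--coverage-dir', SANCOV_DIR,
     '--json-input', 'instrumented.json',
     '--json-output', 'coverage.json'])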

BUG=chromium:568949
LOG=n
NOTRY=true
TEST=python -m unittest sancov_formatter_test
TEST=python -m unittest sancov_merger_test

Review URL: https://codereview.chromium.org/1737263003

Cr-Commit-Position: refs/heads/master@{#34578}
Author: machenbach, 2016-03-08 02:47:58 -08:00 (committed by Commit bot)
Parent: cabe6844c2
Commit: 33ffced5cc
13 changed files with 884 additions and 22 deletions


@@ -11,18 +11,26 @@ import subprocess
import sys
exclusions = [
'buildtools',
'src/third_party',
'third_party',
'test',
'testing',
]
def remove_if_exists(string_list, item):
if item in string_list:
string_list.remove(item)
args = sys.argv[1:]
text = ' '.join(sys.argv[2:])
for exclusion in exclusions:
if re.search(r'\-o obj/%s[^ ]*\.o' % exclusion, text):
args.remove('-fprofile-arcs')
args.remove('-ftest-coverage')
remove_if_exists(args, '-fprofile-arcs')
remove_if_exists(args, '-ftest-coverage')
remove_if_exists(args, '-fsanitize-coverage=func')
remove_if_exists(args, '-fsanitize-coverage=bb')
remove_if_exists(args, '-fsanitize-coverage=edge')
break
sys.exit(subprocess.check_call(args))
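
A self-contained sketch of the wrapper's exclusion logic above, run on a
made-up compile command (the file names are hypothetical):

import re

exclusions = ['buildtools', 'test', 'third_party']

def remove_if_exists(string_list, item):
  if item in string_list:
    string_list.remove(item)

# A compile command whose output object lands in an excluded directory...
args = ['clang++', '-fsanitize-coverage=edge', '-c', 'foo.cc',
        '-o', 'obj/test/foo.o']
text = ' '.join(args)
for exclusion in exclusions:
  if re.search(r'\-o obj/%s[^ ]*\.o' % exclusion, text):
    remove_if_exists(args, '-fsanitize-coverage=edge')
    break
print(args)  # ...loses its '-fsanitize-coverage=edge' flag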


@@ -68,7 +68,9 @@
'target_arch%': '<(host_arch)',
'base_dir%': '<!(cd <(DEPTH) && python -c "import os; print os.getcwd()")',
# Instrument for code coverage with gcov.
# Instrument for code coverage and use coverage wrapper to exclude some
# files. Uses gcov if clang=0 is set explicitly. Otherwise,
# sanitizer_coverage must be set too.
'coverage%': 0,
},
'base_dir%': '<(base_dir)',
@@ -122,8 +124,7 @@
}, {
'gomadir': '<!(/bin/echo -n ${HOME}/goma)',
}],
['host_arch!="ppc" and host_arch!="ppc64" and host_arch!="ppc64le" and host_arch!="s390" and host_arch!="s390x" and \
coverage==0', {
['host_arch!="ppc" and host_arch!="ppc64" and host_arch!="ppc64le" and host_arch!="s390" and host_arch!="s390x"', {
'host_clang%': 1,
}, {
'host_clang%': 0,
@@ -228,7 +229,7 @@
'v8_enable_gdbjit%': 0,
}],
['(OS=="linux" or OS=="mac") and (target_arch=="ia32" or target_arch=="x64") and \
(v8_target_arch!="x87" and v8_target_arch!="x32") and coverage==0', {
(v8_target_arch!="x87" and v8_target_arch!="x32")', {
'clang%': 1,
}, {
'clang%': 0,
@@ -727,7 +728,7 @@
[ 'component=="shared_library"', {
'cflags': [ '-fPIC', ],
}],
[ 'coverage==1', {
[ 'clang==0 and coverage==1', {
'cflags': [ '-fprofile-arcs', '-ftest-coverage'],
'ldflags': [ '-fprofile-arcs'],
}],


@@ -387,7 +387,8 @@ def Execute(arch, mode, args, options, suites, workspace):
0, # No use of a rerun-failing-tests maximum.
False, # No predictable mode.
False, # No no_harness mode.
False) # Don't use perf data.
False, # Don't use perf data.
False) # Coverage not supported.
# Find available test suites and read test cases from them.
variables = {


@@ -208,6 +208,8 @@ def BuildOptions():
result.add_option("--asan",
help="Regard test expectations for ASAN",
default=False, action="store_true")
result.add_option("--sancov-dir",
help="Directory where to collect coverage data")
result.add_option("--cfi-vptr",
help="Run tests with UBSAN cfi_vptr option.",
default=False, action="store_true")
@@ -385,6 +387,14 @@ def SetupEnvironment(options):
if options.asan:
os.environ['ASAN_OPTIONS'] = symbolizer
if options.sancov_dir:
assert os.path.exists(options.sancov_dir)
os.environ['ASAN_OPTIONS'] = ":".join([
'coverage=1',
'coverage_dir=%s' % options.sancov_dir,
symbolizer,
])
if options.cfi_vptr:
os.environ['UBSAN_OPTIONS'] = ":".join([
'print_stacktrace=1',
@@ -688,7 +698,8 @@ def Execute(arch, mode, args, options, suites):
options.rerun_failures_max,
options.predictable,
options.no_harness,
use_perf_data=not options.swarming)
use_perf_data=not options.swarming,
sancov_dir=options.sancov_dir)
# TODO(all): Combine "simulator" and "simulator_run".
simulator_run = not options.dont_skip_simulator_slow_tests and \
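
To make the environment setup above concrete, here is a sketch of what
the runner ends up exporting; the symbolizer setting and the directory
are assumptions for illustration:

import os

symbolizer = 'external_symbolizer_path=/path/to/llvm-symbolizer'  # assumed
os.environ['ASAN_OPTIONS'] = ':'.join([
    'coverage=1',
    'coverage_dir=/tmp/sancov',  # assumed --sancov-dir value
    symbolizer,
])
# ASan-instrumented binaries now dump <executable name>.<pid>.sancov
# files into /tmp/sancov when they exit.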


@@ -0,0 +1,397 @@
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script to transform and merge sancov files into human readable json-format.
The script supports two actions:
all: Writes a json file with all instrumented lines of all executables.
merge: Merges sancov files with coverage output into an existing json file.
The json data is structured as follows:
{
"version": 1,
"tests": ["executable1", "executable2", ...],
"files": {
"file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],
"file2": [...],
...
}
}
The executables are sorted and determine the test bit mask: the bit value of
an executable is 1 << index, e.g. executable1 = 1, executable3 = 4, etc.
Hence, a line covered by executable1 and executable3 will have
bit_mask == 5 == 0b101. The number of tests is restricted to 52 in version 1,
to allow JavaScript JSON parsing of the bit masks encoded as numbers. The JS
max safe integer is 2**53 - 1.
The line-number-bit_mask pairs are sorted by line number and don't contain
duplicates.
The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added as a custom deps:
'v8/third_party/llvm/projects/compiler-rt':
'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
"""
import argparse
import json
import logging
import os
import re
import subprocess
import sys
from multiprocessing import Pool, cpu_count
logging.basicConfig(level=logging.INFO)
# Files to exclude from coverage. Dropping their data early speeds up
# processing. The contained cc files are already excluded from
# instrumentation, but inlined data is referenced through v8's object files.
EXCLUSIONS = [
'buildtools',
'src/third_party',
'third_party',
'test',
'testing',
]
# Executables found in the build output for which no coverage is generated.
# Exclude them from the coverage data file.
EXE_BLACKLIST = [
'generate-bytecode-expectations',
'hello-world',
'mksnapshot',
'parser-shell',
'process',
'shell',
]
# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
os.path.abspath(__file__))))
# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')
# Path prefix added by the llvm symbolizer including trailing slash.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')
# The sancov tool location.
SANCOV_TOOL = os.path.join(
BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
'lib', 'sanitizer_common', 'scripts', 'sancov.py')
# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')
# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
'llvm-symbolizer')
# Number of cpus.
CPUS = cpu_count()
# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result\.sancov$')
def executables():
"""Iterates over executable files in the build directory."""
for f in os.listdir(BUILD_DIR):
file_path = os.path.join(BUILD_DIR, f)
if (os.path.isfile(file_path) and
os.access(file_path, os.X_OK) and
f not in EXE_BLACKLIST):
yield file_path
def process_symbolizer_output(output):
"""Post-process llvm symbolizer output.
Excludes files outside the v8 checkout or given in exclusion list above
from further processing. Drops the character index in each line.
Returns: A mapping of file names to lists of line numbers. The file names
have relative paths to the v8 base directory. The lists of line
numbers don't contain duplicate lines and are sorted.
"""
# Drop path prefix when iterating lines. The path is redundant and takes
# too much space. Drop files outside that path, e.g. generated files in
# the build dir and absolute paths to c++ library headers.
def iter_lines():
for line in output.strip().splitlines():
if line.startswith(OUTPUT_PATH_PREFIX):
yield line[len(OUTPUT_PATH_PREFIX):]
# Map file names to sets of instrumented line numbers.
file_map = {}
for line in iter_lines():
# Drop character number, we only care for line numbers. Each line has the
# form: <file name>:<line number>:<character number>.
file_name, number, _ = line.split(':')
file_map.setdefault(file_name, set([])).add(int(number))
# Remove exclusion patterns from file map. It's cheaper to do it after the
# mapping, as there are few excluded files and we don't want to do this
# check for numerous lines in ordinary files.
def keep(file_name):
for e in EXCLUSIONS:
if file_name.startswith(e):
return False
return True
# Return in serializable form and filter.
return {k: sorted(file_map[k]) for k in file_map if keep(k)}
def get_instrumented_lines(executable):
"""Return the instrumented lines of an executable.
Called through a multiprocessing pool.
Returns: Post-processed llvm output as returned by process_symbolizer_output.
"""
# The first two pipe stages replicate llvm's sancov.py tool, with 0x added
# to the hex numbers. The results are piped into the llvm symbolizer, which
# outputs for each PC: <file name with abs path>:<line number>:<character
# number>. We skip calling the sancov tool itself for speed.
process = subprocess.Popen(
'objdump -d %s | '
'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
'<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
'grep \'^\s\+[0-9a-f]\+\' -o | '
'%s | '
'%s --obj %s -functions=none' %
(executable, SANITIZE_PCS, SYMBOLIZER, executable),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
cwd=BASE_DIR,
shell=True,
)
output, _ = process.communicate()
assert process.returncode == 0
return process_symbolizer_output(output)
def merge_instrumented_line_results(exe_list, results):
"""Merge multiprocessing results for all instrumented lines.
Args:
exe_list: List of all executable names with absolute paths.
results: List of results as returned by get_instrumented_lines.
Returns: Dict to be used as json data as specified on the top of this page.
The dictionary contains all instrumented lines of all files
referenced by all executables.
"""
def merge_files(x, y):
for file_name, lines in y.iteritems():
x.setdefault(file_name, set([])).update(lines)
return x
result = reduce(merge_files, results, {})
# Return data as file->lines mapping. The lines are saved as lists
# with (line number, test bits (as int)). The test bits are initialized with
# 0, meaning instrumented, but no coverage.
# The order of the test bits is given with key 'tests'. For now, these are
# the executable names. We use a _list_ with two items instead of a tuple to
# ease merging by allowing mutation of the second item.
return {
'version': 1,
'tests': sorted(map(os.path.basename, exe_list)),
'files': {f: map(lambda l: [l, 0], sorted(result[f])) for f in result},
}
def write_instrumented(options):
"""Implements the 'all' action of this tool."""
exe_list = list(executables())
logging.info('Reading instrumented lines from %d executables.' %
len(exe_list))
pool = Pool(CPUS)
try:
results = pool.imap_unordered(get_instrumented_lines, exe_list)
finally:
pool.close()
# Merge multiprocessing results and prepare output data.
data = merge_instrumented_line_results(exe_list, results)
logging.info('Read data from %d executables, which covers %d files.' %
(len(data['tests']), len(data['files'])))
logging.info('Writing results to %s' % options.json_output)
# Write json output.
with open(options.json_output, 'w') as f:
json.dump(data, f, sort_keys=True)
def get_covered_lines(args):
"""Return the covered lines of an executable.
Called through a multiprocessing pool. The args are expected to unpack to:
cov_dir: Folder with sancov files merged by sancov_merger.py.
executable: The executable that was called to produce the given coverage
data.
sancov_file: The merged sancov file with coverage data.
Returns: A tuple of post-processed llvm output as returned by
process_symbolizer_output and the executable name.
"""
cov_dir, executable, sancov_file = args
# Let the sancov tool print the covered PCs and pipe them through the llvm
# symbolizer.
process = subprocess.Popen(
'%s print %s 2> /dev/null | '
'%s --obj %s -functions=none' %
(SANCOV_TOOL,
os.path.join(cov_dir, sancov_file),
SYMBOLIZER,
os.path.join(BUILD_DIR, executable)),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
cwd=BASE_DIR,
shell=True,
)
output, _ = process.communicate()
assert process.returncode == 0
return process_symbolizer_output(output), executable
def merge_covered_line_results(data, results):
"""Merge multiprocessing results for covered lines.
The data is mutated, the results are merged into it in place.
Args:
data: Existing coverage data from json file containing all instrumented
lines.
results: List of results as returned by get_covered_lines.
"""
# List of executables and mapping to the test bit mask. The number of
# tests is restricted to 52, to allow JavaScript JSON parsing of the bit
# masks encoded as numbers. The JS max safe integer is 2**53 - 1.
exe_list = data['tests']
assert len(exe_list) <= 52, 'Max 52 different tests are supported.'
test_bit_masks = {exe:1<<i for i, exe in enumerate(exe_list)}
def merge_lines(old_lines, new_lines, mask):
"""Merge the coverage data of a list of lines.
Args:
old_lines: Lines as list of pairs with line number and test bit mask.
The new lines will be merged into the list in place.
new_lines: List of new (covered) lines (sorted).
mask: The bit to be set for covered lines. The bit index is the test
index of the executable that covered the line.
"""
i = 0
# Iterate over old and new lines, both are sorted.
for l in new_lines:
while old_lines[i][0] < l:
# Forward instrumented lines not present in this coverage data.
i += 1
# TODO: Add more context to the assert message.
assert i < len(old_lines), 'Covered line %d not in input file.' % l
assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l
# Add coverage information to the line.
old_lines[i][1] |= mask
def merge_files(data, result):
"""Merge result into data.
The data is mutated in place.
Args:
data: Merged coverage data from the previous reduce step.
result: New result to be merged in. The type is as returned by
get_covered_lines.
"""
file_map, executable = result
files = data['files']
for file_name, lines in file_map.iteritems():
merge_lines(files[file_name], lines, test_bit_masks[executable])
return data
reduce(merge_files, results, data)
def merge(options):
"""Implements the 'merge' action of this tool."""
# Check if folder with coverage output exists.
assert (os.path.exists(options.coverage_dir) and
os.path.isdir(options.coverage_dir))
# Inputs for multiprocessing. List of tuples of:
# Coverage dir, executable name, sancov file name.
inputs = []
for f in os.listdir(options.coverage_dir):
match = SANCOV_FILE_RE.match(f)
if match:
inputs.append((options.coverage_dir, match.group(1), f))
logging.info('Merging %d sancov files into %s' %
(len(inputs), options.json_input))
# Post-process covered lines in parallel.
pool = Pool(CPUS)
try:
results = pool.imap_unordered(get_covered_lines, inputs)
finally:
pool.close()
# Load existing json data file for merging the results.
with open(options.json_input, 'r') as f:
data = json.load(f)
# Merge multiprocessing results. Mutates data.
merge_covered_line_results(data, results)
logging.info('Merged data from %d executables, which covers %d files.' %
(len(data['tests']), len(data['files'])))
logging.info('Writing results to %s' % options.json_output)
# Write merged results to file.
with open(options.json_output, 'w') as f:
json.dump(data, f, sort_keys=True)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--coverage-dir',
help='Path to the sancov output files.')
parser.add_argument('--json-input',
help='Path to an existing json file with coverage data.')
parser.add_argument('--json-output', required=True,
help='Path to a file to write json output to.')
parser.add_argument('action', choices=['all', 'merge'],
help='Action to perform.')
options = parser.parse_args()
if options.action.lower() == 'all':
write_instrumented(options)
elif options.action.lower() == 'merge':
if not options.coverage_dir:
print '--coverage-dir is required'
return 1
if not options.json_input:
print '--json-input is required'
return 1
merge(options)
return 0
if __name__ == '__main__':
sys.exit(main())
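
To illustrate the format documented in the docstring above, a small
sketch that decodes which tests covered a given line; the input file
name and the source file looked up are assumptions:

import json

with open('coverage.json') as f:  # assumed output of the 'merge' action
  data = json.load(f)

tests = data['tests']  # sorted executable names; index i <-> bit 1 << i
for line, mask in data['files']['src/foo.cc']:  # hypothetical file
  covered = [t for i, t in enumerate(tests) if mask & (1 << i)]
  print('line %d: %s' % (line, ', '.join(covered) or 'instrumented only'))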


@@ -0,0 +1,159 @@
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Requires python-coverage. Native python coverage version >= 3.7.1 should
# be installed to get the best speed.
import copy
import coverage
import logging
import os
import sys
import unittest
# Directory of this file.
LOCATION = os.path.dirname(os.path.abspath(__file__))
# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(LOCATION))
# Executable location.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')
def abs_line(line):
"""Absolute paths as output by the llvm symbolizer."""
return '%s/%s' % (BUILD_DIR, line)
#------------------------------------------------------------------------------
# Data for test_process_symbolizer_output. This simulates output from the
# llvm symbolizer. The paths are not normalized.
SYMBOLIZER_OUTPUT = (
abs_line('../../src/foo.cc:87:7\n') +
abs_line('../../src/foo.cc:92:0\n') + # Test sorting.
abs_line('../../src/baz/bar.h:1234567:0\n') + # Test large line numbers.
abs_line('../../src/foo.cc:92:0\n') + # Test duplicates.
abs_line('../../src/baz/bar.h:0:0\n') + # Test subdirs.
'/usr/include/cool_stuff.h:14:2\n' + # Test dropping absolute paths.
abs_line('../../src/foo.cc:87:10\n') + # Test dropping character indexes.
abs_line('../../third_party/icu.cc:0:0\n') + # Test dropping excluded dirs.
abs_line('../../src/baz/bar.h:11:0\n')
)
# The expected post-processed output maps relative file names to line numbers.
# The numbers are sorted and unique.
EXPECTED_PROCESSED_OUTPUT = {
'src/baz/bar.h': [0, 11, 1234567],
'src/foo.cc': [87, 92],
}
#------------------------------------------------------------------------------
# Data for test_merge_instrumented_line_results. A list of absolute paths to
# all executables.
EXE_LIST = [
'/path/to/d8',
'/path/to/cctest',
'/path/to/unittests',
]
# Post-processed llvm symbolizer output as returned by
# process_symbolizer_output. These are lists of this output for merging.
INSTRUMENTED_LINE_RESULTS = [
{
'src/baz/bar.h': [0, 3, 7],
'src/foo.cc': [11],
},
{
'src/baz/bar.h': [3, 7, 8],
'src/baz.cc': [2],
'src/foo.cc': [1, 92],
},
{
'src/baz.cc': [1],
'src/foo.cc': [92, 93],
},
]
# This shows initial instrumentation. No lines are covered, hence,
# the coverage mask is 0 for all lines. The line tuples remain sorted by
# line number and contain no duplicates.
EXPECTED_INSTRUMENTED_LINES_DATA = {
'version': 1,
'tests': ['cctest', 'd8', 'unittests'],
'files': {
'src/baz/bar.h': [[0, 0], [3, 0], [7, 0], [8, 0]],
'src/baz.cc': [[1, 0], [2, 0]],
'src/foo.cc': [[1, 0], [11, 0], [92, 0], [93, 0]],
},
}
#------------------------------------------------------------------------------
# Data for test_merge_covered_line_results. List of post-processed
# llvm-symbolizer output as a tuple including the executable name of each data
# set.
COVERED_LINE_RESULTS = [
({
'src/baz/bar.h': [3, 7],
'src/foo.cc': [11],
}, 'd8'),
({
'src/baz/bar.h': [3, 7],
'src/baz.cc': [2],
'src/foo.cc': [1],
}, 'cctest'),
({
'src/foo.cc': [92],
'src/baz.cc': [2],
}, 'unittests'),
]
# This shows initial instrumentation + coverage. The mask bits are:
# cctest: 1, d8: 2, unittests:4. So a line covered by cctest and unittests
# has a coverage mask of 0b101, e.g. line 2 in src/baz.cc.
EXPECTED_COVERED_LINES_DATA = {
'version': 1,
'tests': ['cctest', 'd8', 'unittests'],
'files': {
'src/baz/bar.h': [[0, 0b0], [3, 0b11], [7, 0b11], [8, 0b0]],
'src/baz.cc': [[1, 0b0], [2, 0b101]],
'src/foo.cc': [[1, 0b1], [11, 0b10], [92, 0b100], [93, 0b0]],
},
}
class FormatterTests(unittest.TestCase):
@classmethod
def setUpClass(cls):
sys.path.append(LOCATION)
cls._cov = coverage.coverage(
include=([os.path.join(LOCATION, 'sancov_formatter.py')]))
cls._cov.start()
import sancov_formatter
global sancov_formatter
@classmethod
def tearDownClass(cls):
cls._cov.stop()
cls._cov.report()
def test_process_symbolizer_output(self):
result = sancov_formatter.process_symbolizer_output(SYMBOLIZER_OUTPUT)
self.assertEquals(EXPECTED_PROCESSED_OUTPUT, result)
def test_merge_instrumented_line_results(self):
result = sancov_formatter.merge_instrumented_line_results(
EXE_LIST, INSTRUMENTED_LINE_RESULTS)
self.assertEquals(EXPECTED_INSTRUMENTED_LINES_DATA, result)
def test_merge_covered_line_results(self):
data = copy.deepcopy(EXPECTED_INSTRUMENTED_LINES_DATA)
sancov_formatter.merge_covered_line_results(
data, COVERED_LINE_RESULTS)
self.assertEquals(EXPECTED_COVERED_LINES_DATA, data)

tools/sanitizers/sancov_merger.py (new executable file)

@@ -0,0 +1,167 @@
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script for merging sancov files in parallel.
The sancov files are expected to be located in one directory with the
file-name pattern:
<executable name>.test.<id>.sancov
For each executable, this script writes a new file:
<executable name>.result.sancov
The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added as a custom deps:
'v8/third_party/llvm/projects/compiler-rt':
'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
"""
import argparse
import logging
import math
import os
import re
import subprocess
import sys
from multiprocessing import Pool, cpu_count
logging.basicConfig(level=logging.INFO)
# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
os.path.abspath(__file__))))
# The sancov tool location.
SANCOV_TOOL = os.path.join(
BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
'lib', 'sanitizer_common', 'scripts', 'sancov.py')
# Number of cpus.
CPUS = cpu_count()
# Regexp to find sancov file as output by the v8 test runner. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.sancov$')
def merge(args):
"""Merge several sancov files into one.
Called through a multiprocessing pool. The args are expected to unpack to:
keep: Option if source and intermediate sancov files should be kept.
coverage_dir: Folder where to find the sancov files.
executable: Name of the executable whose sancov files should be merged.
index: A number to be put into the intermediate result file name.
If None, this is a final result.
bucket: The list of sancov files to be merged.
Returns: A tuple with the executable name and the result file name.
"""
keep, coverage_dir, executable, index, bucket = args
process = subprocess.Popen(
[SANCOV_TOOL, 'merge'] + bucket,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=coverage_dir,
)
output, _ = process.communicate()
assert process.returncode == 0
if index is not None:
# This is an intermediate result, add the bucket index to the file name.
result_file_name = '%s.result.%d.sancov' % (executable, index)
else:
# This is the final result without bucket index.
result_file_name = '%s.result.sancov' % executable
with open(os.path.join(coverage_dir, result_file_name), "wb") as f:
f.write(output)
if not keep:
for f in bucket:
os.remove(os.path.join(coverage_dir, f))
return executable, result_file_name
def generate_inputs(keep, coverage_dir, file_map, cpus):
"""Generate inputs for multiprocessed merging.
Splits the sancov files into several buckets, so that each bucket can be
merged in a separate process. There are only a few executables in total,
each typically with many associated files. In the general case, with many
executables, we might need to avoid splitting the buckets of executables
that have few files.
Returns: List of args as expected by merge above.
"""
inputs = []
for executable, files in file_map.iteritems():
# Determine the bucket size for distributing files for merging, e.g. with
# 2 CPUs and 9 files we want a bucket size of 5.
n = max(2, int(math.ceil(len(files) / float(cpus))))
# Chop files into buckets.
buckets = [files[i:i+n] for i in xrange(0, len(files), n)]
# Inputs for multiprocessing. List of tuples containing:
# Keep-files option, base path, executable name, index of bucket,
# list of files.
inputs.extend([(keep, coverage_dir, executable, i, b)
for i, b in enumerate(buckets)])
return inputs
def merge_parallel(inputs):
"""Process several merge jobs in parallel."""
pool = Pool(CPUS)
try:
return pool.map(merge, inputs)
finally:
pool.close()
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--coverage-dir', required=True,
help='Path to the sancov output files.')
parser.add_argument('--keep', default=False, action='store_true',
help='Keep sancov output files after merging.')
options = parser.parse_args()
# Check if folder with coverage output exists.
assert (os.path.exists(options.coverage_dir) and
os.path.isdir(options.coverage_dir))
# Map executable names to their respective sancov files.
file_map = {}
for f in os.listdir(options.coverage_dir):
match = SANCOV_FILE_RE.match(f)
if match:
file_map.setdefault(match.group(1), []).append(f)
inputs = generate_inputs(
options.keep, options.coverage_dir, file_map, CPUS)
logging.info('Executing %d merge jobs in parallel for %d executables.' %
(len(inputs), len(file_map)))
results = merge_parallel(inputs)
# Map executable names to intermediate bucket result files.
file_map = {}
for executable, f in results:
file_map.setdefault(executable, []).append(f)
# Merge the bucket results for each executable.
# The final result has index None, so no index will appear in the
# file name.
inputs = [(options.keep, options.coverage_dir, executable, None, files)
for executable, files in file_map.iteritems()]
logging.info('Merging %d intermediate results.' % len(inputs))
merge_parallel(inputs)
return 0
if __name__ == '__main__':
sys.exit(main())
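
A quick sketch of the bucketing arithmetic in generate_inputs, using a
made-up file list; with 2 CPUs and 7 files this yields buckets of 4 and
3 files, matching the test data below:

import math

files = ['d8.test.%d.sancov' % i for i in range(1, 8)]  # 7 files, made up
cpus = 2
n = max(2, int(math.ceil(len(files) / float(cpus))))  # bucket size 4
buckets = [files[i:i + n] for i in range(0, len(files), n)]
print(buckets)  # two buckets with 4 and 3 files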


@@ -0,0 +1,82 @@
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
import sancov_merger
# Files on disk after test runner completes. The files are mapped by
# executable name -> file list.
FILE_MAP = {
'd8': [
'd8.test.1.sancov',
'd8.test.2.sancov',
'd8.test.3.sancov',
'd8.test.4.sancov',
'd8.test.5.sancov',
'd8.test.6.sancov',
'd8.test.7.sancov',
],
'cctest': [
'cctest.test.1.sancov',
'cctest.test.2.sancov',
'cctest.test.3.sancov',
'cctest.test.4.sancov',
],
}
# Inputs for merge process with 2 cpus. The tuples contain:
# (flag, path, executable name, intermediate result index, file list).
EXPECTED_INPUTS_2 = [
(False, '/some/path', 'cctest', 0, [
'cctest.test.1.sancov',
'cctest.test.2.sancov']),
(False, '/some/path', 'cctest', 1, [
'cctest.test.3.sancov',
'cctest.test.4.sancov']),
(False, '/some/path', 'd8', 0, [
'd8.test.1.sancov',
'd8.test.2.sancov',
'd8.test.3.sancov',
'd8.test.4.sancov']),
(False, '/some/path', 'd8', 1, [
'd8.test.5.sancov',
'd8.test.6.sancov',
'd8.test.7.sancov']),
]
# The same for 4 cpus.
EXPECTED_INPUTS_4 = [
(True, '/some/path', 'cctest', 0, [
'cctest.test.1.sancov',
'cctest.test.2.sancov']),
(True, '/some/path', 'cctest', 1, [
'cctest.test.3.sancov',
'cctest.test.4.sancov']),
(True, '/some/path', 'd8', 0, [
'd8.test.1.sancov',
'd8.test.2.sancov']),
(True, '/some/path', 'd8', 1, [
'd8.test.3.sancov',
'd8.test.4.sancov']),
(True, '/some/path', 'd8', 2, [
'd8.test.5.sancov',
'd8.test.6.sancov']),
(True, '/some/path', 'd8', 3, [
'd8.test.7.sancov'])]
class MergerTests(unittest.TestCase):
def test_generate_inputs_2_cpu(self):
inputs = sancov_merger.generate_inputs(
False, '/some/path', FILE_MAP, 2)
self.assertEquals(EXPECTED_INPUTS_2, inputs)
def test_generate_inputs_4_cpu(self):
inputs = sancov_merger.generate_inputs(
True, '/some/path', FILE_MAP, 4)
self.assertEquals(EXPECTED_INPUTS_4, inputs)


@@ -0,0 +1,11 @@
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Corrects objdump output. The logic is from sancov.py, see comments there."""
import sys
for line in sys.stdin:
print '0x%x' % (int(line.strip(), 16) + 4)
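
For illustration, the transformation applied per input line (the address
is made up): each objdump PC is parsed as hex and shifted by 4, matching
the adjustment described in llvm's sancov.py.

line = '4005d0'  # made-up objdump PC
print('0x%x' % (int(line.strip(), 16) + 4))  # -> 0x4005d4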


@@ -107,14 +107,16 @@ def RunProcess(verbose, timeout, args, **rest):
timer.start()
stdout, stderr = process.communicate()
timer.cancel()
return process.returncode, timeout_result[0], stdout, stderr
return output.Output(
process.returncode,
timeout_result[0],
stdout,
stderr,
process.pid,
)
def Execute(args, verbose=False, timeout=None):
args = [ c for c in args if c != "" ]
exit_code, timed_out, stdout, stderr = RunProcess(
verbose,
timeout,
args=args,
)
return output.Output(exit_code, timed_out, stdout, stderr)
return RunProcess(verbose, timeout, args=args)


@@ -144,6 +144,26 @@ class TestJob(Job):
def __init__(self, test):
self.test = test
def _rename_coverage_data(self, output, context):
"""Rename coverage data.
Rename files with PIDs to files with unique test IDs, because the number
of tests might be higher than pid_max. E.g.:
d8.1234.sancov -> d8.test.1.sancov, where 1234 was the process' PID
and 1 is the test ID.
"""
if context.sancov_dir:
sancov_file = os.path.join(
context.sancov_dir, "%s.%d.sancov" % (self.test.shell(), output.pid))
# Some tests are expected to fail and don't produce coverage data.
if os.path.exists(sancov_file):
parts = sancov_file.split(".")
new_sancov_file = ".".join(
parts[:-2] + ["test", str(self.test.id)] + parts[-1:])
assert not os.path.exists(new_sancov_file)
os.rename(sancov_file, new_sancov_file)
def Run(self, process_context):
try:
# Retrieve a new suite object on the worker-process side. The original
@@ -155,6 +175,7 @@ class TestJob(Job):
start_time = time.time()
output = commands.Execute(instr.command, instr.verbose, instr.timeout)
self._rename_coverage_data(output, process_context.context)
return (instr.id, output, time.time() - start_time)
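
A sketch of the renaming mechanics in _rename_coverage_data, with a
made-up PID and test ID:

parts = '/tmp/sancov/d8.1234.sancov'.split('.')  # hypothetical PID file
print('.'.join(parts[:-2] + ['test', '1'] + parts[-1:]))
# -> /tmp/sancov/d8.test.1.sancov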


@@ -30,7 +30,7 @@ class Context():
def __init__(self, arch, mode, shell_dir, mode_flags, verbose, timeout,
isolates, command_prefix, extra_flags, noi18n, random_seed,
no_sorting, rerun_failures_count, rerun_failures_max,
predictable, no_harness, use_perf_data):
predictable, no_harness, use_perf_data, sancov_dir):
self.arch = arch
self.mode = mode
self.shell_dir = shell_dir
@@ -48,13 +48,14 @@ class Context():
self.predictable = predictable
self.no_harness = no_harness
self.use_perf_data = use_perf_data
self.sancov_dir = sancov_dir
def Pack(self):
return [self.arch, self.mode, self.mode_flags, self.timeout, self.isolates,
self.command_prefix, self.extra_flags, self.noi18n,
self.random_seed, self.no_sorting, self.rerun_failures_count,
self.rerun_failures_max, self.predictable, self.no_harness,
self.use_perf_data]
self.use_perf_data, self.sancov_dir]
@staticmethod
def Unpack(packed):
@@ -62,4 +63,4 @@ class Context():
return Context(packed[0], packed[1], None, packed[2], False,
packed[3], packed[4], packed[5], packed[6], packed[7],
packed[8], packed[9], packed[10], packed[11], packed[12],
packed[13], packed[14])
packed[13], packed[14], packed[15])


@@ -32,11 +32,12 @@ from ..local import utils
class Output(object):
def __init__(self, exit_code, timed_out, stdout, stderr):
def __init__(self, exit_code, timed_out, stdout, stderr, pid):
self.exit_code = exit_code
self.timed_out = timed_out
self.stdout = stdout
self.stderr = stderr
self.pid = pid
def HasCrashed(self):
if utils.IsWindows():
@@ -52,9 +53,9 @@ class Output(object):
return self.timed_out
def Pack(self):
return [self.exit_code, self.timed_out, self.stdout, self.stderr]
return [self.exit_code, self.timed_out, self.stdout, self.stderr, self.pid]
@staticmethod
def Unpack(packed):
# For the order of the fields, refer to Pack() above.
return Output(packed[0], packed[1], packed[2], packed[3])
return Output(packed[0], packed[1], packed[2], packed[3], packed[4])