[gcmole] Add API for running on multiple hosts

This adds new modes to run gcmole on multiple hosts:
- collect: Parse source files and store partial raw call graph in
a binary output file.
- merge: Read multiple partial call graphs stored by 'collect', merge
them and store the gc-suspect information.
- check: Use the stored suspect information to check source files for
problems.

The modes 'collect' and 'check' support sharding parameters, so that
each shard runs on a deterministic fraction of the source files.

The previous way of running gcmole locally remains available,
unchanged, as the 'full' mode. All modes also support the --test-run
parameter, which runs them against a test double.

The parameter to reuse suspects is removed as the new 'check' mode
makes it redundant.

This also adds Python tests that cover most of the additions for
merging call graphs and for distributing source files to shards.

The new API is used on the infra side via the gcmole_v3 config, which
we now configure in parallel to the previous version (gcmole_v2) so
that the results of both can be compared for equality.

Based on the builders.pyl change, these additions also run on the
v8_linux_rel trybot on this CL.

Bug: v8:12660
Change-Id: Ibe66292cb00830fa1c23451081a8de4cbc4766a1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4075907
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Reviewed-by: Liviu Rau <liviurau@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84674}
This commit is contained in:
Michael Achenbach 2022-12-05 16:22:46 +01:00 committed by V8 LUCI CQ
parent 6ec1df0b92
commit 2d51120a9d
4 changed files with 211 additions and 31 deletions

View File

@ -184,6 +184,11 @@
{'name': 'gcmole_v2', 'variant': 'x64'},
{'name': 'gcmole_v2', 'variant': 'arm'},
{'name': 'gcmole_v2', 'variant': 'arm64'},
# TODO(https://crbug.com/v8/12660): Remove v2 above after testing.
{'name': 'gcmole_v3', 'variant': 'ia32', 'shards': 4},
{'name': 'gcmole_v3', 'variant': 'x64', 'shards': 4},
{'name': 'gcmole_v3', 'variant': 'arm', 'shards': 4},
{'name': 'gcmole_v3', 'variant': 'arm64', 'shards': 4},
],
},
'v8_linux_optional_rel': {
@ -1076,6 +1081,11 @@
{'name': 'gcmole_v2', 'variant': 'x64'},
{'name': 'gcmole_v2', 'variant': 'arm'},
{'name': 'gcmole_v2', 'variant': 'arm64'},
# TODO(https://crbug.com/v8/12660): Remove v2 above after testing.
{'name': 'gcmole_v3', 'variant': 'ia32', 'shards': 4},
{'name': 'gcmole_v3', 'variant': 'x64', 'shards': 4},
{'name': 'gcmole_v3', 'variant': 'arm', 'shards': 4},
{'name': 'gcmole_v3', 'variant': 'arm64', 'shards': 4},
],
},
'V8 Linux - arm64 - sim - CFI': {

View File

@ -17,6 +17,7 @@ import difflib
import io
import json
import os
import pickle
import re
import subprocess
import sys
@ -257,7 +258,11 @@ def build_file_list(options):
for file in file_pattern.findall(sources):
result.append(options.v8_root_dir / prefix / file)
return result
# Filter files of current shard if running on multiple hosts.
def is_in_shard(index):
return (index % options.shard_count) == options.shard_index
return [f for i, f in enumerate(result) if is_in_shard(i)]
# -----------------------------------------------------------------------------
@ -331,6 +336,29 @@ class CallGraph:
else:
self.funcs[funcname[1:]].add(self.current_caller)
def to_file(self, file_name):
  """Serialize this call graph into the file 'file_name'.

  The whole object is pickled, so the result is a binary file that
  CallGraph.from_file can read back.
  """
  log(f"Writing serialized callgraph to {file_name}")
  with open(file_name, 'wb') as out:
    pickle.dump(self, out)
@staticmethod
def from_file(file_name):
  """Load the pickled call graph stored in 'file_name' and return it.

  NOTE: unpickling can execute arbitrary code, so only pass files that come
  from a trusted source (e.g. files written by to_file).
  """
  log(f"Reading serialized callgraph from {file_name}")
  with open(file_name, 'rb') as source:
    graph = pickle.load(source)
  return graph
@staticmethod
def from_files(*file_names):
  """Read the call graphs stored in 'file_names' and merge them into one.

  For every callee, the resulting graph holds the union of the caller sets
  found in the individual files. Returns a new CallGraph.
  """
  merged = CallGraph()
  for path in file_names:
    partial = CallGraph.from_file(path)
    for callee, callers in partial.funcs.items():
      # Accumulate callers of the same callee across all partial graphs.
      merged.funcs[callee].update(callers)
  return merged
class GCSuspectsCollector:
@ -378,22 +406,37 @@ class GCSuspectsCollector:
mark(funcname)
def generate_gc_suspects(files, options):
# Reset the global state.
call_graph = CallGraph()
def generate_callgraph(files, options):
"""Construct a (potentially partial) call graph from a subset of
source files.
"""
callgraph = CallGraph()
log("Building GC Suspects for {}", options.v8_target_cpu)
for _, stdout, _ in invoke_clang_plugin_for_each_file(files, "dump-callees",
[], options):
call_graph.parse(stdout.splitlines())
log(f"Building call graph for {options.v8_target_cpu}")
for _, stdout, _ in invoke_clang_plugin_for_each_file(
files, "dump-callees", [], options):
callgraph.parse(stdout.splitlines())
collector = GCSuspectsCollector(options, call_graph.funcs)
return callgraph
def generate_gc_suspects_from_callgraph(callgraph, options):
"""Calculate and store gc-suspect information from a given call graph."""
collector = GCSuspectsCollector(options, callgraph.funcs)
collector.propagate()
# TODO(cbruni): remove once gcmole.cc is migrated
write_gcmole_results(collector, options, options.v8_root_dir)
write_gcmole_results(collector, options, options.out_dir)
def generate_gc_suspects_from_files(options):
  """Build the file list and derive the gc-suspect information from it.

  Returns the list of files produced by build_file_list, so that callers can
  run further steps on the same files.
  """
  source_files = build_file_list(options)
  generate_gc_suspects_from_callgraph(
      generate_callgraph(source_files, options), options)
  return source_files
def write_gcmole_results(collector, options, dst):
# gcsuspects contains a list("mangled_full_name,name") of all functions that
# could cause a gc (directly or indirectly).
@ -434,19 +477,12 @@ def write_gcmole_results(collector, options, dst):
# Analysis
def check_correctness_for_arch(options):
files = build_file_list(options)
if not options.reuse_gcsuspects:
generate_gc_suspects(files, options)
else:
log("Reusing GCSuspects for {}", options.v8_target_cpu)
def check_correctness_for_arch(files, options):
processed_files = 0
errors_found = False
log("Searching for evaluation order problems " +
(' and dead variables' if options.dead_vars else '') + "for" +
("and dead variables " if options.dead_vars else "") + "for " +
options.v8_target_cpu)
plugin_args = []
if options.dead_vars:
@ -580,13 +616,20 @@ def main(argv):
action="store_true",
default=False,
help="Flag for setting build bot specific settings.")
parser.add_argument(
"--shard-count",
default=1,
type=int,
help="Number of tasks the current action (e.g. collect or check) "
"is distributed to.")
parser.add_argument(
"--shard-index",
default=0,
type=int,
help="Index of the current task (in [0..shard-count-1]) if the "
"overall action is distributed (shard-count > 1).")
group = parser.add_argument_group("GCMOLE options")
group.add_argument(
"--reuse-gcsuspects",
action="store_true",
default=False,
help="Don't build gcsuspects file and reuse previously generated one.")
group.add_argument(
"--sequential",
action="store_true",
@ -631,6 +674,38 @@ def main(argv):
add_common_args(subp)
subp.set_defaults(func=full_run)
subp = subps.add_parser(
"collect",
description="Construct call graph from source files. "
"The action can be distributed using --shard-count and "
"--shard-index.")
add_common_args(subp)
subp.set_defaults(func=collect_run)
subp.add_argument(
"--output",
required=True,
help="Path to a file where to store the constructed call graph")
subp = subps.add_parser(
"merge",
description="Merge partial call graphs and propagate gc suspects.")
add_common_args(subp)
subp.set_defaults(func=merge_run)
subp.add_argument(
"--input",
action='append',
required=True,
help="Path to a file containing a partial call graph stored by "
"'collect'. Repeat for multiple files.")
subp = subps.add_parser(
"check",
description="Check for problems using previously collected gc-suspect "
"information. The action can be distributed using "
"--shard-count and --shard-index.")
add_common_args(subp)
subp.set_defaults(func=check_run)
options = parser.parse_args(argv[1:])
verify_and_convert_dirs(parser, options, default_gcmole_dir,
@ -638,6 +713,7 @@ def main(argv):
verify_clang_plugin(parser, options)
prepare_gcmole_files(options)
verify_build_config(parser, options)
override_env_options(options)
options.func(options)
@ -649,12 +725,31 @@ def maybe_redirect_stderr(options):
yield f
def full_run(options):
def check_files(options, files):
with maybe_redirect_stderr(options) as file_io:
errors_found = check_correctness_for_arch(options)
errors_found = check_correctness_for_arch(files, options)
sys.exit(has_unexpected_errors(options, errors_found, file_io))
def full_run(options):
  """Generate the gc-suspect information, then check the files it came from."""
  files = generate_gc_suspects_from_files(options)
  check_files(options, files)
def collect_run(options):
  """Build the call graph for this run's files and store it in options.output."""
  source_files = build_file_list(options)
  generate_callgraph(source_files, options).to_file(options.output)
def merge_run(options):
  """Merge the call graphs listed in options.input and compute gc suspects."""
  merged = CallGraph.from_files(*options.input)
  generate_gc_suspects_from_callgraph(merged, options)
def check_run(options):
  """Check this run's files without regenerating the gc-suspect information."""
  files = build_file_list(options)
  check_files(options, files)
def verify_and_convert_dirs(parser, options, default_tools_gcmole_dir,
default_clang_bin_dir):
# Verify options for setting directories and convert the input strings to Path
@ -758,5 +853,13 @@ def verify_build_config(parser, options):
options.v8_build_dir, options.v8_target_cpu, found_cpu))
def override_env_options(options):
  """Set shard options if passed as gtest environment vars on bots.

  GTEST_TOTAL_SHARDS and GTEST_SHARD_INDEX, when present in the environment,
  replace options.shard_count and options.shard_index. The resulting values
  are validated before they are stored on 'options'.

  Raises:
    ValueError: if a value is not an integer, if the shard count is smaller
      than 1, or if the shard index is outside [0..shard_count-1].
  """
  shard_count = int(os.environ.get('GTEST_TOTAL_SHARDS', options.shard_count))
  shard_index = int(os.environ.get('GTEST_SHARD_INDEX', options.shard_index))

  # Files are assigned to shards via 'index % shard_count == shard_index'.
  # A count of 0 would divide by zero there and an out-of-range index would
  # silently select no files at all, so fail early with a clear message.
  if shard_count < 1:
    raise ValueError(f"Shard count must be at least 1, got {shard_count}")
  if not 0 <= shard_index < shard_count:
    raise ValueError(
        f"Shard index {shard_index} is not in [0..{shard_count - 1}]")

  options.shard_count = shard_count
  options.shard_index = shard_index
if __name__ == "__main__":
main(sys.argv)

View File

@ -18,7 +18,8 @@ TESTDATA_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)), 'testdata', 'v8')
Options = collections.namedtuple(
'Options', ['v8_root_dir', 'v8_target_cpu', 'test_run'])
'Options', ['v8_root_dir', 'v8_target_cpu', 'shard_count', 'shard_index',
'test_run'])
def abs_test_file(f):
@ -28,13 +29,13 @@ def abs_test_file(f):
class FilesTest(unittest.TestCase):
def testFileList_for_testing(self):
options = Options(Path(TESTDATA_PATH), 'x64', True)
options = Options(Path(TESTDATA_PATH), 'x64', 1, 0, True)
self.assertEqual(
gcmole.build_file_list(options),
list(map(abs_test_file, ['tools/gcmole/gcmole-test.cc'])))
def testFileList_x64(self):
options = Options(Path(TESTDATA_PATH), 'x64', False)
options = Options(Path(TESTDATA_PATH), 'x64', 1, 0, False)
expected = [
'file1.cc',
'file2.cc',
@ -49,8 +50,32 @@ class FilesTest(unittest.TestCase):
gcmole.build_file_list(options),
list(map(abs_test_file, expected)))
def testFileList_x64_shard0(self):
  """Shard 0 of 2 must receive the x64 files listed below, in this order."""
  options = Options(Path(TESTDATA_PATH), 'x64', 2, 0, False)
  expected = [abs_test_file(name) for name in (
      'file1.cc',
      'x64/file1.cc',
      'file3.cc',
      'test/cctest/test-x64-file1.cc',
  )]
  self.assertEqual(gcmole.build_file_list(options), expected)
def testFileList_x64_shard1(self):
  """Shard 1 of 2 must receive the x64 files listed below, in this order."""
  options = Options(Path(TESTDATA_PATH), 'x64', 2, 1, False)
  expected = [abs_test_file(name) for name in (
      'file2.cc',
      'x64/file2.cc',
      'file4.cc',
      'test/cctest/test-x64-file2.cc',
  )]
  self.assertEqual(gcmole.build_file_list(options), expected)
def testFileList_arm(self):
options = Options(Path(TESTDATA_PATH), 'arm', False)
options = Options(Path(TESTDATA_PATH), 'arm', 1, 0, False)
expected = [
'file1.cc',
'file2.cc',
@ -126,6 +151,49 @@ class SuspectCollectorTest(unittest.TestCase):
call_graph.funcs,
{'A': set(), 'B': set('A'), 'C': set(['A', 'B']), 'D': set('B')})
def testCallGraphMerge(self):
  """Test serializing, deserializing and merging call graphs.

  Three partial call graphs are written to files and read back through
  CallGraph.from_files. The merged result must not depend on the order of
  the input files.
  """
  call_graph1 = self.create_callgraph(
      OutputLines('B → C D E', 'D →'), OutputLines('A → B C'))
  self.assertDictEqual(
      call_graph1.funcs,
      {'A': set(), 'B': set('A'), 'C': set(['A', 'B']), 'D': set('B'),
       'E': set('B')})

  call_graph2 = self.create_callgraph(
      OutputLines('E → A'), OutputLines('C → D F'))
  self.assertDictEqual(
      call_graph2.funcs,
      {'A': set('E'), 'C': set(), 'D': set('C'), 'E': set(), 'F': set('C')})

  call_graph3 = self.create_callgraph(
      OutputLines('F → G'), OutputLines('G →'))
  self.assertDictEqual(
      call_graph3.funcs,
      {'G': set('F'), 'F': set()})

  # Use a self-cleaning temporary directory so that the serialized graphs do
  # not pile up on disk after the test has finished.
  with tempfile.TemporaryDirectory('gcmole_test') as temp_dir_name:
    temp_dir = Path(temp_dir_name)
    file1 = temp_dir / 'file1.bin'
    file2 = temp_dir / 'file2.bin'
    file3 = temp_dir / 'file3.bin'
    call_graph1.to_file(file1)
    call_graph2.to_file(file2)
    call_graph3.to_file(file3)

    expected = {'A': set(['E']), 'B': set('A'), 'C': set(['A', 'B']),
                'D': set(['B', 'C']), 'E': set(['B']), 'F': set(['C'])}

    # Merging must give the same result regardless of the file order.
    call_graph = gcmole.CallGraph.from_files(file1, file2)
    self.assertDictEqual(call_graph.funcs, expected)

    call_graph = gcmole.CallGraph.from_files(file2, file1)
    self.assertDictEqual(call_graph.funcs, expected)

    # The third graph only contributes the new callee 'G'.
    call_graph = gcmole.CallGraph.from_files(file1, file2, file3)
    self.assertDictEqual(call_graph.funcs, dict(G=set('F'), **expected))
def create_collector(self, outputs):
Options = collections.namedtuple('OptionsForCollector', ['allowlist'])
options = Options(True)

View File

@ -29,9 +29,8 @@ if "--help" in args:
# Different modes of running gcmole. Optional to stay backwards-compatible.
# TODO(https://crbug.com/v8/12660): Add more modes.
mode = 'full'
if args and args[0] in ['full']:
if args and args[0] in ['check', 'collect', 'full', 'merge']:
mode = args[0]
args = args[1:]