v8/tools/gcmole/gcmole.py
Michael Achenbach 2b133ebc9e [gcmole] Simplify gcmole directives and add tests
This prepares work for speeding up gcmole infrastructure by adding
more checks and testing.

- Add a presubmit check that verifies gcmole directives in gn files.
- Simplify gcmole directives in gn files. Drop all but the
  architecture property as the others were unused. Simplify regular
  expressions.
- Fix some gcmole directives (fly by - only on untested
  architectures).
- Add a python test for the changed function in gcmole.py. There's
  no other testing yet and this starts somewhere. The test simulates
  real BUILD.gn files via checked-in test data.

Bug: v8:12660
Change-Id: I9ca3152b2c3b806de5d4509c152283b36294950d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4020268
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Reviewed-by: Alexander Schulze <alexschulze@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84239}
2022-11-14 13:07:15 +00:00

742 lines
23 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This is main driver for gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]
from multiprocessing import cpu_count
from pathlib import Path
import collections
import difflib
import json
import optparse
import os
import re
import subprocess
import sys
import threading
import queue
ArchCfg = collections.namedtuple(
"ArchCfg", ["name", "cpu", "triple", "arch_define", "arch_options"])
# TODO(cbruni): use gn desc by default for platform-specific settings
OPTIONS_64BIT = [
"-DV8_COMPRESS_POINTERS",
"-DV8_COMPRESS_POINTERS_IN_SHARED_CAGE",
"-DV8_EXTERNAL_CODE_SPACE",
"-DV8_SHORT_BUILTIN_CALLS",
"-DV8_SHARED_RO_HEAP",
]
ARCHITECTURES = {
"ia32":
ArchCfg(
name="ia32",
cpu="x86",
triple="i586-unknown-linux",
arch_define="V8_TARGET_ARCH_IA32",
arch_options=["-m32"],
),
"arm":
ArchCfg(
name="arm",
cpu="arm",
triple="i586-unknown-linux",
arch_define="V8_TARGET_ARCH_ARM",
arch_options=["-m32"],
),
# TODO(cbruni): Use detailed settings:
# arch_options = OPTIONS_64BIT + [ "-DV8_WIN64_UNWINDING_INFO" ]
"x64":
ArchCfg(
name="x64",
cpu="x64",
triple="x86_64-unknown-linux",
arch_define="V8_TARGET_ARCH_X64",
arch_options=[]),
"arm64":
ArchCfg(
name="arm64",
cpu="arm64",
triple="x86_64-unknown-linux",
arch_define="V8_TARGET_ARCH_ARM64",
arch_options=[],
),
}
ARCHITECTURES['x86'] = ARCHITECTURES['ia32']
def log(format, *args, **kwargs):
mark = ("#", "=", "-", ".")[kwargs.get("level", 0)]
print(mark * 2, str(format).format(*list(map(str, args))))
def fatal(format):
log(format)
sys.exit(1)
# -----------------------------------------------------------------------------
# Clang invocation
def make_clang_command_line(plugin, plugin_args, options):
arch_cfg = ARCHITECTURES[options.v8_target_cpu]
prefixed_plugin_args = []
if plugin_args:
for arg in plugin_args:
prefixed_plugin_args += [
"-Xclang",
"-plugin-arg-" + plugin,
"-Xclang",
arg,
]
log("Using generated files in {}", options.v8_build_dir / 'gen')
icu_src_dir = options.v8_root_dir / 'third_party/icu/source'
return ([
options.clang_bin_dir / "clang++",
"-std=c++17",
"-c",
"-Xclang",
"-load",
"-Xclang",
options.clang_plugins_dir / "libgcmole.so",
"-Xclang",
"-plugin",
"-Xclang",
plugin,
] + prefixed_plugin_args + [
"-Xclang",
"-triple",
"-Xclang",
arch_cfg.triple,
"-fno-exceptions",
"-Wno-everything",
"-D",
arch_cfg.arch_define,
"-DENABLE_DEBUGGER_SUPPORT",
"-DV8_ENABLE_WEBASSEMBLY",
"-DV8_GC_MOLE",
"-DV8_INTL_SUPPORT",
"-I{}".format(options.v8_root_dir),
"-I{}".format(options.v8_root_dir / 'include'),
"-I{}".format(options.v8_build_dir / 'gen'),
"-I{}".format(icu_src_dir / 'common'),
"-I{}".format(icu_src_dir / 'i18n'),
] + arch_cfg.arch_options)
def invoke_clang_plugin_for_file(filename, cmd_line, verbose):
args = cmd_line + [filename]
args = list(map(str, args))
if verbose:
print("popen ", " ".join(args))
p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
return p.returncode, stdout.decode("utf-8"), stderr.decode("utf-8")
def invoke_clang_plugin_for_files_in_queue(i, input_queue, output_queue,
cancel_event, cmd_line, verbose):
success = False
try:
while not cancel_event.is_set():
filename = input_queue.get_nowait()
ret, stdout, stderr = invoke_clang_plugin_for_file(
filename, cmd_line, verbose)
output_queue.put_nowait((filename, ret, stdout, stderr))
if ret != 0:
break
except KeyboardInterrupt:
log("[{}] Interrupting", i, level=1)
except queue.Empty:
success = True
finally:
# Emit a success bool so that the reader knows that there was either an
# error or all files were processed.
output_queue.put_nowait(success)
def invoke_clang_plugin_for_each_file(filenames, plugin, plugin_args, options):
cmd_line = make_clang_command_line(plugin, plugin_args, options)
verbose = options.verbose
if options.sequential:
log("Sequential execution.")
for filename in filenames:
log(filename, level=1)
returncode, stdout, stderr = invoke_clang_plugin_for_file(
filename, cmd_line, verbose)
if returncode != 0:
sys.stderr.write(stderr)
sys.exit(returncode)
yield filename, stdout, stderr
else:
log("Parallel execution.")
cpus = cpu_count()
input_queue = queue.Queue()
output_queue = queue.Queue()
threads = []
try:
for filename in filenames:
input_queue.put(filename)
cancel_event = threading.Event()
for i in range(min(len(filenames), cpus)):
threads.append(
threading.Thread(
target=invoke_clang_plugin_for_files_in_queue,
args=(i, input_queue, output_queue, cancel_event, cmd_line,
verbose)))
for t in threads:
t.start()
num_finished = 0
while num_finished < len(threads):
output = output_queue.get()
if type(output) == bool:
if output:
num_finished += 1
continue
else:
break
filename, returncode, stdout, stderr = output
log(filename, level=2)
if returncode != 0:
sys.stderr.write(stderr)
sys.exit(returncode)
yield filename, stdout, stderr
finally:
cancel_event.set()
for t in threads:
t.join()
# -----------------------------------------------------------------------------
def build_file_list(options, for_test):
"""Calculates the list of source files to be checked with gcmole.
The list comprises all files from marked source sections in the
listed BUILD.gn files. All files preceeded by the following comment and
until the end of the source section are used:
### gcmole(arch) ###
Where arch can either be all (all architectures) or one of the supported V8
architectures.
The structure of these directives is also checked by presubmit via:
tools/v8_presubmit.py::GCMoleProcessor.
Returns: List of file paths (of type Path).
"""
if for_test:
return [options.v8_root_dir / "tools/gcmole/gcmole-test.cc"]
result = []
gn_files = [
("BUILD.gn", re.compile('"([^"]*?\.cc)"'), ""),
("test/cctest/BUILD.gn", re.compile('"(test-[^"]*?\.cc)"'),
Path("test/cctest/")),
]
gn_re = re.compile(f"### gcmole\((all|{options.v8_target_cpu})\) ###(.*?)\]",
re.MULTILINE | re.DOTALL)
for filename, file_pattern, prefix in gn_files:
path = options.v8_root_dir / filename
with open(path) as gn_file:
gn = gn_file.read()
for _, sources in gn_re.findall(gn):
for file in file_pattern.findall(sources):
result.append(options.v8_root_dir / prefix / file)
return result
# -----------------------------------------------------------------------------
# GCSuspects Generation
# Note that the gcsuspects file lists functions in the form:
# mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
ALLOWLIST = [
# The following functions call CEntryStub which is always present.
"MacroAssembler.*,CallRuntime",
"CompileCallLoadPropertyWithInterceptor",
"CallIC.*,GenerateMiss",
# DirectCEntryStub is a special stub used on ARM.
# It is pinned and always present.
"DirectCEntryStub.*,GenerateCall",
# TODO GCMole currently is sensitive enough to understand that certain
# functions only cause GC and return Failure simulataneously.
# Callsites of such functions are safe as long as they are properly
# check return value and propagate the Failure to the caller.
# It should be possible to extend GCMole to understand this.
"Heap.*,TryEvacuateObject",
# Ignore all StateTag methods.
"StateTag",
# Ignore printing of elements transition.
"PrintElementsTransition",
# CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
"CodeCreateEvent",
"WriteField",
]
GC_PATTERN = ",.*Collect.*Garbage"
SAFEPOINT_PATTERN = ",SafepointSlowPath"
ALLOWLIST_PATTERN = "|".join("(?:{})".format(p) for p in ALLOWLIST)
def merge_regexp(pattern_dict):
return re.compile("|".join("(?P<{}>{})".format(key, value)
for (key, value) in list(pattern_dict.items())))
IS_SPECIAL_WITHOUT_ALLOW_LIST = merge_regexp({
"gc": GC_PATTERN,
"safepoint": SAFEPOINT_PATTERN
})
IS_SPECIAL_WITH_ALLOW_LIST = merge_regexp({
"gc": GC_PATTERN,
"safepoint": SAFEPOINT_PATTERN,
"allow": ALLOWLIST_PATTERN
})
class GCSuspectsCollector:
def __init__(self, options):
self.gc = {}
self.gc_caused = collections.defaultdict(lambda: set())
self.funcs = {}
self.current_caller = None
self.allowlist = options.allowlist
self.is_special = IS_SPECIAL_WITH_ALLOW_LIST if self.allowlist else IS_SPECIAL_WITHOUT_ALLOW_LIST
def add_cause(self, name, cause):
self.gc_caused[name].add(cause)
def parse(self, lines):
for funcname in lines:
if not funcname:
continue
if funcname[0] != "\t":
self.resolve(funcname)
self.current_caller = funcname
else:
name = funcname[1:]
callers_for_name = self.resolve(name)
callers_for_name.add(self.current_caller)
def resolve(self, name):
if name not in self.funcs:
self.funcs[name] = set()
m = self.is_special.search(name)
if m:
if m.group("gc"):
self.gc[name] = True
self.add_cause(name, "<GC>")
elif m.group("safepoint"):
self.gc[name] = True
self.add_cause(name, "<Safepoint>")
elif m.group("allow"):
self.gc[name] = False
return self.funcs[name]
def propagate(self):
log("Propagating GC information")
def mark(funcname, callers):
for caller in callers:
if caller not in self.gc:
self.gc[caller] = True
mark(caller, self.funcs[caller])
self.add_cause(caller, funcname)
for funcname, callers in list(self.funcs.items()):
if self.gc.get(funcname, False):
mark(funcname, callers)
def generate_gc_suspects(files, options):
# Reset the global state.
collector = GCSuspectsCollector(options)
log("Building GC Suspects for {}", options.v8_target_cpu)
for _, stdout, _ in invoke_clang_plugin_for_each_file(files, "dump-callees",
[], options):
collector.parse(stdout.splitlines())
collector.propagate()
# TODO(cbruni): remove once gcmole.cc is migrated
write_gcmole_results(collector, options, options.v8_root_dir)
write_gcmole_results(collector, options, options.out_dir)
def write_gcmole_results(collector, options, dst):
# gcsuspects contains a list("mangled_full_name,name") of all functions that
# could cause a gc (directly or indirectly).
#
# EXAMPLE
# _ZN2v88internal4Heap16CreateApiObjectsEv,CreateApiObjects
# _ZN2v88internal4Heap17CreateInitialMapsEv,CreateInitialMaps
# ...
with open(dst / "gcsuspects", "w") as out:
for name, value in list(collector.gc.items()):
if value:
out.write(name + "\n")
# gccauses contains a map["mangled_full_name,name"] => list(inner gcsuspects)
# Where the inner gcsuspects are functions directly called in the outer
# function that can cause a gc. The format is encoded for simplified
# deserialization in gcmole.cc.
#
# EXAMPLE:
# _ZN2v88internal4Heap17CreateHeapObjectsEv,CreateHeapObjects
# start,nested
# _ZN2v88internal4Heap16CreateApiObjectsEv,CreateApiObjects
# _ZN2v88internal4Heap17CreateInitialMapsEv,CreateInitialMaps
# ...
# end,nested
# ...
with open(dst / "gccauses", "w") as out:
for name, causes in list(collector.gc_caused.items()):
out.write("{}\n".format(name))
out.write("start,nested\n")
for cause in causes:
out.write("{}\n".format(cause))
out.write("end,nested\n")
log("GCSuspects and gccauses generated for {} in '{}'", options.v8_target_cpu,
dst)
# ------------------------------------------------------------------------------
# Analysis
def check_correctness_for_arch(options, for_test):
files = build_file_list(options, for_test)
if not options.reuse_gcsuspects:
generate_gc_suspects(files, options)
else:
log("Reusing GCSuspects for {}", options.v8_target_cpu)
processed_files = 0
errors_found = False
output = ""
log("Searching for evaluation order problems " +
(' and dead variables' if options.dead_vars else '') + "for" +
options.v8_target_cpu)
plugin_args = []
if options.dead_vars:
plugin_args.append("--dead-vars")
if options.verbose:
plugin_args.append("--verbose")
if options.verbose_trace:
plugin_args.append("--verbose-trace")
for _, _, stderr in invoke_clang_plugin_for_each_file(files, "find-problems",
plugin_args, options):
processed_files = processed_files + 1
if not errors_found:
errors_found = re.search("^[^:]+:\d+:\d+: (warning|error)", stderr,
re.MULTILINE) is not None
if for_test:
output = output + stderr
else:
sys.stdout.write(stderr)
log("Done processing {} files.", processed_files)
log("Errors found" if errors_found else "No errors found")
return errors_found, output
def test_run(options):
if not options.test_run:
return True
log("Test Run")
errors_found, output = check_correctness_for_arch(options, True)
if not errors_found:
log("Test file should produce errors, but none were found. Output:")
print(output)
return False
new_file = options.out_dir / "test-expectations-gen.txt"
with open(new_file, "w") as f:
f.write(output)
log("Wrote test-results: {}", new_file)
expected_file = options.v8_root_dir / "tools/gcmole/test-expectations.txt"
with open(expected_file) as exp_file:
expectations = exp_file.read()
if output != expectations:
diff_file = options.out_dir / "test_output.diff"
print("#" * 79)
log("Output mismatch from running tests.")
log("Please run gcmole manually with --test-run --verbose.")
log("Expected: " + expected_file)
log("New: " + new_file)
log("*Diff:* " + diff_file)
print("#" * 79)
for line in difflib.unified_diff(
expectations.splitlines(),
output.splitlines(),
fromfile=str(new_file),
tofile=str(diff_file),
lineterm="",
):
print(line)
print("#" * 79)
log("Full output")
log("Expected: " + expected_file)
log("Diff: " + diff_file)
log("*New:* " + new_file)
print("#" * 79)
print(output)
print("#" * 79)
return False
log("Tests ran successfully")
return True
# =============================================================================
def relative_parents(path, level=0):
return Path(os.path.relpath(str(path.resolve().parents[level])))
def main(args):
# Get a clean parent path relative to PWD
default_root_dir = relative_parents(Path(__file__), level=2)
if len(args) >= 1:
default_gcmole_dir = relative_parents(Path(args[0]))
if default_gcmole_dir or not default_gcmole_dir.exists():
default_gcmole_dir = default_root_dir / 'tools' / 'gcmole'
parser = optparse.OptionParser()
archs = list(ARCHITECTURES.keys())
parser.add_option(
"--v8-root-dir",
metavar="DIR",
default=default_root_dir,
help="V8 checkout directory. Default: '{}'".format(
default_root_dir.absolute()))
parser.add_option(
"--v8-target-cpu",
type="choice",
default="x64",
choices=archs,
help="Tested CPU architecture. Choices: {}".format(archs),
metavar="CPU")
default_clang_bin_dir = default_gcmole_dir / 'gcmole-tools/bin'
parser.add_option(
"--clang-bin-dir",
metavar="DIR",
help="Build dir of the custom clang version for gcmole." + \
"Default: env['CLANG_DIR'] or '{}'".format(default_clang_bin_dir))
parser.add_option(
"--clang-plugins-dir",
metavar="DIR",
help="Containing dir for libgcmole.so."
"Default: env['CLANG_PLUGINS'] or '{}'".format(default_gcmole_dir))
parser.add_option(
"--v8-build-dir",
metavar="BUILD_DIR",
help="GN build dir for v8. Default: 'out/CPU.Release'. "
"Config must match cpu specified by --v8-target-cpu")
parser.add_option(
"--out-dir",
metavar="DIR",
help="Output location for the gcsuspect and gcauses file."
"Default: BUILD_DIR/gen/tools/gcmole")
parser.add_option(
"--is-bot",
action="store_true",
default=False,
help="Flag for setting build bot specific settings.")
group = optparse.OptionGroup(parser, "GCMOLE options")
group.add_option(
"--reuse-gcsuspects",
action="store_true",
default=False,
help="Don't build gcsuspects file and reuse previously generated one.")
group.add_option(
"--sequential",
action="store_true",
default=False,
help="Don't use parallel python runner.")
group.add_option(
"--verbose",
action="store_true",
default=False,
help="Print commands to console before executing them.")
group.add_option(
"--no-dead-vars",
action="store_false",
dest="dead_vars",
default=True,
help="Don't perform dead variable analysis.")
group.add_option(
"--verbose-trace",
action="store_true",
default=False,
help="Enable verbose tracing from the plugin itself."
"This can be useful to debug finding dead variable.")
group.add_option(
"--no-allowlist",
action="store_true",
default=True,
dest="allowlist",
help="When building gcsuspects allowlist certain functions as if they can be "
"causing GC. Currently used to reduce number of false positives in dead "
"variables analysis. See TODO for ALLOWLIST in gcmole.py")
group.add_option(
"--test-run",
action="store_true",
default=False,
help="Test gcmole on tools/gcmole/gcmole-test.cc")
parser.add_option_group(group)
(options, args) = parser.parse_args()
if not options.v8_target_cpu:
# Backwards compatibility
if len(args) > 0 and args[0] in archs:
options.v8_target_cpu = args[0]
log("Using --v8-target-cpu={}", options.v8_target_cpu)
else:
parser.error("Missing --v8-target-cpu option")
verify_and_convert_dirs(parser, options, default_gcmole_dir,
default_clang_bin_dir)
verify_clang_plugin(parser, options)
prepare_gcmole_files(options)
verify_build_config(parser, options)
any_errors_found = False
if not test_run(options):
any_errors_found = True
else:
errors_found, output = check_correctness_for_arch(options, False)
any_errors_found = any_errors_found or errors_found
sys.exit(1 if any_errors_found else 0)
def verify_and_convert_dirs(parser, options, default_tools_gcmole_dir,
default_clang_bin_dir):
# Verify options for setting directors and convert the input strings to Path
# objects.
options.v8_root_dir = Path(options.v8_root_dir)
if not options.clang_bin_dir:
# Backwards compatibility
if os.getenv("CLANG_BIN"):
options.clang_bin_dir = Path(os.getenv("CLANG_BIN"))
options.is_bot = True
else:
options.clang_bin_dir = default_clang_bin_dir
if not (options.clang_bin_dir / 'clang++').exists():
options.clang_bin_dir = Path(options.v8_root_dir,
"tools/gcmole/bootstrap/build/bin")
log("Using --clang-bin-dir={}", options.clang_bin_dir)
else:
options.clang_bin_dir = Path(options.clang_bin_dir)
if not options.clang_plugins_dir:
# Backwards compatibility
if os.getenv("CLANG_PLUGINS"):
options.clang_plugins_dir = Path(os.getenv("CLANG_PLUGINS"))
else:
options.clang_plugins_dir = default_tools_gcmole_dir.resolve()
log("Using --clang-plugins-dir={}", options.clang_plugins_dir)
else:
options.clang_plugins_dir = Path(options.clang_plugins_dir)
if not options.v8_build_dir:
config = ARCHITECTURES[options.v8_target_cpu]
options.v8_build_dir = options.v8_root_dir / ('out/%s.release' %
config.name)
# Fallback for build bots.
if not options.v8_build_dir.exists() and os.getenv("CLANG_BIN"):
options.v8_build_dir = options.v8_root_dir / 'out/build'
log("Using --v8-build-dir={}", options.v8_build_dir)
else:
options.v8_build_dir = Path(options.v8_build_dir)
if not options.out_dir:
options.out_dir = options.v8_build_dir / 'gen/tools/gcmole'
if options.v8_build_dir.exists():
options.out_dir.mkdir(parents=True, exist_ok=True)
else:
options.out_dir = Path(options.out_dir)
for flag in [
"--v8-root-dir", "--v8-build-dir", "--clang-bin-dir",
"--clang-plugins-dir", "--out-dir"
]:
dir = getattr(options, parser.get_option(flag).dest)
if not dir.is_dir():
parser.error("{}='{}' does not exist!".format(flag, dir))
def verify_clang_plugin(parser, options):
libgcmole_path = options.clang_plugins_dir / "libgcmole.so"
if not libgcmole_path.is_file():
parser.error("'{}' does not exist. Please build gcmole first.".format(
libgcmole_path))
clang_path = options.clang_bin_dir / "clang++"
if not clang_path.is_file():
parser.error(
"'{}' does not exist. Please build gcmole first.".format(clang_path))
def prepare_gcmole_files(options):
cmd = [
"ninja", "-C", options.v8_build_dir, "v8_gcmole_files",
"v8_dump_build_config"
]
cmd = list(map(str, cmd))
log("Preparing files: {}", " ".join(cmd))
try:
subprocess.check_call(cmd)
except:
# Ignore ninja task errors on the bots
if options.is_bot:
log("Ninja command failed, ignoring errors.")
else:
raise
def verify_build_config(parser, options):
if options.is_bot:
#TODO(cbruni): Fix, currently not supported on the bots
return
config_file = options.v8_build_dir / 'v8_build_config.json'
with open(config_file) as f:
config = json.load(f)
found_cpu = None
for key in ('v8_target_cpu', 'target_cpu', 'current_cpu'):
found_cpu = config.get('v8_target_cpu')
if found_cpu == options.v8_target_cpu:
return
parser.error("Build dir '{}' config doesn't match request cpu. {}: {}".format(
options.v8_build_dir, options.v8_target_cpu, found_cpu))
if __name__ == "__main__":
main(sys.argv)