v8/tools/gcmole/gcmole.py
Camillo Bruni ecc3cd256a [tools] Improve gcmole part II
Prepare gcmole.cc for the next update:
- Print possible GC locations when discovering stale/dead variables
- Make error messages less confusing for the modern V8 engineer
- Prepare gcmole to read suspects.allowlist instead of .whitelist
- Use more readable variable names
- Only log non-found types with --verbose
- Change the currently unusued gccauses format in gcmole.py and
  support loading it back in gcmole.cc
- Implemented first basic gc call-chain printing (disabled by default)

GCmole packaging:
- Add debug mode to bootstrap.sh build script
- Update gcmole.py run instructions in bootstrap.sh and package.sh

Bug: v8:10009
Change-Id: I369d48baa2980455d2e8f57e7a803d0384fe83f1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3480095
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Maya Lekova <mslekova@chromium.org>
Commit-Queue: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79357}
2022-03-04 01:06:11 +00:00

808 lines
24 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This is main driver for gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]
# for py2/py3 compatibility
from __future__ import print_function
from multiprocessing import cpu_count
import collections
import difflib
import json
import optparse
import os
import re
import subprocess
import sys
import threading
if sys.version_info.major > 2:
from pathlib import Path
import queue
else:
import Queue as queue
default_open = open
def open(path, *args, **kwargs):
return default_open(str(path), *args, **kwargs)
class Path(object):
def __init__(self, path, *args):
if args:
self._path = os.path.join(str(path), *args)
else:
self._path = str(path)
def __div__(self, other):
return Path(self._path, str(other))
def __str__(self):
return self._path
def resolve(self):
return Path(os.path.abspath(self._path))
@property
def parent(self):
return Path(os.path.dirname(self._path))
@property
def parents(self):
current = self
parents = []
while current._path != "" and current._path != "/":
current = current.parent
parents.append(current)
return parents
def is_file(self):
return os.path.isfile(self._path)
def is_dir(self):
return os.path.isdir(self._path)
def exists(self):
return os.path.exists(self._path)
def mkdir(self, parents=False, exist_ok=False):
if parents and not self.parent.exists():
self.parent.mkdir(parents=True, exist_ok=True)
if exist_ok and self.exists():
return
os.mkdir(self._path)
ArchCfg = collections.namedtuple(
"ArchCfg", ["name", "cpu", "triple", "arch_define", "arch_options"])
# TODO(cbruni): use gn desc by default for platform-specific settings
OPTIONS_64BIT = [
"-DV8_COMPRESS_POINTERS",
"-DV8_COMPRESS_POINTERS_IN_SHARED_CAGE",
"-DV8_EXTERNAL_CODE_SPACE",
"-DV8_SHORT_BUILTIN_CALLS",
"-DV8_SHARED_RO_HEAP",
]
ARCHITECTURES = {
"ia32":
ArchCfg(
name="ia32",
cpu="x86",
triple="i586-unknown-linux",
arch_define="V8_TARGET_ARCH_IA32",
arch_options=["-m32"],
),
"arm":
ArchCfg(
name="arm",
cpu="arm",
triple="i586-unknown-linux",
arch_define="V8_TARGET_ARCH_ARM",
arch_options=["-m32"],
),
# TODO(cbruni): Use detailed settings:
# arch_options = OPTIONS_64BIT + [ "-DV8_WIN64_UNWINDING_INFO" ]
"x64":
ArchCfg(
name="x64",
cpu="x64",
triple="x86_64-unknown-linux",
arch_define="V8_TARGET_ARCH_X64",
arch_options=[]),
"arm64":
ArchCfg(
name="arm64",
cpu="arm64",
triple="x86_64-unknown-linux",
arch_define="V8_TARGET_ARCH_ARM64",
arch_options=[],
),
}
ARCHITECTURES['x86'] = ARCHITECTURES['ia32']
def log(format, *args, **kwargs):
mark = ("#", "=", "-", ".")[kwargs.get("level", 0)]
print(mark * 2, str(format).format(*list(map(str, args))))
def fatal(format):
log(format)
sys.exit(1)
# -----------------------------------------------------------------------------
# Clang invocation
def make_clang_command_line(plugin, plugin_args, options):
arch_cfg = ARCHITECTURES[options.v8_target_cpu]
prefixed_plugin_args = []
if plugin_args:
for arg in plugin_args:
prefixed_plugin_args += [
"-Xclang",
"-plugin-arg-" + plugin,
"-Xclang",
arg,
]
log("Using generated files in {}", options.v8_build_dir / 'gen')
icu_src_dir = options.v8_root_dir / 'third_party/icu/source'
return ([
options.clang_bin_dir / "clang++",
"-std=c++17",
"-c",
"-Xclang",
"-load",
"-Xclang",
options.clang_plugins_dir / "libgcmole.so",
"-Xclang",
"-plugin",
"-Xclang",
plugin,
] + prefixed_plugin_args + [
"-Xclang",
"-triple",
"-Xclang",
arch_cfg.triple,
"-fno-exceptions",
"-Wno-everything",
"-D",
arch_cfg.arch_define,
"-DENABLE_DEBUGGER_SUPPORT",
"-DV8_ENABLE_WEBASSEMBLY",
"-DV8_GC_MOLE",
"-DV8_INTL_SUPPORT",
"-I{}".format(options.v8_root_dir),
"-I{}".format(options.v8_root_dir / 'include'),
"-I{}".format(options.v8_build_dir / 'gen'),
"-I{}".format(icu_src_dir / 'common'),
"-I{}".format(icu_src_dir / 'i18n'),
] + arch_cfg.arch_options)
def invoke_clang_plugin_for_file(filename, cmd_line, verbose):
args = cmd_line + [filename]
args = list(map(str, args))
if verbose:
print("popen ", " ".join(args))
p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
return p.returncode, stdout.decode("utf-8"), stderr.decode("utf-8")
def invoke_clang_plugin_for_files_in_queue(i, input_queue, output_queue,
cancel_event, cmd_line, verbose):
success = False
try:
while not cancel_event.is_set():
filename = input_queue.get_nowait()
ret, stdout, stderr = invoke_clang_plugin_for_file(
filename, cmd_line, verbose)
output_queue.put_nowait((filename, ret, stdout, stderr))
if ret != 0:
break
except KeyboardInterrupt:
log("[{}] Interrupting", i, level=1)
except queue.Empty:
success = True
finally:
# Emit a success bool so that the reader knows that there was either an
# error or all files were processed.
output_queue.put_nowait(success)
def invoke_clang_plugin_for_each_file(filenames, plugin, plugin_args, options):
cmd_line = make_clang_command_line(plugin, plugin_args, options)
verbose = options.verbose
if options.sequential:
log("Sequential execution.")
for filename in filenames:
log(filename, level=1)
returncode, stdout, stderr = invoke_clang_plugin_for_file(
filename, cmd_line, verbose)
if returncode != 0:
sys.stderr.write(stderr)
sys.exit(returncode)
yield filename, stdout, stderr
else:
log("Parallel execution.")
cpus = cpu_count()
input_queue = queue.Queue()
output_queue = queue.Queue()
threads = []
try:
for filename in filenames:
input_queue.put(filename)
cancel_event = threading.Event()
for i in range(min(len(filenames), cpus)):
threads.append(
threading.Thread(
target=invoke_clang_plugin_for_files_in_queue,
args=(i, input_queue, output_queue, cancel_event, cmd_line,
verbose)))
for t in threads:
t.start()
num_finished = 0
while num_finished < len(threads):
output = output_queue.get()
if type(output) == bool:
if output:
num_finished += 1
continue
else:
break
filename, returncode, stdout, stderr = output
log(filename, level=2)
if returncode != 0:
sys.stderr.write(stderr)
sys.exit(returncode)
yield filename, stdout, stderr
finally:
cancel_event.set()
for t in threads:
t.join()
# -----------------------------------------------------------------------------
def parse_gn_file(options, for_test):
if for_test:
return {"all": [options.v8_root_dir / "tools/gcmole/gcmole-test.cc"]}
result = {}
gn_files = [
("BUILD.gn", re.compile('"([^"]*?\.cc)"'), ""),
("test/cctest/BUILD.gn", re.compile('"(test-[^"]*?\.cc)"'),
Path("test/cctest/")),
]
for filename, pattern, prefix in gn_files:
path = options.v8_root_dir / filename
with open(path) as gn_file:
gn = gn_file.read()
for condition, sources in re.findall("### gcmole\((.*?)\) ###(.*?)\]", gn,
re.MULTILINE | re.DOTALL):
if condition not in result:
result[condition] = []
for file in pattern.findall(sources):
result[condition].append(options.v8_root_dir / prefix / file)
return result
def evaluate_condition(cond, props):
if cond == "all":
return True
m = re.match("(\w+):(\w+)", cond)
if m is None:
fatal("failed to parse condition: {}", cond)
p, v = m.groups()
if p not in props:
fatal("undefined configuration property: {}", p)
return props[p] == v
def build_file_list(options, for_test):
sources = parse_gn_file(options, for_test)
props = {
"os": "linux",
"arch": options.v8_target_cpu,
"mode": "debug",
"simulator": ""
}
ret = []
for condition, files in list(sources.items()):
if evaluate_condition(condition, props):
ret += files
return ret
# -----------------------------------------------------------------------------
# GCSuspects Generation
# Note that the gcsuspects file lists functions in the form:
# mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
ALLOWLIST = [
# The following functions call CEntryStub which is always present.
"MacroAssembler.*,CallRuntime",
"CompileCallLoadPropertyWithInterceptor",
"CallIC.*,GenerateMiss",
# DirectCEntryStub is a special stub used on ARM.
# It is pinned and always present.
"DirectCEntryStub.*,GenerateCall",
# TODO GCMole currently is sensitive enough to understand that certain
# functions only cause GC and return Failure simulataneously.
# Callsites of such functions are safe as long as they are properly
# check return value and propagate the Failure to the caller.
# It should be possible to extend GCMole to understand this.
"Heap.*,TryEvacuateObject",
# Ignore all StateTag methods.
"StateTag",
# Ignore printing of elements transition.
"PrintElementsTransition",
# CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
"CodeCreateEvent",
"WriteField",
]
GC_PATTERN = ",.*Collect.*Garbage"
SAFEPOINT_PATTERN = ",SafepointSlowPath"
ALLOWLIST_PATTERN = "|".join("(?:{})".format(p) for p in ALLOWLIST)
def merge_regexp(pattern_dict):
return re.compile("|".join("(?P<{}>{})".format(key, value)
for (key, value) in list(pattern_dict.items())))
IS_SPECIAL_WITHOUT_ALLOW_LIST = merge_regexp({
"gc": GC_PATTERN,
"safepoint": SAFEPOINT_PATTERN
})
IS_SPECIAL_WITH_ALLOW_LIST = merge_regexp({
"gc": GC_PATTERN,
"safepoint": SAFEPOINT_PATTERN,
"allow": ALLOWLIST_PATTERN
})
class GCSuspectsCollector:
def __init__(self, options):
self.gc = {}
self.gc_caused = collections.defaultdict(lambda: set())
self.funcs = {}
self.current_caller = None
self.allowlist = options.allowlist
self.is_special = IS_SPECIAL_WITH_ALLOW_LIST if self.allowlist else IS_SPECIAL_WITHOUT_ALLOW_LIST
def add_cause(self, name, cause):
self.gc_caused[name].add(cause)
def parse(self, lines):
for funcname in lines:
if not funcname:
continue
if funcname[0] != "\t":
self.resolve(funcname)
self.current_caller = funcname
else:
name = funcname[1:]
callers_for_name = self.resolve(name)
callers_for_name.add(self.current_caller)
def resolve(self, name):
if name not in self.funcs:
self.funcs[name] = set()
m = self.is_special.search(name)
if m:
if m.group("gc"):
self.gc[name] = True
self.add_cause(name, "<GC>")
elif m.group("safepoint"):
self.gc[name] = True
self.add_cause(name, "<Safepoint>")
elif m.group("allow"):
self.gc[name] = False
return self.funcs[name]
def propagate(self):
log("Propagating GC information")
def mark(funcname, callers):
for caller in callers:
if caller not in self.gc:
self.gc[caller] = True
mark(caller, self.funcs[caller])
self.add_cause(caller, funcname)
for funcname, callers in list(self.funcs.items()):
if self.gc.get(funcname, False):
mark(funcname, callers)
def generate_gc_suspects(files, options):
# Reset the global state.
collector = GCSuspectsCollector(options)
log("Building GC Suspects for {}", options.v8_target_cpu)
for _, stdout, _ in invoke_clang_plugin_for_each_file(files, "dump-callees",
[], options):
collector.parse(stdout.splitlines())
collector.propagate()
# TODO(cbruni): remove once gcmole.cc is migrated
write_gcmole_results(collector, options, options.v8_root_dir)
write_gcmole_results(collector, options, options.out_dir)
def write_gcmole_results(collector, options, dst):
# gcsuspects contains a list("mangled_full_name,name") of all functions that
# could cause a gc (directly or indirectly).
#
# EXAMPLE
# _ZN2v88internal4Heap16CreateApiObjectsEv,CreateApiObjects
# _ZN2v88internal4Heap17CreateInitialMapsEv,CreateInitialMaps
# ...
with open(dst / "gcsuspects", "w") as out:
for name, value in list(collector.gc.items()):
if value:
out.write(name + "\n")
# gccauses contains a map["mangled_full_name,name"] => list(inner gcsuspects)
# Where the inner gcsuspects are functions directly called in the outer
# function that can cause a gc. The format is encoded for simplified
# deserialization in gcmole.cc.
#
# EXAMPLE:
# _ZN2v88internal4Heap17CreateHeapObjectsEv,CreateHeapObjects
# start,nested
# _ZN2v88internal4Heap16CreateApiObjectsEv,CreateApiObjects
# _ZN2v88internal4Heap17CreateInitialMapsEv,CreateInitialMaps
# ...
# end,nested
# ...
with open(dst / "gccauses", "w") as out:
for name, causes in list(collector.gc_caused.items()):
out.write("{}\n".format(name))
out.write("start,nested\n")
for cause in causes:
out.write("{}\n".format(cause))
out.write("end,nested\n")
log("GCSuspects and gccauses generated for {} in '{}'", options.v8_target_cpu,
dst)
# ------------------------------------------------------------------------------
# Analysis
def check_correctness_for_arch(options, for_test):
files = build_file_list(options, for_test)
if not options.reuse_gcsuspects:
generate_gc_suspects(files, options)
else:
log("Reusing GCSuspects for {}", options.v8_target_cpu)
processed_files = 0
errors_found = False
output = ""
log("Searching for evaluation order problems " +
(' and dead variables' if options.dead_vars else '') + "for" +
options.v8_target_cpu)
plugin_args = []
if options.dead_vars:
plugin_args.append("--dead-vars")
if options.verbose:
plugin_args.append("--verbose")
if options.verbose_trace:
plugin_args.append("--verbose-trace")
for _, _, stderr in invoke_clang_plugin_for_each_file(files, "find-problems",
plugin_args, options):
processed_files = processed_files + 1
if not errors_found:
errors_found = re.search("^[^:]+:\d+:\d+: (warning|error)", stderr,
re.MULTILINE) is not None
if for_test:
output = output + stderr
else:
sys.stdout.write(stderr)
log("Done processing {} files.", processed_files)
log("Errors found" if errors_found else "No errors found")
return errors_found, output
def test_run(options):
if not options.test_run:
return True
log("Test Run")
errors_found, output = check_correctness_for_arch(options, True)
if not errors_found:
log("Test file should produce errors, but none were found. Output:")
print(output)
return False
new_file = options.out_dir / "test-expectations-gen.txt"
with open(new_file, "w") as f:
f.write(output)
log("Wrote test-results: {}", new_file)
expected_file = options.v8_root_dir / "tools/gcmole/test-expectations.txt"
with open(expected_file) as exp_file:
expectations = exp_file.read()
if output != expectations:
diff_file = options.out_dir / "test_output.diff"
print("#" * 79)
log("Output mismatch from running tests.")
log("Please run gcmole manually with --test-run --verbose.")
log("Expected: " + expected_file)
log("New: " + new_file)
log("*Diff:* " + diff_file)
print("#" * 79)
for line in difflib.unified_diff(
expectations.splitlines(),
output.splitlines(),
fromfile=str(new_file),
tofile=str(diff_file),
lineterm="",
):
print(line)
print("#" * 79)
log("Full output")
log("Expected: " + expected_file)
log("Diff: " + diff_file)
log("*New:* " + new_file)
print("#" * 79)
print(output)
print("#" * 79)
return False
log("Tests ran successfully")
return True
# =============================================================================
def relative_parents(path, level=0):
return Path(os.path.relpath(str(path.resolve().parents[level])))
def main(args):
# Print arguments for better debugging on the bots
# Get a clean parent path relative to PWD
gcmole_dir = relative_parents(Path(args[0]))
parser = optparse.OptionParser()
archs = list(ARCHITECTURES.keys())
parser.add_option(
"--v8-target-cpu",
type="choice",
choices=archs,
help="Tested CPU architecture. Choices: {}".format(archs),
metavar="CPU")
default_clang_bin_dir = gcmole_dir / 'gcmole-tools/bin'
parser.add_option(
"--clang-bin-dir",
metavar="DIR",
help="Build dir of the custom clang version for gcmole." + \
"Default: env['CLANG_DIR'] or '{}'".format(default_clang_bin_dir))
parser.add_option(
"--clang-plugins-dir",
metavar="DIR",
help="Containing dir for libgcmole.so."
"Default: env['CLANG_PLUGINS'] or '{}'".format(gcmole_dir))
default_root_dir = relative_parents(gcmole_dir, 1)
parser.add_option(
"--v8-root-dir",
metavar="DIR",
default=default_root_dir,
help="V8 checkout directory. Default: '{}'".format(default_root_dir))
parser.add_option(
"--v8-build-dir",
metavar="BUILD_DIR",
help="GN build dir for v8. Default: 'out/CPU.Release'. "
"Config must match cpu specified by --v8-target-cpu")
parser.add_option(
"--out-dir",
metavar="DIR",
help="Output location for the gcsuspect and gcauses file."
"Default: BUILD_DIR/gen/tools/gcmole")
parser.add_option(
"--is-bot",
action="store_true",
default=False,
help="Flag for setting build bot specific settings.")
group = optparse.OptionGroup(parser, "GCMOLE options")
group.add_option(
"--reuse-gcsuspects",
action="store_true",
default=False,
help="Don't build gcsuspects file and reuse previously generated one.")
group.add_option(
"--sequential",
action="store_true",
default=False,
help="Don't use parallel python runner.")
group.add_option(
"--verbose",
action="store_true",
default=False,
help="Print commands to console before executing them.")
group.add_option(
"--no-dead-vars",
action="store_false",
dest="dead_vars",
default=True,
help="Don't perform dead variable analysis.")
group.add_option(
"--verbose-trace",
action="store_true",
default=False,
help="Enable verbose tracing from the plugin itself."
"This can be useful to debug finding dead variable.")
group.add_option(
"--no-allowlist",
action="store_true",
default=True,
dest="allowlist",
help="When building gcsuspects allowlist certain functions as if they can be "
"causing GC. Currently used to reduce number of false positives in dead "
"variables analysis. See TODO for ALLOWLIST in gcmole.py")
group.add_option(
"--test-run",
action="store_true",
default=False,
help="Test gcmole on tools/gcmole/gcmole-test.cc")
parser.add_option_group(group)
(options, args) = parser.parse_args()
if not options.v8_target_cpu:
# Backwards compatibility
if len(args[0]) > 0 and args[0] in archs:
options.v8_target_cpu = args[0]
log("Using --v8-target-cpu={}", options.v8_target_cpu)
else:
parser.error("Missing --v8-target-cpu option")
options.is_bot = False
verify_and_convert_dirs(parser, options, gcmole_dir, default_clang_bin_dir)
verify_clang_plugin(parser, options)
prepare_gcmole_files(options)
verify_build_config(parser, options)
any_errors_found = False
if not test_run(options):
any_errors_found = True
else:
errors_found, output = check_correctness_for_arch(options, False)
any_errors_found = any_errors_found or errors_found
sys.exit(1 if any_errors_found else 0)
def verify_and_convert_dirs(parser, options, gcmole_dir, default_clang_bin_dir):
# Verify options for setting directors and convert the input strings to Path
# objects.
options.v8_root_dir = Path(options.v8_root_dir)
if not options.clang_bin_dir:
if os.getenv("CLANG_BIN"):
options.clang_bin_dir = Path(os.getenv("CLANG_BIN"))
options.is_bot = True
else:
options.clang_bin_dir = default_clang_bin_dir
if not (options.clang_bin_dir / 'clang++').exists():
options.clang_bin_dir = Path(gcmole_dir,
"tools/gcmole/bootstrap/build/bin")
log("Using --clang-bin-dir={}", options.clang_bin_dir)
else:
options.clang_bin_dir = Path(options.clang_bin_dir)
if not options.clang_plugins_dir:
if os.getenv("CLANG_PLUGINS"):
options.clang_plugins_dir = Path(os.getenv("CLANG_PLUGINS"))
else:
options.clang_plugins_dir = gcmole_dir.resolve()
log("Using --clang-plugins-dir={}", options.clang_plugins_dir)
else:
options.clang_plugins_dir = Path(options.clang_plugins_dir)
if not options.v8_build_dir:
config = ARCHITECTURES[options.v8_target_cpu]
options.v8_build_dir = options.v8_root_dir / ('out/%s.release' %
config.name)
# Fallback for build bots.
if not options.v8_build_dir.exists() and os.getenv("CLANG_BIN"):
options.v8_build_dir = options.v8_root_dir / 'out/build'
log("Using --v8-build-dir={}", options.v8_build_dir)
else:
options.v8_build_dir = Path(options.v8_build_dir)
if not options.out_dir:
options.out_dir = options.v8_build_dir / 'gen/tools/gcmole'
if options.v8_build_dir.exists():
options.out_dir.mkdir(parents=True, exist_ok=True)
else:
options.out_dir = Path(options.out_dir)
for flag in [
"--v8-root-dir", "--v8-build-dir", "--clang-bin-dir",
"--clang-plugins-dir", "--out-dir"
]:
dir = getattr(options, parser.get_option(flag).dest)
if not dir.is_dir():
parser.error("{}='{}' does not exist!".format(flag, dir))
def verify_clang_plugin(parser, options):
libgcmole_path = options.clang_plugins_dir / "libgcmole.so"
if not libgcmole_path.is_file():
parser.error("'{}' does not exist. Please build gcmole first.".format(
libgcmole_path))
clang_path = options.clang_bin_dir / "clang++"
if not clang_path.is_file():
parser.error(
"'{}' does not exist. Please build gcmole first.".format(clang_path))
def prepare_gcmole_files(options):
cmd = [
"ninja", "-C", options.v8_build_dir, "v8_gcmole_files",
"v8_dump_build_config"
]
cmd = list(map(str, cmd))
log("Preparing files: {}", " ".join(cmd))
try:
subprocess.check_call(cmd)
except:
# Ignore ninja task errors on the bots
if options.is_bot:
log("Ninja command failed, ignoring errors.")
else:
raise
def verify_build_config(parser, options):
if options.is_bot:
#TODO(cbruni): Fix, currently not supported on the bots
return
config_file = options.v8_build_dir / 'v8_build_config.json'
with open(config_file) as f:
config = json.load(f)
found_cpu = None
for key in ('v8_target_cpu', 'target_cpu', 'current_cpu'):
found_cpu = config.get('v8_target_cpu')
if found_cpu == options.v8_target_cpu:
return
parser.error("Build dir '{}' config doesn't match request cpu. {}: {}".format(
options.v8_build_dir, options.v8_target_cpu, found_cpu))
if __name__ == "__main__":
main(sys.argv)