v8/tools/gcmole/gcmole.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

574 lines
16 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This is main driver for gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]
# for py2/py3 compatibility
from __future__ import print_function
import collections
import difflib
from multiprocessing import cpu_count
import os
import re
import subprocess
import sys
import threading
if sys.version_info.major > 2:
import queue
else:
import Queue as queue
ArchCfg = collections.namedtuple("ArchCfg",
["triple", "arch_define", "arch_options"])
ARCHITECTURES = {
"ia32":
ArchCfg(
triple="i586-unknown-linux",
arch_define="V8_TARGET_ARCH_IA32",
arch_options=["-m32"],
),
"arm":
ArchCfg(
triple="i586-unknown-linux",
arch_define="V8_TARGET_ARCH_ARM",
arch_options=["-m32"],
),
"x64":
ArchCfg(
triple="x86_64-unknown-linux",
arch_define="V8_TARGET_ARCH_X64",
arch_options=[]),
"arm64":
ArchCfg(
triple="x86_64-unknown-linux",
arch_define="V8_TARGET_ARCH_ARM64",
arch_options=[],
),
}
def log(format, *args):
print(format.format(*args))
def fatal(format, *args):
log(format, *args)
sys.exit(1)
# -----------------------------------------------------------------------------
# Clang invocation
def MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
clang_plugins_dir):
prefixed_plugin_args = []
if plugin_args:
for arg in plugin_args:
prefixed_plugin_args += [
"-Xclang",
"-plugin-arg-{}".format(plugin),
"-Xclang",
arg,
]
return ([
os.path.join(clang_bin_dir, "clang++"),
"-std=c++14",
"-c",
"-Xclang",
"-load",
"-Xclang",
os.path.join(clang_plugins_dir, "libgcmole.so"),
"-Xclang",
"-plugin",
"-Xclang",
plugin,
] + prefixed_plugin_args + [
"-Xclang",
"-triple",
"-Xclang",
arch_cfg.triple,
"-fno-exceptions",
"-D",
arch_cfg.arch_define,
"-DENABLE_DEBUGGER_SUPPORT",
"-DV8_INTL_SUPPORT",
"-I./",
"-Iinclude/",
"-Iout/build/gen",
"-Ithird_party/icu/source/common",
"-Ithird_party/icu/source/i18n",
] + arch_cfg.arch_options)
def InvokeClangPluginForFile(filename, cmd_line, verbose):
if verbose:
print("popen ", " ".join(cmd_line + [filename]))
p = subprocess.Popen(
cmd_line + [filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
return p.returncode, stdout, stderr
def InvokeClangPluginForFilesInQueue(i, input_queue, output_queue, cancel_event,
cmd_line, verbose):
success = False
try:
while not cancel_event.is_set():
filename = input_queue.get_nowait()
ret, stdout, stderr = InvokeClangPluginForFile(filename, cmd_line,
verbose)
output_queue.put_nowait((filename, ret, stdout.decode('utf-8'), stderr.decode('utf-8')))
if ret != 0:
break
except KeyboardInterrupt:
log("-- [{}] Interrupting", i)
except queue.Empty:
success = True
finally:
# Emit a success bool so that the reader knows that there was either an
# error or all files were processed.
output_queue.put_nowait(success)
def InvokeClangPluginForEachFile(
filenames,
plugin,
plugin_args,
arch_cfg,
flags,
clang_bin_dir,
clang_plugins_dir,
):
cmd_line = MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
clang_plugins_dir)
verbose = flags["verbose"]
if flags["sequential"]:
log("** Sequential execution.")
for filename in filenames:
log("-- {}", filename)
returncode, stdout, stderr = InvokeClangPluginForFile(
filename, cmd_line, verbose)
if returncode != 0:
sys.stderr.write(stderr)
sys.exit(returncode)
yield filename, stdout, stderr
else:
log("** Parallel execution.")
cpus = cpu_count()
input_queue = queue.Queue()
output_queue = queue.Queue()
threads = []
try:
for filename in filenames:
input_queue.put(filename)
cancel_event = threading.Event()
for i in range(min(len(filenames), cpus)):
threads.append(
threading.Thread(
target=InvokeClangPluginForFilesInQueue,
args=(i, input_queue, output_queue, cancel_event, cmd_line,
verbose)))
for t in threads:
t.start()
num_finished = 0
while num_finished < len(threads):
output = output_queue.get()
if type(output) == bool:
if output:
num_finished += 1
continue
else:
break
filename, returncode, stdout, stderr = output
log("-- {}", filename)
if returncode != 0:
sys.stderr.write(stderr)
sys.exit(returncode)
yield filename, stdout, stderr
finally:
cancel_event.set()
for t in threads:
t.join()
# -----------------------------------------------------------------------------
def ParseGNFile(for_test):
result = {}
if for_test:
gn_files = [("tools/gcmole/GCMOLE.gn", re.compile('"([^"]*?\.cc)"'), "")]
else:
gn_files = [
("BUILD.gn", re.compile('"([^"]*?\.cc)"'), ""),
("test/cctest/BUILD.gn", re.compile('"(test-[^"]*?\.cc)"'),
"test/cctest/"),
]
for filename, pattern, prefix in gn_files:
with open(filename) as gn_file:
gn = gn_file.read()
for condition, sources in re.findall("### gcmole\((.*?)\) ###(.*?)\]", gn,
re.MULTILINE | re.DOTALL):
if condition not in result:
result[condition] = []
for file in pattern.findall(sources):
result[condition].append(prefix + file)
return result
def EvaluateCondition(cond, props):
if cond == "all":
return True
m = re.match("(\w+):(\w+)", cond)
if m is None:
fatal("failed to parse condition: {}", cond)
p, v = m.groups()
if p not in props:
fatal("undefined configuration property: {}", p)
return props[p] == v
def BuildFileList(sources, props):
ret = []
for condition, files in sources.items():
if EvaluateCondition(condition, props):
ret += files
return ret
gn_sources = ParseGNFile(for_test=False)
gn_test_sources = ParseGNFile(for_test=True)
def FilesForArch(arch):
return BuildFileList(gn_sources, {
"os": "linux",
"arch": arch,
"mode": "debug",
"simulator": ""
})
def FilesForTest(arch):
return BuildFileList(gn_test_sources, {
"os": "linux",
"arch": arch,
"mode": "debug",
"simulator": ""
})
# -----------------------------------------------------------------------------
# GCSuspects Generation
# Note that the gcsuspects file lists functions in the form:
# mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
ALLOWLIST = [
# The following functions call CEntryStub which is always present.
"MacroAssembler.*,CallRuntime",
"CompileCallLoadPropertyWithInterceptor",
"CallIC.*,GenerateMiss",
# DirectCEntryStub is a special stub used on ARM.
# It is pinned and always present.
"DirectCEntryStub.*,GenerateCall",
# TODO GCMole currently is sensitive enough to understand that certain
# functions only cause GC and return Failure simulataneously.
# Callsites of such functions are safe as long as they are properly
# check return value and propagate the Failure to the caller.
# It should be possible to extend GCMole to understand this.
"Heap.*,TryEvacuateObject",
# Ignore all StateTag methods.
"StateTag",
# Ignore printing of elements transition.
"PrintElementsTransition",
# CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
"CodeCreateEvent",
"WriteField",
]
GC_PATTERN = ",.*Collect.*Garbage"
SAFEPOINT_PATTERN = ",EnterSafepoint"
ALLOWLIST_PATTERN = "|".join("(?:%s)" % p for p in ALLOWLIST)
def MergeRegexp(pattern_dict):
return re.compile("|".join(
"(?P<%s>%s)" % (key, value) for (key, value) in pattern_dict.items()))
IS_SPECIAL_WITHOUT_ALLOW_LIST = MergeRegexp({
"gc": GC_PATTERN,
"safepoint": SAFEPOINT_PATTERN
})
IS_SPECIAL_WITH_ALLOW_LIST = MergeRegexp({
"gc": GC_PATTERN,
"safepoint": SAFEPOINT_PATTERN,
"allow": ALLOWLIST_PATTERN
})
class GCSuspectsCollector:
def __init__(self, flags):
self.gc = {}
self.gc_caused = collections.defaultdict(lambda: [])
self.funcs = {}
self.current_caller = None
self.allowlist = flags["allowlist"]
self.is_special = IS_SPECIAL_WITH_ALLOW_LIST if self.allowlist else IS_SPECIAL_WITHOUT_ALLOW_LIST
def AddCause(self, name, cause):
self.gc_caused[name].append(cause)
def Parse(self, lines):
for funcname in lines:
if not funcname:
continue
if funcname[0] != "\t":
self.Resolve(funcname)
self.current_caller = funcname
else:
name = funcname[1:]
callers_for_name = self.Resolve(name)
callers_for_name.add(self.current_caller)
def Resolve(self, name):
if name not in self.funcs:
self.funcs[name] = set()
m = self.is_special.search(name)
if m:
if m.group("gc"):
self.gc[name] = True
self.AddCause(name, "<GC>")
elif m.group("safepoint"):
self.gc[name] = True
self.AddCause(name, "<Safepoint>")
elif m.group("allow"):
self.gc[name] = False
return self.funcs[name]
def Propagate(self):
log("** Propagating GC information")
def mark(funcname, callers):
for caller in callers:
if caller not in self.gc:
self.gc[caller] = True
mark(caller, self.funcs[caller])
self.AddCause(caller, funcname)
for funcname, callers in self.funcs.items():
if self.gc.get(funcname, False):
mark(funcname, callers)
def GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
clang_plugins_dir):
# Reset the global state.
collector = GCSuspectsCollector(flags)
log("** Building GC Suspects for {}", arch)
for filename, stdout, stderr in InvokeClangPluginForEachFile(
files, "dump-callees", [], arch_cfg, flags, clang_bin_dir,
clang_plugins_dir):
collector.Parse(stdout.splitlines())
collector.Propagate()
with open("gcsuspects", "w") as out:
for name, value in collector.gc.items():
if value:
out.write(name + "\n")
with open("gccauses", "w") as out:
out.write("GC = {\n")
for name, causes in collector.gc_caused.items():
out.write(" '{}': [\n".format(name))
for cause in causes:
out.write(" '{}',\n".format(cause))
out.write(" ],\n")
out.write("}\n")
log("** GCSuspects generated for {}", arch)
# ------------------------------------------------------------------------------
# Analysis
def CheckCorrectnessForArch(arch, for_test, flags, clang_bin_dir,
clang_plugins_dir):
if for_test:
files = FilesForTest(arch)
else:
files = FilesForArch(arch)
arch_cfg = ARCHITECTURES[arch]
if not flags["reuse_gcsuspects"]:
GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
clang_plugins_dir)
else:
log("** Reusing GCSuspects for {}", arch)
processed_files = 0
errors_found = False
output = ""
log(
"** Searching for evaluation order problems{} for {}",
" and dead variables" if flags["dead_vars"] else "",
arch,
)
plugin_args = []
if flags["dead_vars"]:
plugin_args.append("--dead-vars")
if flags["verbose_trace"]:
plugin_args.append("--verbose")
for filename, stdout, stderr in InvokeClangPluginForEachFile(
files,
"find-problems",
plugin_args,
arch_cfg,
flags,
clang_bin_dir,
clang_plugins_dir,
):
processed_files = processed_files + 1
if not errors_found:
errors_found = re.search("^[^:]+:\d+:\d+: (warning|error)", stderr,
re.MULTILINE) is not None
if for_test:
output = output + stderr
else:
sys.stdout.write(stderr)
log(
"** Done processing {} files. {}",
processed_files,
"Errors found" if errors_found else "No errors found",
)
return errors_found, output
def TestRun(flags, clang_bin_dir, clang_plugins_dir):
errors_found, output = CheckCorrectnessForArch("x64", True, flags,
clang_bin_dir,
clang_plugins_dir)
if not errors_found:
log("** Test file should produce errors, but none were found. Output:")
log(output)
return False
filename = "tools/gcmole/test-expectations.txt"
with open(filename) as exp_file:
expectations = exp_file.read()
if output != expectations:
log("** Output mismatch from running tests. Please run them manually.")
for line in difflib.context_diff(
expectations.splitlines(),
output.splitlines(),
fromfile=filename,
tofile="output",
lineterm="",
):
log("{}", line)
log("--- Full output ---")
log(output)
log("------")
return False
log("** Tests ran successfully")
return True
def main(args):
DIR = os.path.dirname(args[0])
clang_bin_dir = os.getenv("CLANG_BIN")
clang_plugins_dir = os.getenv("CLANG_PLUGINS")
if not clang_bin_dir or clang_bin_dir == "":
fatal("CLANG_BIN not set")
if not clang_plugins_dir or clang_plugins_dir == "":
clang_plugins_dir = DIR
flags = {
#: not build gcsuspects file and reuse previously generated one.
"reuse_gcsuspects": False,
#:n't use parallel python runner.
"sequential": False,
# Print commands to console before executing them.
"verbose": False,
# Perform dead variable analysis.
"dead_vars": True,
# Enable verbose tracing from the plugin itself.
"verbose_trace": False,
# When building gcsuspects allowlist certain functions as if they can be
# causing GC. Currently used to reduce number of false positives in dead
# variables analysis. See TODO for ALLOWLIST
"allowlist": True,
}
pos_args = []
flag_regexp = re.compile("^--(no[-_]?)?([\w\-_]+)$")
for arg in args[1:]:
m = flag_regexp.match(arg)
if m:
no, flag = m.groups()
flag = flag.replace("-", "_")
if flag in flags:
flags[flag] = no is None
else:
fatal("Unknown flag: {}", flag)
else:
pos_args.append(arg)
archs = pos_args if len(pos_args) > 0 else ["ia32", "arm", "x64", "arm64"]
any_errors_found = False
if not TestRun(flags, clang_bin_dir, clang_plugins_dir):
any_errors_found = True
else:
for arch in archs:
if not ARCHITECTURES[arch]:
fatal("Unknown arch: {}", arch)
errors_found, output = CheckCorrectnessForArch(arch, False, flags,
clang_bin_dir,
clang_plugins_dir)
any_errors_found = any_errors_found or errors_found
sys.exit(1 if any_errors_found else 0)
if __name__ == "__main__":
main(sys.argv)