v8/tools/gcmole/gcmole.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

524 lines
14 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This is the main driver for the gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]
# for py2/py3 compatibility
from __future__ import print_function
import sys
import os
import re
import difflib
import subprocess
import multiprocessing
import collections
# Per-architecture clang settings: the value passed after "-triple", the
# preprocessor symbol passed after "-D", and a list of extra compiler options
# appended to the end of the command line.
ArchCfg = collections.namedtuple("ArchCfg",
                                 ["triple", "arch_define", "arch_options"])
# Maps each supported architecture name to its ArchCfg.
# Note that the ARM entries reuse the x86 triples of matching pointer width
# ("arm" shares the i586 triple and -m32 with "ia32", "arm64" shares the
# x86_64 triple with "x64"); only arch_define distinguishes them.
ARCHITECTURES = {
    "ia32":
        ArchCfg(
            triple="i586-unknown-linux",
            arch_define="V8_TARGET_ARCH_IA32",
            arch_options=["-m32"],
        ),
    "arm":
        ArchCfg(
            triple="i586-unknown-linux",
            arch_define="V8_TARGET_ARCH_ARM",
            arch_options=["-m32"],
        ),
    "x64":
        ArchCfg(
            triple="x86_64-unknown-linux",
            arch_define="V8_TARGET_ARCH_X64",
            arch_options=[]),
    "arm64":
        ArchCfg(
            triple="x86_64-unknown-linux",
            arch_define="V8_TARGET_ARCH_ARM64",
            arch_options=[],
        ),
}
def log(format, *args):
  """Print `format` to stdout after expanding it with `args`.

  `format` is always treated as a str.format template, even when `args` is
  empty, so literal braces in it must be doubled by the caller.
  """
  message = format.format(*args)
  print(message)
def fatal(format, *args):
  """Log the formatted message, then terminate with exit status 1.

  Never returns: termination is signalled by raising SystemExit(1), which is
  exactly what sys.exit(1) does.
  """
  log(format, *args)
  raise SystemExit(1)
# -----------------------------------------------------------------------------
# Clang invocation
def MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                         clang_plugins_dir):
  """Build the clang++ argument list that runs one gcmole plugin.

  The source file to analyse is NOT part of the result; callers append it.

  Args:
    plugin: name of the plugin to run (passed after "-plugin").
    plugin_args: iterable of plugin arguments, or a falsy value for none.
      Each one is forwarded as "-plugin-arg-<plugin>" followed by the value.
    arch_cfg: object with `triple`, `arch_define` and `arch_options`
      attributes (see ArchCfg); `arch_options` must be a list.
    clang_bin_dir: directory containing the "clang++" binary.
    clang_plugins_dir: directory containing "libgcmole.so".

  Returns:
    A new list of command-line strings.
  """
  command = [
      os.path.join(clang_bin_dir, "clang++"),
      "-std=c++14",
      "-c",
      "-Xclang", "-load",
      "-Xclang", os.path.join(clang_plugins_dir, "libgcmole.so"),
      "-Xclang", "-plugin",
      "-Xclang", plugin,
  ]

  # Every plugin argument has to be smuggled through the driver with its own
  # pair of -Xclang prefixes.
  plugin_arg_flag = "-plugin-arg-{}".format(plugin)
  for plugin_arg in plugin_args or []:
    command.extend(["-Xclang", plugin_arg_flag, "-Xclang", plugin_arg])

  command.extend([
      "-Xclang", "-triple",
      "-Xclang", arch_cfg.triple,
      "-fno-exceptions",
      "-D", arch_cfg.arch_define,
      "-DENABLE_DEBUGGER_SUPPORT",
      "-DV8_INTL_SUPPORT",
      "-I./",
      "-Iinclude/",
      "-Iout/build/gen",
      "-Ithird_party/icu/source/common",
      "-Ithird_party/icu/source/i18n",
  ])
  return command + arch_cfg.arch_options
def InvokeClangPluginForFile(filename, cmd_line, verbose):
  """Run the prepared clang command on one source file and capture its output.

  Args:
    filename: source file to analyse; appended as the last argument.
    cmd_line: list of command-line strings (see MakeClangCommandLine).
    verbose: when true, print the full command before running it.

  Returns:
    A (returncode, stdout, stderr) tuple where stdout and stderr are text
    strings. If the call is interrupted with Ctrl-C the tuple is (1, "", "").
  """
  full_cmd_line = cmd_line + [filename]
  try:
    log("-- {}", filename)
    if verbose:
      print("popen ", " ".join(full_cmd_line))
    # universal_newlines=True makes the pipes return text instead of bytes on
    # Python 3; callers split the output on '\n' and write it to sys.stderr,
    # both of which require str.
    p = subprocess.Popen(
        full_cmd_line,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True)
    stdout, stderr = p.communicate()
    return p.returncode, stdout, stderr
  except KeyboardInterrupt:
    log("-- Interrupting {}", filename)
    # Callers always unpack three values, so keep the tuple shape consistent
    # with the normal return path.
    return 1, "", ""
def InvokeClangPluginForEachFile(
    filenames,
    plugin,
    plugin_args,
    arch_cfg,
    flags,
    clang_bin_dir,
    clang_plugins_dir,
):
  """Run one gcmole plugin over every file, sequentially or in a process pool.

  Args:
    filenames: iterable of source files to analyse.
    plugin: plugin name, forwarded to MakeClangCommandLine.
    plugin_args: plugin arguments, forwarded to MakeClangCommandLine.
    arch_cfg: architecture configuration, forwarded to MakeClangCommandLine.
    flags: dict of options; reads "verbose" and "sequential".
    clang_bin_dir: directory of the clang binaries.
    clang_plugins_dir: directory of the gcmole plugin library.

  Returns:
    Dict mapping each filename to its (stdout, stderr) pair.

  The first non-zero clang return code that is observed aborts the whole
  process: the captured stderr is written to sys.stderr and sys.exit is
  called with that return code.
  """
  cmd_line = MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                                  clang_plugins_dir)
  verbose = flags["verbose"]
  results = {}

  def record(name, invocation_result):
    # Shared by both execution modes: bail out on failure, otherwise keep
    # the captured streams.
    returncode, stdout, stderr = invocation_result
    if returncode != 0:
      sys.stderr.write(stderr)
      sys.exit(returncode)
    results[name] = (stdout, stderr)

  if flags["sequential"]:
    log("** Sequential execution.")
    for name in filenames:
      record(name, InvokeClangPluginForFile(name, cmd_line, verbose))
    return results

  log("** Parallel execution.")
  worker_count = multiprocessing.cpu_count()
  pool = multiprocessing.Pool(worker_count)
  try:
    # A semaphore with one slot per worker throttles submission so that no
    # more than `worker_count` invocations are outstanding at a time.
    free_slots = multiprocessing.BoundedSemaphore(worker_count)

    def release_slot(_result):
      free_slots.release()

    pending = {}
    for name in filenames:
      free_slots.acquire()
      pending[name] = pool.apply_async(
          InvokeClangPluginForFile, (name, cmd_line, verbose),
          callback=release_slot)

    for name, handle in pending.items():
      record(name, handle.get())
  except KeyboardInterrupt as e:
    pool.terminate()
    pool.join()
    raise e
  finally:
    pool.close()
  return results
# -----------------------------------------------------------------------------
def ParseGNFile(for_test):
  """Collect the gcmole-annotated source lists from the GN build files.

  A section starts at a "### gcmole(<condition>) ###" marker and runs up to
  the next "]". Every quoted "*.cc" name inside it is recorded under
  <condition>.

  Args:
    for_test: when true, read only "tools/gcmole/GCMOLE.gn"; otherwise read
      "BUILD.gn" and "test/cctest/BUILD.gn" (from the latter only "test-*.cc"
      names are taken, and they are prefixed with "test/cctest/").

  Returns:
    Dict mapping each condition string to the list of matching file paths,
    in the order they were found. Paths are relative to the current working
    directory, as are the GN files that are opened.
  """
  # Raw strings keep the regex escapes (\., \(, \]) out of Python's own
  # string-escape processing.
  if for_test:
    gn_files = [("tools/gcmole/GCMOLE.gn", re.compile(r'"([^"]*?\.cc)"'), "")]
  else:
    gn_files = [
        ("BUILD.gn", re.compile(r'"([^"]*?\.cc)"'), ""),
        ("test/cctest/BUILD.gn", re.compile(r'"(test-[^"]*?\.cc)"'),
         "test/cctest/"),
    ]
  section_re = re.compile(r"### gcmole\((.*?)\) ###(.*?)\]",
                          re.MULTILINE | re.DOTALL)

  result = {}
  for filename, pattern, prefix in gn_files:
    with open(filename) as gn_file:
      gn = gn_file.read()
    for condition, sources in section_re.findall(gn):
      result.setdefault(condition, []).extend(
          prefix + source for source in pattern.findall(sources))
  return result
def EvaluateCondition(cond, props):
  """Decide whether a gcmole condition string holds for `props`.

  Args:
    cond: either the literal "all" or a string starting with
      "<property>:<value>", both made of word characters.
    props: dict mapping property names to their configured values.

  Returns:
    True for "all"; otherwise whether props[<property>] equals <value>.

  Exits the process through fatal() when `cond` cannot be parsed or names a
  property that is missing from `props`.
  """
  if cond == "all":
    return True

  # Raw string so that \w is a regex escape rather than an (invalid) Python
  # string escape.
  m = re.match(r"(\w+):(\w+)", cond)
  if m is None:
    fatal("failed to parse condition: {}", cond)
  p, v = m.groups()
  if p not in props:
    fatal("undefined configuration property: {}", p)
  return props[p] == v
def BuildFileList(sources, props):
  """Concatenate the file lists whose condition holds for `props`.

  Args:
    sources: dict mapping condition strings to lists of file paths.
    props: configuration properties handed to EvaluateCondition.

  Returns:
    A new list with the files of every satisfied condition, in the
    iteration order of `sources`.
  """
  selected = []
  for condition, files in sources.items():
    if not EvaluateCondition(condition, props):
      continue
    selected.extend(files)
  return selected
# Both source lists are parsed once, when this module is executed, and are
# keyed by gcmole condition string: gn_sources comes from the real build
# files, gn_test_sources from the gcmole self-test GN file.
gn_sources = ParseGNFile(for_test=False)
gn_test_sources = ParseGNFile(for_test=True)
def FilesForArch(arch):
  """Return the production source files to analyse for `arch`.

  The list is filtered from gn_sources for a linux debug configuration with
  an empty "simulator" property.
  """
  props = {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": "",
  }
  return BuildFileList(gn_sources, props)
def FilesForTest(arch):
  """Return the gcmole self-test source files to analyse for `arch`.

  The list is filtered from gn_test_sources for a linux debug configuration
  with an empty "simulator" property.
  """
  props = {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": "",
  }
  return BuildFileList(gn_test_sources, props)
# -----------------------------------------------------------------------------
# GCSuspects Generation
# Note that the gcsuspects file lists functions in the form:
# mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
# Regular expressions that are searched for in each function entry; when the
# "allowlist" flag is on, a matching function is recorded as not causing GC.
ALLOWLIST = set([
    # The following functions call CEntryStub which is always present.
    "MacroAssembler.*,CallRuntime",
    "CompileCallLoadPropertyWithInterceptor",
    "CallIC.*,GenerateMiss",
    # DirectCEntryStub is a special stub used on ARM.
    # It is pinned and always present.
    "DirectCEntryStub.*,GenerateCall",
    # TODO: GCMole currently is not sensitive enough to understand that
    # certain functions only cause GC and return Failure simultaneously.
    # Callsites of such functions are safe as long as they properly
    # check the return value and propagate the Failure to the caller.
    # It should be possible to extend GCMole to understand this.
    "Heap.*,TryEvacuateObject",
    # Ignore all StateTag methods.
    "StateTag",
    # Ignore printing of elements transition.
    "PrintElementsTransition",
    # CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
    "CodeCreateEvent",
    "WriteField",
])
class GCSuspectsCollector:
def __init__(self, flags):
self.gc = {}
self.gc_caused = collections.defaultdict(lambda: [])
self.funcs = {}
self.current_scope = None
self.allowlist = flags["allowlist"]
def AddCause(self, name, cause):
self.gc_caused[name].append(cause)
def Parse(self, lines):
for funcname in lines:
if not funcname:
continue
if funcname[0] != "\t":
self.Resolve(funcname)
self.current_scope = funcname
else:
name = funcname[1:]
self.Resolve(name)[self.current_scope] = True
def Resolve(self, name):
if name not in self.funcs:
self.funcs[name] = {}
if re.search(",.*Collect.*Garbage", name):
self.gc[name] = True
self.AddCause(name, "<GC>")
if re.search(",EnterSafepoint", name):
self.gc[name] = True
self.AddCause(name, "<Safepoint>")
if self.allowlist:
for allow in ALLOWLIST:
if re.search(allow, name):
self.gc[name] = False
return self.funcs[name]
def Propagate(self):
log("** Propagating GC information")
def mark(funcname, callers):
for caller in callers:
if caller not in self.gc:
self.gc[caller] = True
mark(caller, self.funcs[caller])
self.AddCause(caller, funcname)
for funcname, callers in self.funcs.items():
if self.gc.get(funcname, False):
mark(funcname, callers)
def GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir):
  """Run the "dump-callees" plugin and write the GC suspect files.

  The stdout of every plugin invocation is fed to a fresh
  GCSuspectsCollector. After propagation two files are (re)written in the
  current working directory:
    gcsuspects: one line per function whose GC mark is true.
    gccauses: a "GC = { ... }" listing of the recorded causes per function.

  Args:
    arch: architecture name, used for log messages only.
    files: source files to run the plugin on.
    arch_cfg: architecture configuration for the clang command line.
    flags: option dict, passed on to the collector and the plugin runner.
    clang_bin_dir: directory of the clang binaries.
    clang_plugins_dir: directory of the gcmole plugin library.
  """
  collector = GCSuspectsCollector(flags)

  log("** Building GC Suspects for {}", arch)
  plugin_outputs = InvokeClangPluginForEachFile(
      files, "dump-callees", [], arch_cfg, flags, clang_bin_dir,
      clang_plugins_dir)
  for stdout, _stderr in plugin_outputs.values():
    collector.Parse(stdout.split('\n'))
  collector.Propagate()

  with open("gcsuspects", "w") as out:
    out.writelines(
        name + "\n" for name, is_suspect in collector.gc.items() if is_suspect)

  with open("gccauses", "w") as out:
    out.write("GC = {\n")
    for name, causes in collector.gc_caused.items():
      entry = [" '{}': [\n".format(name)]
      entry.extend(" '{}',\n".format(cause) for cause in causes)
      entry.append(" ],\n")
      out.write("".join(entry))
    out.write("}\n")

  log("** GCSuspects generated for {}", arch)
# ------------------------------------------------------------------------------
# Analysis
def CheckCorrectnessForArch(arch, for_test, flags, clang_bin_dir,
                            clang_plugins_dir):
  """Run the "find-problems" plugin over all files of one architecture.

  Unless flags["reuse_gcsuspects"] is set, the GC suspect files are
  regenerated first.

  Args:
    arch: key into ARCHITECTURES.
    for_test: when true, analyse the gcmole self-test files and capture the
      plugin stderr instead of printing it.
    flags: option dict; reads "reuse_gcsuspects", "dead_vars" and
      "verbose_trace" and is passed on to the plugin runner.
    clang_bin_dir: directory of the clang binaries.
    clang_plugins_dir: directory of the gcmole plugin library.

  Returns:
    (errors_found, output). errors_found is True when any stderr line looks
    like a "<file>:<line>:<col>: warning|error" diagnostic. output holds the
    captured stderr lines, each preceded by a newline, when for_test is
    true and is "" otherwise.
  """
  files = FilesForTest(arch) if for_test else FilesForArch(arch)
  arch_cfg = ARCHITECTURES[arch]

  if not flags["reuse_gcsuspects"]:
    GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir)
  else:
    log("** Reusing GCSuspects for {}", arch)

  log(
      "** Searching for evaluation order problems{} for {}",
      " and dead variables" if flags["dead_vars"] else "",
      arch,
  )

  plugin_args = []
  if flags["dead_vars"]:
    plugin_args.append("--dead-vars")
  if flags["verbose_trace"]:
    plugin_args.append("--verbose")

  # Compiled once, as a raw string so that \d is a regex escape rather than
  # an (invalid) Python string escape.
  diagnostic_re = re.compile(r"^[^:]+:\d+:\d+: (warning|error)")

  processed_files = 0
  errors_found = False
  # Collected as pieces and joined once at the end to avoid repeatedly
  # copying an ever-growing string.
  captured = []
  plugin_outputs = InvokeClangPluginForEachFile(
      files,
      "find-problems",
      plugin_args,
      arch_cfg,
      flags,
      clang_bin_dir,
      clang_plugins_dir,
  )
  for _stdout, stderr in plugin_outputs.values():
    processed_files += 1
    for line in stderr.split('\n'):
      if not errors_found:
        errors_found = diagnostic_re.match(line) is not None
      if for_test:
        captured.append("\n" + line)
      else:
        print(line)
  output = "".join(captured)

  log(
      "** Done processing {} files. {}",
      processed_files,
      "Errors found" if errors_found else "No errors found",
  )
  return errors_found, output
def TestRun(flags, clang_bin_dir, clang_plugins_dir):
  """Run gcmole on its own test input and compare against the expectations.

  The self-test is analysed for "x64" and its captured output must be
  identical to the contents of "tools/gcmole/test-expectations.txt".

  Args:
    flags: option dict, passed on to CheckCorrectnessForArch.
    clang_bin_dir: directory of the clang binaries.
    clang_plugins_dir: directory of the gcmole plugin library.

  Returns:
    True when errors were reported and the output matches the expectations;
    False otherwise (a diagnostic is logged first).
  """
  errors_found, output = CheckCorrectnessForArch("x64", True, flags,
                                                 clang_bin_dir,
                                                 clang_plugins_dir)
  if not errors_found:
    log("** Test file should produce errors, but none were found. Output:")
    # The output is arbitrary plugin text; pass it as an argument so that
    # braces in it are not interpreted as str.format placeholders.
    log("{}", output)
    return False

  filename = "tools/gcmole/test-expectations.txt"
  with open(filename) as exp_file:
    expectations = exp_file.read()

  if output != expectations:
    log("** Output mismatch from running tests. Please run them manually.")
    for line in difflib.context_diff(
        expectations.split("\n"),
        output.split("\n"),
        fromfile=filename,
        tofile="output",
        lineterm="",
    ):
      log("{}", line)
    log("--- Full output ---")
    # Same as above: never use the raw output as the format template.
    log("{}", output)
    log("------")
    return False

  log("** Tests ran successfully")
  return True
def main(args):
  """Command-line entry point.

  Args:
    args: argv-style list. args[0] is the script path (its directory is the
      fallback plugin directory); the remaining items are either flags of
      the form --name / --no-name (dashes and underscores interchangeable
      in the name) or architecture names.

  Environment:
    CLANG_BIN: required, directory containing clang++.
    CLANG_PLUGINS: optional, directory containing the gcmole plugin;
      defaults to the directory of the script.

  Always terminates through sys.exit: status 1 when the self-test fails, any
  architecture reports problems, or the invocation is invalid; 0 otherwise.
  """
  DIR = os.path.dirname(args[0])

  clang_bin_dir = os.getenv("CLANG_BIN")
  clang_plugins_dir = os.getenv("CLANG_PLUGINS")

  if not clang_bin_dir:
    fatal("CLANG_BIN not set")
  if not clang_plugins_dir:
    clang_plugins_dir = DIR

  flags = {
      # Do not build gcsuspects file and reuse previously generated one.
      "reuse_gcsuspects": False,
      # Don't use parallel python runner.
      "sequential": False,
      # Print commands to console before executing them.
      "verbose": False,
      # Perform dead variable analysis.
      "dead_vars": True,
      # Enable verbose tracing from the plugin itself.
      "verbose_trace": False,
      # When building gcsuspects allowlist certain functions as if they can be
      # causing GC. Currently used to reduce number of false positives in dead
      # variables analysis. See TODO for ALLOWLIST
      "allowlist": True,
  }

  pos_args = []
  flag_regexp = re.compile(r"^--(no[-_]?)?([\w\-_]+)$")
  for arg in args[1:]:
    m = flag_regexp.match(arg)
    if m:
      no, flag = m.groups()
      flag = flag.replace("-", "_")
      if flag in flags:
        # A "no" prefix switches the flag off, its absence switches it on.
        flags[flag] = no is None
      else:
        fatal("Unknown flag: {}", flag)
    else:
      pos_args.append(arg)

  archs = pos_args if pos_args else ["ia32", "arm", "x64", "arm64"]

  # Reject unknown architectures before any expensive work is started. A
  # membership test is required here: indexing ARCHITECTURES with an unknown
  # name raises KeyError instead of reaching the error message.
  for arch in archs:
    if arch not in ARCHITECTURES:
      fatal("Unknown arch: {}", arch)

  any_errors_found = False
  if not TestRun(flags, clang_bin_dir, clang_plugins_dir):
    any_errors_found = True
  else:
    for arch in archs:
      errors_found, _ = CheckCorrectnessForArch(arch, False, flags,
                                                clang_bin_dir,
                                                clang_plugins_dir)
      any_errors_found = any_errors_found or errors_found

  sys.exit(1 if any_errors_found else 0)
# Run the tool only when this file is executed as a script; main() receives
# the full argv, including the script path, and exits the process itself.
if __name__ == "__main__":
  main(sys.argv)