v8/tools/gcmole/gcmole.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

577 lines
16 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This is the main driver for the gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]
# for py2/py3 compatibility
from __future__ import print_function
import collections
import difflib
from multiprocessing import cpu_count
import os
import re
import subprocess
import sys
import threading
if sys.version_info.major > 2:
import queue
else:
import Queue as queue
# Clang settings for one target architecture: the target triple passed to
# clang, the V8 target-architecture macro to define, and extra clang options.
ArchCfg = collections.namedtuple(
    "ArchCfg", ["triple", "arch_define", "arch_options"])

# Settings for every architecture name accepted on the command line. Note that
# "arm" and "arm64" reuse the triples of "ia32" and "x64" respectively; only
# the architecture define differs.
ARCHITECTURES = {
    "ia32": ArchCfg(
        triple="i586-unknown-linux",
        arch_define="V8_TARGET_ARCH_IA32",
        arch_options=["-m32"],
    ),
    "arm": ArchCfg(
        triple="i586-unknown-linux",
        arch_define="V8_TARGET_ARCH_ARM",
        arch_options=["-m32"],
    ),
    "x64": ArchCfg(
        triple="x86_64-unknown-linux",
        arch_define="V8_TARGET_ARCH_X64",
        arch_options=[],
    ),
    "arm64": ArchCfg(
        triple="x86_64-unknown-linux",
        arch_define="V8_TARGET_ARCH_ARM64",
        arch_options=[],
    ),
}
def log(format, *args):
  """Prints `format` to stdout with its `{}` fields filled in from `args`."""
  message = format.format(*args)
  print(message)
def fatal(format, *args):
  """Logs the formatted message, then exits the process with status 1."""
  log(format, *args)
  sys.exit(1)
# -----------------------------------------------------------------------------
# Clang invocation
def MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                         clang_plugins_dir):
  """Builds the clang++ argument list that runs one gcmole plugin.

  Args:
    plugin: Name of the plugin to run inside libgcmole.so.
    plugin_args: Arguments for the plugin; may be empty or None.
    arch_cfg: Object providing `triple`, `arch_define` and `arch_options`.
    clang_bin_dir: Directory containing the clang++ binary.
    clang_plugins_dir: Directory containing libgcmole.so.

  Returns:
    The command line as a list of strings, without the source file name.
  """
  # Every plugin argument is forwarded as
  # "-Xclang -plugin-arg-<plugin> -Xclang <arg>".
  plugin_arg_switch = "-plugin-arg-{}".format(plugin)
  forwarded_plugin_args = []
  for plugin_arg in plugin_args or []:
    forwarded_plugin_args.extend(
        ["-Xclang", plugin_arg_switch, "-Xclang", plugin_arg])

  load_and_select_plugin = [
      os.path.join(clang_bin_dir, "clang++"),
      "-std=c++14",
      "-c",
      "-Xclang", "-load",
      "-Xclang", os.path.join(clang_plugins_dir, "libgcmole.so"),
      "-Xclang", "-plugin",
      "-Xclang", plugin,
  ]
  target_defines_and_includes = [
      "-Xclang", "-triple",
      "-Xclang", arch_cfg.triple,
      "-fno-exceptions",
      "-D", arch_cfg.arch_define,
      "-DENABLE_DEBUGGER_SUPPORT",
      "-DV8_INTL_SUPPORT",
      "-DV8_ENABLE_WEBASSEMBLY",
      "-I./",
      "-Iinclude/",
      "-Iout/build/gen",
      "-Ithird_party/icu/source/common",
      "-Ithird_party/icu/source/i18n",
  ]
  return (load_and_select_plugin + forwarded_plugin_args +
          target_defines_and_includes + arch_cfg.arch_options)
def InvokeClangPluginForFile(filename, cmd_line, verbose):
  """Runs `cmd_line` with `filename` appended and waits for it to finish.

  Args:
    filename: Source file passed as the last command-line argument.
    cmd_line: List of command-line arguments preceding the file name.
    verbose: If true, the full command is printed before it is started.

  Returns:
    A (returncode, stdout, stderr) tuple; the two streams are returned exactly
    as `Popen.communicate()` produced them.
  """
  full_cmd = cmd_line + [filename]
  if verbose:
    print("popen ", " ".join(full_cmd))
  proc = subprocess.Popen(
      full_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  captured_out, captured_err = proc.communicate()
  return proc.returncode, captured_out, captured_err
def InvokeClangPluginForFilesInQueue(i, input_queue, output_queue, cancel_event,
                                     cmd_line, verbose):
  """Worker loop: runs the plugin on files taken from `input_queue`.

  For every processed file a (filename, returncode, stdout, stderr) tuple with
  UTF-8 decoded streams is put on `output_queue`. The loop ends when the input
  queue is drained, when a run returns non-zero, or when `cancel_event` is set.
  The last item put on `output_queue` is always a bool: True only if the loop
  ended because the input queue was empty.

  Args:
    i: Worker index, used only in the interruption log message.
    input_queue: Queue of file names still to be processed.
    output_queue: Queue receiving the result tuples and the final bool.
    cancel_event: Event that stops the worker before it takes another file.
    cmd_line: Command line passed on to InvokeClangPluginForFile.
    verbose: Verbosity flag passed on to InvokeClangPluginForFile.
  """
  drained_input = False
  try:
    while not cancel_event.is_set():
      # Raises queue.Empty once all files have been taken; handled below.
      source_file = input_queue.get_nowait()
      returncode, raw_out, raw_err = InvokeClangPluginForFile(
          source_file, cmd_line, verbose)
      result = (source_file, returncode, raw_out.decode('utf-8'),
                raw_err.decode('utf-8'))
      output_queue.put_nowait(result)
      if returncode != 0:
        break
  except KeyboardInterrupt:
    log("-- [{}] Interrupting", i)
  except queue.Empty:
    drained_input = True
  finally:
    # Emit a success bool so that the reader knows that there was either an
    # error or all files were processed.
    output_queue.put_nowait(drained_input)
def InvokeClangPluginForEachFile(
    filenames,
    plugin,
    plugin_args,
    arch_cfg,
    flags,
    clang_bin_dir,
    clang_plugins_dir,
):
  """Runs a gcmole plugin on every file and yields the results.

  This is a generator. Depending on flags["sequential"] the files are processed
  one after another or by a pool of worker threads (at most one per CPU).

  Args:
    filenames: List of source files to process.
    plugin: Name of the plugin to run.
    plugin_args: Arguments forwarded to the plugin.
    arch_cfg: Architecture settings used to build the clang command line.
    flags: Flag dictionary; reads "verbose" and "sequential".
    clang_bin_dir: Directory containing the clang++ binary.
    clang_plugins_dir: Directory containing libgcmole.so.

  Yields:
    (filename, stdout, stderr) tuples; both streams are UTF-8 decoded text in
    sequential as well as in parallel mode.

  Raises:
    SystemExit: With the plugin's return code, after writing its stderr to
      sys.stderr, as soon as a run returns non-zero.
  """
  cmd_line = MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                                  clang_plugins_dir)
  verbose = flags["verbose"]
  if flags["sequential"]:
    log("** Sequential execution.")
    for filename in filenames:
      log("-- {}", filename)
      returncode, stdout, stderr = InvokeClangPluginForFile(
          filename, cmd_line, verbose)
      # The subprocess pipes deliver bytes. Decode them here so that callers
      # get text, exactly as the parallel workers provide it.
      stdout = stdout.decode('utf-8')
      stderr = stderr.decode('utf-8')
      if returncode != 0:
        sys.stderr.write(stderr)
        sys.exit(returncode)
      yield filename, stdout, stderr
  else:
    log("** Parallel execution.")
    cpus = cpu_count()
    input_queue = queue.Queue()
    output_queue = queue.Queue()
    # Created before the try block so the finally clause can always use it.
    cancel_event = threading.Event()
    for filename in filenames:
      input_queue.put(filename)
    threads = [
        threading.Thread(
            target=InvokeClangPluginForFilesInQueue,
            args=(i, input_queue, output_queue, cancel_event, cmd_line,
                  verbose)) for i in range(min(len(filenames), cpus))
    ]
    # Only threads that were actually started may be joined.
    started_threads = []
    try:
      for t in threads:
        t.start()
        started_threads.append(t)
      num_finished = 0
      while num_finished < len(threads):
        output = output_queue.get()
        if isinstance(output, bool):
          # Each worker ends its stream with a bool: True means it drained the
          # input queue, False means it stopped early.
          if output:
            num_finished += 1
            continue
          # NOTE: a worker that stopped early without reporting a failing
          # result ends the iteration here without an error.
          break
        filename, returncode, stdout, stderr = output
        log("-- {}", filename)
        if returncode != 0:
          sys.stderr.write(stderr)
          sys.exit(returncode)
        yield filename, stdout, stderr
    finally:
      # Also reached when the consumer abandons the generator or sys.exit()
      # is raised above: stop the workers and wait for them.
      cancel_event.set()
      for t in started_threads:
        t.join()
# -----------------------------------------------------------------------------
def ParseGNFile(for_test):
  """Collects the source files listed in gcmole-annotated GN sections.

  A section starts with a "### gcmole(<condition>) ###" marker and extends to
  the next "]". Within a section every double-quoted string matching the
  per-file pattern is recorded.

  Args:
    for_test: If true, only tools/gcmole/GCMOLE.gn is read; otherwise BUILD.gn
      and test/cctest/BUILD.gn are read. Paths are relative to the current
      working directory.

  Returns:
    A dict mapping each condition string to the list of matching file names
    (prefixed with "test/cctest/" for files from test/cctest/BUILD.gn).
  """
  # Raw strings keep the regex escapes out of Python's own string escaping.
  cc_file_pattern = re.compile(r'"([^"]*?\.cc)"')
  if for_test:
    gn_files = [("tools/gcmole/GCMOLE.gn", cc_file_pattern, "")]
  else:
    gn_files = [
        ("BUILD.gn", cc_file_pattern, ""),
        ("test/cctest/BUILD.gn", re.compile(r'"(test-[^"]*?\.cc)"'),
         "test/cctest/"),
    ]
  section_pattern = re.compile(r"### gcmole\((.*?)\) ###(.*?)\]",
                               re.MULTILINE | re.DOTALL)
  result = {}
  for filename, pattern, prefix in gn_files:
    with open(filename) as gn_file:
      gn = gn_file.read()
    for condition, sources in section_pattern.findall(gn):
      # The same condition may appear in several sections and files.
      result.setdefault(condition, []).extend(
          prefix + source for source in pattern.findall(sources))
  return result
def EvaluateCondition(cond, props):
  """Evaluates a gcmole condition against the configuration properties.

  Args:
    cond: Either "all" or a string starting with "<property>:<value>".
    props: Dict mapping property names to their configured values.

  Returns:
    True if `cond` is "all" or the named property equals the given value.

  Exits the process through fatal() if `cond` cannot be parsed or names a
  property that is missing from `props`.
  """
  if cond == "all":
    return True
  # Raw string: "\w" must reach the regex engine, not Python's string parser.
  m = re.match(r"(\w+):(\w+)", cond)
  if m is None:
    fatal("failed to parse condition: {}", cond)
  p, v = m.groups()
  if p not in props:
    fatal("undefined configuration property: {}", p)
  return props[p] == v
def BuildFileList(sources, props):
  """Returns the files of all entries in `sources` whose condition holds.

  Args:
    sources: Dict mapping condition strings to lists of file names.
    props: Configuration properties the conditions are evaluated against.
  """
  selected = []
  for condition, files in sources.items():
    if not EvaluateCondition(condition, props):
      continue
    selected.extend(files)
  return selected
# Both GN file sets are parsed once, when this module is loaded.
gn_sources = ParseGNFile(for_test=False)
gn_test_sources = ParseGNFile(for_test=True)
def FilesForArch(arch):
  """Returns the files from `gn_sources` that apply to a Linux debug build
  of `arch`."""
  linux_debug_props = {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": "",
  }
  return BuildFileList(gn_sources, linux_debug_props)
def FilesForTest(arch):
  """Returns the files from `gn_test_sources` that apply to a Linux debug
  build of `arch`."""
  linux_debug_props = {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": "",
  }
  return BuildFileList(gn_test_sources, linux_debug_props)
# -----------------------------------------------------------------------------
# GCSuspects Generation
# Note that the gcsuspects file lists functions in the form:
# mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
# Regular expressions for functions that are treated as not causing GC when
# the allowlist is enabled.
ALLOWLIST = [
    # The following functions call CEntryStub which is always present.
    "MacroAssembler.*,CallRuntime",
    "CompileCallLoadPropertyWithInterceptor",
    "CallIC.*,GenerateMiss",
    # DirectCEntryStub is a special stub used on ARM.
    # It is pinned and always present.
    "DirectCEntryStub.*,GenerateCall",
    # TODO: GCMole is currently not sensitive enough to understand that
    # certain functions only cause GC and return Failure simultaneously.
    # Call sites of such functions are safe as long as they properly check
    # the return value and propagate the Failure to the caller.
    # It should be possible to extend GCMole to understand this.
    "Heap.*,TryEvacuateObject",
    # Ignore all StateTag methods.
    "StateTag",
    # Ignore printing of elements transition.
    "PrintElementsTransition",
    # CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
    "CodeCreateEvent",
    "WriteField",
]

# Matches the unmangled name (the part after the comma) of functions that
# collect garbage.
GC_PATTERN = ",.*Collect.*Garbage"
[heap] Safepointing with an atomic state To improve performance of parking, keep the thread state in an atomic variable instead of protecting it with a mutex. However the mutex was used e.g. to force Unpark() to block while the safepoint operation was still running. Therefore the safepoint algorithm has to change as well. Park() and Unpark() use CAS operation to transition the state. Safepoint() uses a relaxed load for checking whether a safepoint was requested. Since Safepoint(), Park() and Unpark() all have a slow path, there is no need for busy-waiting on the main thread. We need two more ThreadStates: * SafepointRequested: This state is set by GlobalSafepoint to force Running threads into the slow path on Safepoint() and Park(). This state also replaces the separate atomic<bool> safepoint_requested_ field we used before. * ParkedSafepoint: This state is set by GlobalSafepoint as well to force parked threads into the slow path on Unpark(). When stopping all threads, GlobalSafepoint transitions states from Running --> SafepointRequested and Parked --> ParkedSafepoint to force the slow path for all three methods. After performing the transition for each thread we know the exact number of Running threads and wait until each of them either reached a safepoint or parked itself. Design doc: https://docs.google.com/document/d/1p9klWyqT_AScAnK_PdHZTcNhZGzoBiYWPkUciIh2C58/edit?usp=sharing Bug: chromium:1177144, v8:10315 Change-Id: I8697da915c7d18e2fb941f1bedf6181226408feb Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2704075 Reviewed-by: Ulan Degenbaev <ulan@chromium.org> Reviewed-by: Maya Lekova <mslekova@chromium.org> Commit-Queue: Dominik Inführ <dinfuehr@chromium.org> Cr-Commit-Position: refs/heads/master@{#73089}
2021-02-25 16:17:59 +00:00
# Matches a function whose unmangled name is exactly SafepointSlowPath.
SAFEPOINT_PATTERN = ",SafepointSlowPath"

# All allowlist entries combined into one alternation; each entry is wrapped
# in a non-capturing group so that its own "|" cannot leak out.
ALLOWLIST_PATTERN = "|".join("(?:{})".format(entry) for entry in ALLOWLIST)
def MergeRegexp(pattern_dict):
  """Compiles a single regex that alternates between all given patterns.

  Each key of `pattern_dict` becomes the name of a group wrapping the
  corresponding pattern, so a match can report which pattern was hit.
  """
  named_groups = []
  for group_name, pattern in pattern_dict.items():
    named_groups.append("(?P<%s>%s)" % (group_name, pattern))
  return re.compile("|".join(named_groups))
# Classifies a function name as GC-causing or as a safepoint.
IS_SPECIAL_WITHOUT_ALLOW_LIST = MergeRegexp({
    "gc": GC_PATTERN,
    "safepoint": SAFEPOINT_PATTERN,
})

# Same as above, but additionally recognizes allowlisted functions.
IS_SPECIAL_WITH_ALLOW_LIST = MergeRegexp({
    "gc": GC_PATTERN,
    "safepoint": SAFEPOINT_PATTERN,
    "allow": ALLOWLIST_PATTERN,
})
class GCSuspectsCollector:
  """Builds a caller graph from plugin output and marks functions that can
  reach a GC or a safepoint."""

  def __init__(self, flags):
    # Function name -> True (may cause GC) or False (allowlisted).
    self.gc = {}
    # Function name -> list of causes ("<GC>", "<Safepoint>" or a callee name).
    self.gc_caused = collections.defaultdict(list)
    # Function name -> set of the functions calling it.
    self.funcs = {}
    # The most recently parsed caller line.
    self.current_caller = None
    self.allowlist = flags["allowlist"]
    if self.allowlist:
      self.is_special = IS_SPECIAL_WITH_ALLOW_LIST
    else:
      self.is_special = IS_SPECIAL_WITHOUT_ALLOW_LIST

  def AddCause(self, name, cause):
    """Records `cause` as one reason why `name` is a GC suspect."""
    self.gc_caused[name].append(cause)

  def Parse(self, lines):
    """Consumes plugin output lines.

    A line without a leading tab names a caller; the tab-indented lines that
    follow name the functions it calls. Empty lines are skipped.
    """
    for line in lines:
      if not line:
        continue
      if line[0] == "\t":
        callee = line[1:]
        self.Resolve(callee).add(self.current_caller)
      else:
        self.Resolve(line)
        self.current_caller = line

  def Resolve(self, name):
    """Returns the caller set of `name`, classifying it on first sight."""
    if name in self.funcs:
      return self.funcs[name]
    callers = self.funcs[name] = set()
    match = self.is_special.search(name)
    if match:
      if match.group("gc"):
        self.gc[name] = True
        self.AddCause(name, "<GC>")
      elif match.group("safepoint"):
        self.gc[name] = True
        self.AddCause(name, "<Safepoint>")
      elif match.group("allow"):
        self.gc[name] = False
    return callers

  def Propagate(self):
    """Marks every transitive caller of a GC-causing function as a suspect."""
    log("** Propagating GC information")

    def mark(funcname, callers):
      for caller in callers:
        # Already classified (including allowlisted functions): stop here.
        if caller in self.gc:
          continue
        self.gc[caller] = True
        mark(caller, self.funcs[caller])
        self.AddCause(caller, funcname)

    for funcname, callers in self.funcs.items():
      if self.gc.get(funcname, False):
        mark(funcname, callers)
def GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir):
  """Runs the "dump-callees" plugin on `files` and writes the suspect lists.

  Two files are written to the current directory: "gcsuspects" lists every
  function marked as possibly causing GC, one per line; "gccauses" lists, per
  function, the recorded causes.
  """
  # Start from a fresh collector for every run.
  collector = GCSuspectsCollector(flags)
  log("** Building GC Suspects for {}", arch)
  plugin_runs = InvokeClangPluginForEachFile(
      files, "dump-callees", [], arch_cfg, flags, clang_bin_dir,
      clang_plugins_dir)
  for _, stdout, _ in plugin_runs:
    collector.Parse(stdout.splitlines())
  collector.Propagate()

  with open("gcsuspects", "w") as out:
    for name, is_suspect in collector.gc.items():
      if not is_suspect:
        continue
      out.write(name + "\n")

  with open("gccauses", "w") as out:
    out.write("GC = {\n")
    for name, causes in collector.gc_caused.items():
      out.write(" '{}': [\n".format(name))
      for cause in causes:
        out.write(" '{}',\n".format(cause))
      out.write(" ],\n")
    out.write("}\n")
  log("** GCSuspects generated for {}", arch)
# ------------------------------------------------------------------------------
# Analysis
def CheckCorrectnessForArch(arch, for_test, flags, clang_bin_dir,
                            clang_plugins_dir):
  """Runs the "find-problems" plugin on all files selected for `arch`.

  Unless flags["reuse_gcsuspects"] is set, the GC suspects are regenerated
  first.

  Args:
    arch: Key into ARCHITECTURES.
    for_test: If true, the gcmole test files are checked and the plugin's
      stderr is collected instead of being printed.
    flags: Flag dictionary; reads "reuse_gcsuspects", "dead_vars" and
      "verbose_trace" (and is passed on to the plugin runner).
    clang_bin_dir: Directory containing the clang++ binary.
    clang_plugins_dir: Directory containing libgcmole.so.

  Returns:
    (errors_found, output): whether any stderr line looks like a
    "<file>:<line>:<col>: warning|error" diagnostic, and the concatenated
    stderr of all runs (empty unless `for_test` is true).
  """
  if for_test:
    files = FilesForTest(arch)
  else:
    files = FilesForArch(arch)
  arch_cfg = ARCHITECTURES[arch]
  if not flags["reuse_gcsuspects"]:
    GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir)
  else:
    log("** Reusing GCSuspects for {}", arch)
  processed_files = 0
  errors_found = False
  # Collected pieces of stderr; joined once at the end.
  captured_stderr = []
  log(
      "** Searching for evaluation order problems{} for {}",
      " and dead variables" if flags["dead_vars"] else "",
      arch,
  )
  plugin_args = []
  if flags["dead_vars"]:
    plugin_args.append("--dead-vars")
  if flags["verbose_trace"]:
    plugin_args.append("--verbose")
  # Raw string so that "\d" is a regex escape, not a Python string escape.
  diagnostic_pattern = re.compile(r"^[^:]+:\d+:\d+: (warning|error)",
                                  re.MULTILINE)
  for filename, stdout, stderr in InvokeClangPluginForEachFile(
      files,
      "find-problems",
      plugin_args,
      arch_cfg,
      flags,
      clang_bin_dir,
      clang_plugins_dir,
  ):
    processed_files += 1
    if not errors_found:
      errors_found = diagnostic_pattern.search(stderr) is not None
    if for_test:
      captured_stderr.append(stderr)
    else:
      sys.stdout.write(stderr)
  output = "".join(captured_stderr)
  log(
      "** Done processing {} files. {}",
      processed_files,
      "Errors found" if errors_found else "No errors found",
  )
  return errors_found, output
def TestRun(flags, clang_bin_dir, clang_plugins_dir):
  """Runs gcmole on its own test files and compares against expectations.

  The test files are checked for "x64" and the collected plugin output is
  compared with tools/gcmole/test-expectations.txt.

  Returns:
    True if the plugin reported problems and its output equals the
    expectations file; False otherwise (details are logged).
  """
  log("** Test Run")
  errors_found, output = CheckCorrectnessForArch("x64", True, flags,
                                                 clang_bin_dir,
                                                 clang_plugins_dir)
  if not errors_found:
    log("** Test file should produce errors, but none were found. Output:")
    # The output is data, not a format string: it may contain "{" or "}".
    log("{}", output)
    return False
  filename = "tools/gcmole/test-expectations.txt"
  with open(filename) as exp_file:
    expectations = exp_file.read()
  if output != expectations:
    log("** Output mismatch from running tests. Please run them manually.")
    for line in difflib.unified_diff(
        expectations.splitlines(),
        output.splitlines(),
        fromfile=filename,
        tofile="output",
        lineterm="",
    ):
      log("{}", line)
    log("------")
    log("--- Full output ---")
    log("{}", output)
    log("------")
    return False
  log("** Tests ran successfully")
  return True
def main(args):
  """Entry point: parses the command line and runs all checks.

  Args:
    args: The full argument vector. args[0] is the script path; its directory
      is the plugin directory when CLANG_PLUGINS is not set. "--<flag>" and
      "--no-<flag>" arguments set or clear entries of the flag dictionary; all
      other arguments are architecture names.

  Always ends the process: exit status 1 if the test run failed or any
  architecture reported problems (or on a usage error), 0 otherwise.
  """
  DIR = os.path.dirname(args[0])
  clang_bin_dir = os.getenv("CLANG_BIN")
  clang_plugins_dir = os.getenv("CLANG_PLUGINS")
  if not clang_bin_dir:
    fatal("CLANG_BIN not set")
  if not clang_plugins_dir:
    clang_plugins_dir = DIR
  flags = {
      # Do not build gcsuspects file and reuse previously generated one.
      "reuse_gcsuspects": False,
      # Don't use parallel python runner.
      "sequential": False,
      # Print commands to console before executing them.
      "verbose": True,
      # Perform dead variable analysis.
      "dead_vars": True,
      # Enable verbose tracing from the plugin itself.
      "verbose_trace": False,
      # When building gcsuspects allowlist certain functions as if they can be
      # causing GC. Currently used to reduce number of false positives in dead
      # variables analysis. See TODO for ALLOWLIST
      "allowlist": True,
  }
  pos_args = []
  flag_regexp = re.compile(r"^--(no[-_]?)?([\w\-_]+)$")
  for arg in args[1:]:
    m = flag_regexp.match(arg)
    if m:
      no, flag = m.groups()
      flag = flag.replace("-", "_")
      if flag in flags:
        # A "no" prefix clears the flag, its absence sets it.
        flags[flag] = no is None
      else:
        fatal("Unknown flag: {}", flag)
    else:
      pos_args.append(arg)
  archs = pos_args if pos_args else ["ia32", "arm", "x64", "arm64"]
  # Reject unknown architectures up front, before the test run, with a clear
  # message instead of a KeyError from the dictionary lookup.
  for arch in archs:
    if arch not in ARCHITECTURES:
      fatal("Unknown arch: {}", arch)
  any_errors_found = False
  if not TestRun(flags, clang_bin_dir, clang_plugins_dir):
    any_errors_found = True
  else:
    for arch in archs:
      errors_found, output = CheckCorrectnessForArch(arch, False, flags,
                                                     clang_bin_dir,
                                                     clang_plugins_dir)
      any_errors_found = any_errors_found or errors_found
  sys.exit(1 if any_errors_found else 0)
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
  main(sys.argv)