[tools][perf] Adding linux-perf d8 helper

- Start moving profiler scripts to tools/profiling
- Add linux-perf-d8.py wrapper script that runs `perf record` and
  `perf inject`
- Improve waiting for the d8/chrome process and allow for early
  termination if --timeout is provided
- Allow fractional seconds for --timeout
- Delete run-perf.sh and provide equivalent functionality in
  linux-perf-d8.py

Change-Id: Iac1d6cf895aa7159a9bbb387aca7632df27a0ca3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3585951
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79991}
This commit is contained in:
Camillo Bruni 2022-04-14 15:20:48 +02:00 committed by V8 LUCI CQ
parent 3608b831e9
commit 7dd7100502
5 changed files with 226 additions and 66 deletions

View File

@ -0,0 +1,8 @@
# Profiling Tools
This directory contains various helper scripts to assist with profiling
d8 and chrome.
## linux perf
For [linux perf](https://perf.wiki.kernel.org/) support you can find more
documentation and instructions at <https://v8.dev/docs/linux-perf>.

View File

@ -5,10 +5,8 @@
import optparse
from pathlib import Path
from re import A
import os
import shlex
from signal import SIGQUIT
import subprocess
import signal
import tempfile
@ -16,8 +14,6 @@ import time
import psutil
import multiprocessing
from unittest import result
# Helper shell script that is expected to sit in the same directory as this
# file.
renderer_cmd_file = Path(__file__).parent / 'linux-perf-renderer-cmd.sh'
# Fail at start-up if the helper script is missing.
assert renderer_cmd_file.is_file()
# The helper script invoked with a fixed --perf-data-prefix argument.
# NOTE(review): presumably used as chrome's renderer command prefix; the
# consumer is outside this excerpt -- confirm.
renderer_cmd_prefix = f"{renderer_cmd_file} --perf-data-prefix=chrome_renderer"
@ -46,7 +42,7 @@ parser.add_option(
help="Also start linux-perf for the browser process. "
"By default only renderer processes are sampled. "
"Outputs 'browser_*.perf.data' in the CDW")
parser.add_option("--timeout", type=int, help="Stop chrome after N seconds")
parser.add_option("--timeout", type=float, help="Stop chrome after N seconds")
chrome_options = optparse.OptionGroup(
parser, "Chrome-forwarded Options",
@ -116,6 +112,17 @@ os.chdir(options.perf_data_dir)
# Space-separated V8 flags kept as a single string.
# NOTE(review): presumably forwarded to chrome as JS flags for perf
# profiling; where this constant is consumed is outside this excerpt.
JS_FLAGS_PERF = ("--perf-prof --no-write-protect-code-memory "
                 "--interpreted-frames-native-stack")
def wait_for_process_timeout(process):
  """Poll `process` until it exits or `options.timeout` seconds have passed.

  Args:
    process: A started process object exposing `poll()`, e.g. the
      `subprocess.Popen` instance created by this script.

  Returns:
    True if the process exited before the timeout elapsed, False if the
    timeout was reached while the process was still running.
  """
  poll_interval = 0.5
  # Track elapsed time against a monotonic deadline. The previous counter was
  # never advanced, so the loop could only end when the process exited and
  # --timeout had no effect.
  deadline = time.monotonic() + options.timeout
  while True:
    if process.poll() is not None:
      return True
    remaining = deadline - time.monotonic()
    if remaining <= 0:
      return False
    # Never sleep past the deadline so that fractional timeouts are honoured.
    time.sleep(min(poll_interval, remaining))
with tempfile.TemporaryDirectory(prefix="chrome-") as tmp_dir_path:
tempdir = Path(tmp_dir_path)
cmd = [
@ -151,7 +158,7 @@ with tempfile.TemporaryDirectory(prefix="chrome-") as tmp_dir_path:
subprocess.run(cmd)
else:
process = subprocess.Popen(cmd)
time.sleep(options.timeout)
if not wait_for_process_timeout(process):
log(f"QUITING chrome child processes after {options.timeout}s timeout")
current_process = psutil.Process()
children = current_process.children(recursive=True)
@ -202,6 +209,6 @@ for output_file in reversed(results):
f"{output_file.name:67}{(output_file.stat().st_size*BYTES_TO_MIB):10.2f}MiB"
)
log("PPROF EXAMPLE")
log("PPROF")
path_strings = map(lambda f: str(f.relative_to(old_cwd)), results)
print(f"pprof -flame { ' '.join(path_strings)}")

203
tools/profiling/linux-perf-d8.py Executable file
View File

@ -0,0 +1,203 @@
#!/usr/bin/env python3
# Copyright 2022 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from datetime import datetime
from pathlib import Path
import optparse
import os
import psutil
import shlex
import signal
import subprocess
import time
# ==============================================================================
# Usage text shown by --help; optparse expands %prog to the script name.
usage = """Usage: %prog $D8_BIN [OPTION]... -- [D8_OPTION]... [FILE]
This script runs linux-perf with custom V8 logging to get support to resolve
JS function names.
The perf data is written to OUT_DIR as 'd8_<timestamp>.perf.data'.
See https://v8.dev/docs/linux-perf for more detailed instructions.
See $D8_BIN --help for more options
"""
parser = optparse.OptionParser(usage=usage)
parser.add_option(
    '--perf-data-dir',
    default=None,
    metavar="OUT_DIR",
    help="Output directory for linux perf profile files")
# type=float so that fractional seconds are accepted.
parser.add_option("--timeout", type=float, help="Stop d8 after N seconds")

# Convenience switches that are translated into d8 flags by this script.
# Adjacent string literals are concatenated verbatim, so each fragment ends
# with an explicit space.
d8_options = optparse.OptionGroup(
    parser, "d8-forwarded Options",
    "These options are for a better script experience and are forwarded "
    "directly to d8. Any other d8 option can be passed after the '--' "
    "arguments separator.")
d8_options.add_option(
    "--perf-prof-annotate-wasm",
    help="Load wasm source map and provide annotate wasm code.",
    action="store_true",
    default=False)
d8_options.add_option(
    "--no-interpreted-frames-native-stack",
    help="For profiling v8 copies the interpreter entry trampoline for every "
    "interpreted function. This makes interpreted functions identifiable on the "
    "native stack at cost of a slight performance and memory overhead.",
    action="store_true",
    default=False)
parser.add_option_group(d8_options)

# Options whose values are handed to `perf record`.
perf_options = optparse.OptionGroup(
    parser, "perf-forward options", """
These options are forwarded directly to the `perf record` command and can be
used to manually tweak how profiling works.
See `perf record --help` for more information.
""")
perf_options.add_option(
    "--freq",
    default="max",
    help="Sampling frequency, either 'max' or a number in hertz. "
    "Might be reduced depending on the platform.")
perf_options.add_option("--call-graph", default="fp", help="Defaults to 'fp'")
perf_options.add_option("--event", default=None, help="Not set by default.")
perf_options.add_option(
    "--raw-samples",
    default=None,
    help="Collect raw samples. Not set by default")
perf_options.add_option(
    "--count", default=None, help="Event period to sample. Not set by default.")
perf_options.add_option(
    "--no-inherit",
    action="store_true",
    default=False,
    help="Child tasks do not inherit counters.")
parser.add_option_group(perf_options)
# ==============================================================================
def log(*args):
  """Print `args` as a banner: a blank line, then the message between rules.

  Args:
    *args: Values passed straight to `print`, separated by spaces.
  """
  rule = "=" * 80
  print(f"\n{rule}")
  print(*args)
  print(rule)
# ==============================================================================
(options, args) = parser.parse_args()

# The first positional argument is the d8 binary; whatever remains in `args`
# is forwarded to d8 later on.
if not args:
  parser.error("No d8 binary provided")
# Resolve against the current directory now: the script changes into the perf
# data directory below, after which a relative path to d8 would no longer
# point at the binary.
d8_bin = Path(args.pop(0)).absolute()
if not d8_bin.exists():
  parser.error(f"D8 '{d8_bin}' does not exist")

# Default the output directory to the directory the script was started from.
if options.perf_data_dir is None:
  options.perf_data_dir = Path.cwd()
else:
  options.perf_data_dir = Path(options.perf_data_dir).absolute()
if not options.perf_data_dir.is_dir():
  parser.error(f"--perf-data-dir={options.perf_data_dir} "
               "is not a directory or does not exist.")

if options.timeout is not None and options.timeout < 0:
  parser.error("--timeout should be a positive number")

# ==============================================================================
# Remember where we came from, then work inside the output directory.
old_cwd = Path.cwd()
os.chdir(options.perf_data_dir)
# ==============================================================================
# Build the d8 command line: the binary, the flags this script manages, then
# every remaining user-supplied argument.
cmd = [str(d8_bin), "--perf-prof"]
if not options.no_interpreted_frames_native_stack:
  cmd += ["--interpreted-frames-native-stack"]
if options.perf_prof_annotate_wasm:
  cmd += ["--perf-prof-annotate-wasm"]
cmd += args
log("D8 CMD: ", shlex.join(cmd))

# The output file name carries a timestamp so repeated runs do not overwrite
# each other.
datetime_str = datetime.now().strftime("%Y-%m-%d_%H%M%S")
perf_data_file = Path.cwd() / f"d8_{datetime_str}.perf.data"
perf_cmd = [
    "perf", "record", f"--call-graph={options.call_graph}",
    f"--freq={options.freq}", "--clockid=mono", f"--output={perf_data_file}"
]
if options.count:
  perf_cmd += [f"--count={options.count}"]
if options.raw_samples:
  # perf spells this switch --raw-samples (short form -R) and it takes no
  # value, so the option's value only acts as an on/off toggle here.
  perf_cmd += ["--raw-samples"]
if options.event:
  perf_cmd += [f"--event={options.event}"]
if options.no_inherit:
  perf_cmd += ["--no-inherit"]

# Everything after "--" is the command that perf profiles.
cmd = perf_cmd + ["--"] + cmd
log("LINUX PERF CMD: ", shlex.join(cmd))
def wait_for_process_timeout(process):
  """Poll `process` until it exits or `options.timeout` seconds have passed.

  Args:
    process: A started process object exposing `poll()`, e.g. the
      `subprocess.Popen` instance created by this script.

  Returns:
    True if the process exited before the timeout elapsed, False if the
    timeout was reached while the process was still running.
  """
  poll_interval = 0.1
  # Track elapsed time against a monotonic deadline. The previous counter was
  # never advanced, so the loop could only end when the process exited and
  # --timeout had no effect.
  deadline = time.monotonic() + options.timeout
  while True:
    if process.poll() is not None:
      return True
    remaining = deadline - time.monotonic()
    if remaining <= 0:
      return False
    # Never sleep past the deadline so that fractional timeouts are honoured.
    time.sleep(min(poll_interval, remaining))
# Without --timeout simply block until the perf command has finished.
if options.timeout is None:
  subprocess.run(cmd)
else:
  process = subprocess.Popen(cmd)
  finished_in_time = wait_for_process_timeout(process)
  if not finished_in_time:
    log(f"QUITING d8 processes after {options.timeout}s timeout")
    # Walk every descendant of this script and signal the ones whose process
    # name contains "d8".
    for child in psutil.Process().children(recursive=True):
      if "d8" not in child.name():
        continue
      print(f" quitting PID={child.pid}")
      child.send_signal(signal.SIGQUIT)
    # Wait for linux-perf to write out files
    time.sleep(1)
    process.send_signal(signal.SIGQUIT)
    process.wait()

# ==============================================================================
log("POST PROCESSING: Injecting JS symbols")
def inject_v8_symbols(perf_dat_file):
  """Run `perf inject --jit` on a recorded perf data file.

  Args:
    perf_dat_file: `pathlib.Path` of the perf data file to process.

  Returns:
    The `Path` of the written output file (the input path with its last
    suffix replaced by ".data.jitted"), or None if `perf inject` could not
    be started or exited with a non-zero status.
  """
  output_file = perf_dat_file.with_suffix(".data.jitted")
  cmd = [
      "perf", "inject", "--jit", f"--input={perf_dat_file}",
      f"--output={output_file}"
  ]
  try:
    # check=True turns a non-zero exit status into CalledProcessError;
    # without it a failed `perf inject` would be reported as a success.
    subprocess.run(cmd, check=True)
  except (OSError, subprocess.CalledProcessError):
    # Print the failing command so that it can be re-run by hand.
    print(shlex.join(cmd))
    return None
  print(f"Processed: {output_file}")
  return output_file
result = inject_v8_symbols(perf_data_file)
if result is None:
  # The second literal needs its own f-prefix; otherwise the placeholder is
  # printed verbatim instead of the directory.
  print("No perf files were successfully processed."
        f" Check for errors or partial results in '{options.perf_data_dir}'")
  # SystemExit works even when the site-provided exit() helper is missing.
  raise SystemExit(1)

log(f"RESULTS in '{options.perf_data_dir}'")
BYTES_TO_MIB = 1 / 1024 / 1024
# File name padded to 67 columns, followed by the file size in MiB.
print(f"{result.name:67}{(result.stat().st_size*BYTES_TO_MIB):10.2f}MiB")

log("PPROF")
print(f"pprof -flame {result}")

View File

@ -1,58 +0,0 @@
#! /bin/sh
#
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
########## Global variable definitions

# Kernel settings that are inspected and, if needed, adjusted further down.
SAMPLE_RATE_CONFIG_FILE="/proc/sys/kernel/perf_event_max_sample_rate"
KERNEL_MAP_CONFIG_FILE="/proc/sys/kernel/kptr_restrict"

# Ensure that <your CPU clock> / $SAMPLE_EVERY_N_CYCLES < $MAXIMUM_SAMPLE_RATE.
SAMPLE_EVERY_N_CYCLES=10000
MAXIMUM_SAMPLE_RATE=10000000

# dwarf does not play nice with JITted objects.
CALL_GRAPH_METHOD="fp"

# The sampling event can be overridden from the environment.
EVENT_TYPE=${EVENT_TYPE:-cycles:u}
########## Usage

# Print the help text to stdout.
usage() {
  cat << EOF
usage: $0 <benchmark_command>
Executes <benchmark_command> under observation by Linux perf.
Sampling event is cycles in user space, call graphs are recorded.
EOF
}

# Show the help text and stop when there is nothing to run or help was
# requested explicitly.
if [ $# -eq 0 ] ; then
  usage
  exit 1
fi
case "$1" in
  -h|--help)
    usage
    exit 1
    ;;
esac
########## Actual script execution

# Raise the kernel's maximum sample rate when it is below the required value.
ACTUAL_SAMPLE_RATE=$(cat "$SAMPLE_RATE_CONFIG_FILE")
if [ "$ACTUAL_SAMPLE_RATE" -lt "$MAXIMUM_SAMPLE_RATE" ]
then
  printf '%s\n' "Setting appropriate maximum sample rate..."
  printf '%s\n' "$MAXIMUM_SAMPLE_RATE" | sudo tee "$SAMPLE_RATE_CONFIG_FILE"
fi

# Switch the kernel address map restriction off unless it already is 0.
ACTUAL_KERNEL_MAP_RESTRICTION=$(cat "$KERNEL_MAP_CONFIG_FILE")
if [ "$ACTUAL_KERNEL_MAP_RESTRICTION" -ne "0" ]
then
  printf '%s\n' "Disabling kernel address map restriction..."
  printf '%s\n' 0 | sudo tee "$KERNEL_MAP_CONFIG_FILE"
fi
# Extract the command being perfed, so that we can prepend arguments to the
# arguments that the user supplied.
COMMAND=$1
shift 1
echo "Running..."
# Record the command under perf with the event, sample period and call-graph
# method configured above. --perf-basic-prof is inserted directly after the
# command, ahead of the user's own arguments.
# NOTE(review): $EVENT_TYPE is expanded unquoted, so a value containing
# whitespace is split into several words -- confirm whether that is relied on.
perf record -R \
-e $EVENT_TYPE \
-c $SAMPLE_EVERY_N_CYCLES \
--call-graph $CALL_GRAPH_METHOD \
-i "$COMMAND" --perf-basic-prof "$@"