[tools][perf] Adding linux-perf d8 helper
- Start moving profiler scripts to tools/profiling - Add linux-perf-d8.py wrapper script that runs `perf record` and `perf inject` - Improve waiting for the d8/chrome process and allow for early termination if --timeout is provided - Allow fractional seconds for --timeout - Delete run-perf.sh and provide equivalent functionality in linux-perf-d8.py Change-Id: Iac1d6cf895aa7159a9bbb387aca7632df27a0ca3 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3585951 Reviewed-by: Leszek Swirski <leszeks@chromium.org> Commit-Queue: Camillo Bruni <cbruni@chromium.org> Cr-Commit-Position: refs/heads/main@{#79991}
This commit is contained in:
parent
3608b831e9
commit
7dd7100502
8
tools/profiling/README.md
Normal file
8
tools/profiling/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
# Profiling Tools
|
||||
|
||||
This directory contains various helper scripts to assist with profiling
|
||||
d8 and chrome.
|
||||
|
||||
## linux perf
|
||||
For [linux perf](https://perf.wiki.kernel.org/) support you can find more
|
||||
documentation and instructions at <https://v8.dev/docs/linux-perf>.
|
@ -5,10 +5,8 @@
|
||||
|
||||
import optparse
|
||||
from pathlib import Path
|
||||
from re import A
|
||||
import os
|
||||
import shlex
|
||||
from signal import SIGQUIT
|
||||
import subprocess
|
||||
import signal
|
||||
import tempfile
|
||||
@ -16,8 +14,6 @@ import time
|
||||
import psutil
|
||||
import multiprocessing
|
||||
|
||||
from unittest import result
|
||||
|
||||
renderer_cmd_file = Path(__file__).parent / 'linux-perf-renderer-cmd.sh'
|
||||
assert renderer_cmd_file.is_file()
|
||||
renderer_cmd_prefix = f"{renderer_cmd_file} --perf-data-prefix=chrome_renderer"
|
||||
@ -46,7 +42,7 @@ parser.add_option(
|
||||
help="Also start linux-perf for the browser process. "
|
||||
"By default only renderer processes are sampled. "
|
||||
"Outputs 'browser_*.perf.data' in the CDW")
|
||||
parser.add_option("--timeout", type=int, help="Stop chrome after N seconds")
|
||||
parser.add_option("--timeout", type=float, help="Stop chrome after N seconds")
|
||||
|
||||
chrome_options = optparse.OptionGroup(
|
||||
parser, "Chrome-forwarded Options",
|
||||
@ -116,6 +112,17 @@ os.chdir(options.perf_data_dir)
|
||||
JS_FLAGS_PERF = ("--perf-prof --no-write-protect-code-memory "
|
||||
"--interpreted-frames-native-stack")
|
||||
|
||||
|
||||
def wait_for_process_timeout(process, timeout=None, poll_interval=0.5):
    """Wait until *process* exits or the timeout elapses.

    The previous implementation never advanced its elapsed-time counter, so
    the timeout could not fire and the loop only ended when the process
    exited by itself. A monotonic deadline is used instead.

    Args:
        process: Object exposing ``poll()`` that returns ``None`` while the
            process is still running (e.g. ``subprocess.Popen``).
        timeout: Maximum number of seconds to wait. Defaults to the
            module-level ``options.timeout`` when omitted.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        True if the process stopped before the deadline, False on timeout.
    """
    if timeout is None:
        timeout = options.timeout
    deadline = time.monotonic() + timeout
    while True:
        if process.poll() is not None:
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline so fractional timeouts are honoured.
        time.sleep(min(poll_interval, remaining))
|
||||
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix="chrome-") as tmp_dir_path:
|
||||
tempdir = Path(tmp_dir_path)
|
||||
cmd = [
|
||||
@ -151,7 +158,7 @@ with tempfile.TemporaryDirectory(prefix="chrome-") as tmp_dir_path:
|
||||
subprocess.run(cmd)
|
||||
else:
|
||||
process = subprocess.Popen(cmd)
|
||||
time.sleep(options.timeout)
|
||||
if not wait_for_process_timeout(process):
|
||||
log(f"QUITING chrome child processes after {options.timeout}s timeout")
|
||||
current_process = psutil.Process()
|
||||
children = current_process.children(recursive=True)
|
||||
@ -202,6 +209,6 @@ for output_file in reversed(results):
|
||||
f"{output_file.name:67}{(output_file.stat().st_size*BYTES_TO_MIB):10.2f}MiB"
|
||||
)
|
||||
|
||||
log("PPROF EXAMPLE")
|
||||
log("PPROF")
|
||||
path_strings = map(lambda f: str(f.relative_to(old_cwd)), results)
|
||||
print(f"pprof -flame { ' '.join(path_strings)}")
|
203
tools/profiling/linux-perf-d8.py
Executable file
203
tools/profiling/linux-perf-d8.py
Executable file
@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2022 the V8 project authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import optparse
|
||||
import os
|
||||
import psutil
|
||||
import shlex
|
||||
import signal
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
# ==============================================================================
|
||||
|
||||
# Command-line interface: general options, options forwarded to d8 and
# options forwarded to `perf record`.
usage = """Usage: %prog $D8_BIN [OPTION]... -- [D8_OPTION]... [FILE]

This script runs linux-perf with custom V8 logging to get support to resolve
JS function names.

The perf data is written to OUT_DIR.

See https://v8.dev/docs/linux-perf for more detailed instructions.
See $D8_BIN --help for more options
"""
parser = optparse.OptionParser(usage=usage)
parser.add_option(
    '--perf-data-dir',
    default=None,
    metavar="OUT_DIR",
    help="Output directory for linux perf profile files")
parser.add_option("--timeout", type=float, help="Stop d8 after N seconds")

# Adjacent string literals are concatenated verbatim, so every fragment
# except the last one must end with an explicit space.
d8_options = optparse.OptionGroup(
    parser, "d8-forwarded Options",
    "These options are for a better script experience that are forwarded "
    "directly to d8. Any other d8 option can be passed after the '--' "
    "arguments separator.")
d8_options.add_option(
    "--perf-prof-annotate-wasm",
    help="Load wasm source map and provide annotate wasm code.",
    action="store_true",
    default=False)
d8_options.add_option(
    "--no-interpreted-frames-native-stack",
    help="For profiling v8 copies the interpreter entry trampoline for every "
    "interpreted function. This makes interpreted functions identifiable on "
    "the native stack at cost of a slight performance and memory overhead. "
    "Pass this flag to disable that behavior.",
    action="store_true",
    default=False)
parser.add_option_group(d8_options)

perf_options = optparse.OptionGroup(
    parser, "perf-forward options", """
These options are forwarded directly to the `perf record` command and can be
used to manually tweak how profiling works.

See `perf record --help` for more information.
""")

perf_options.add_option(
    "--freq",
    default="max",
    help="Sampling frequency, either 'max' or a number in hertz. "
    "Might be reduced depending on the platform.")
perf_options.add_option("--call-graph", default="fp", help="Defaults to 'fp'")
perf_options.add_option("--event", default=None, help="Not set by default.")
perf_options.add_option(
    "--raw-samples",
    default=None,
    help="Collect raw samples. Not set by default")
perf_options.add_option(
    "--count", default=None, help="Event period to sample. Not set by default.")
perf_options.add_option(
    "--no-inherit",
    action="store_true",
    default=False,
    help="Child tasks do not inherit counters.")
parser.add_option_group(perf_options)
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
def log(*args):
    """Print *args* as a banner: a blank line, then the message framed by
    two 80-character rules of '=' signs."""
    rule = "=" * 80
    print(f"\n{rule}")
    print(*args)
    print(rule)
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# Parse the command line: the first positional argument is the d8 binary,
# everything left over is forwarded to d8.
(options, args) = parser.parse_args()

if not args:
    parser.error("No d8 binary provided")

d8_bin = Path(args.pop(0))
if not d8_bin.exists():
    parser.error(f"D8 '{d8_bin}' does not exist")
# The working directory is changed to the output directory below, so a
# relative binary path has to be anchored to the current directory first.
d8_bin = d8_bin.absolute()

if options.perf_data_dir is None:
    options.perf_data_dir = Path.cwd()
else:
    options.perf_data_dir = Path(options.perf_data_dir).absolute()

if not options.perf_data_dir.is_dir():
    parser.error(f"--perf-data-dir={options.perf_data_dir} "
                 "is not a directory or does not exist.")

# Compare against None explicitly: a plain truthiness test lets 0 slip
# through even though only positive timeouts make sense.
if options.timeout is not None and options.timeout <= 0:
    parser.error("--timeout should be a positive number")

# ==============================================================================
# Remember where we started, then run everything from the output directory.
old_cwd = Path.cwd()
os.chdir(options.perf_data_dir)
|
||||
|
||||
# ==============================================================================
|
||||
# Assemble the d8 command line with the flags needed for perf symbolization.
cmd = [str(d8_bin), "--perf-prof"]

if not options.no_interpreted_frames_native_stack:
    cmd += ["--interpreted-frames-native-stack"]
if options.perf_prof_annotate_wasm:
    cmd += ["--perf-prof-annotate-wasm"]
cmd += args
log("D8 CMD: ", shlex.join(cmd))

# One timestamped output file per run, written to the current directory.
datetime_str = datetime.now().strftime("%Y-%m-%d_%H%M%S")
perf_data_file = Path.cwd() / f"d8_{datetime_str}.perf.data"
perf_cmd = [
    "perf", "record", f"--call-graph={options.call_graph}",
    f"--freq={options.freq}", "--clockid=mono", f"--output={perf_data_file}"
]
if options.count:
    perf_cmd += [f"--count={options.count}"]
if options.raw_samples:
    # `perf record` spells this option with a dash and it is a plain switch;
    # the former `--raw_samples=<value>` form is not a perf option.
    perf_cmd += ["--raw-samples"]
if options.event:
    perf_cmd += [f"--event={options.event}"]
if options.no_inherit:
    perf_cmd += ["--no-inherit"]

# Everything after "--" is the profiled command.
cmd = perf_cmd + ["--"] + cmd
log("LINUX PERF CMD: ", shlex.join(cmd))
|
||||
|
||||
|
||||
def wait_for_process_timeout(process, timeout=None, poll_interval=0.1):
    """Wait until *process* exits or the timeout elapses.

    The previous implementation never advanced its elapsed-time counter, so
    the timeout could not fire and the loop only ended when the process
    exited by itself. A monotonic deadline is used instead.

    Args:
        process: Object exposing ``poll()`` that returns ``None`` while the
            process is still running (e.g. ``subprocess.Popen``).
        timeout: Maximum number of seconds to wait. Defaults to the
            module-level ``options.timeout`` when omitted.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        True if the process stopped before the deadline, False on timeout.
    """
    if timeout is None:
        timeout = options.timeout
    deadline = time.monotonic() + timeout
    while True:
        if process.poll() is not None:
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline so fractional timeouts are honoured.
        time.sleep(min(poll_interval, remaining))
|
||||
|
||||
|
||||
# Run d8 under `perf record`; with --timeout the run is cut short by
# signalling the d8 child processes and then perf itself.
# NOTE(review): the block nesting below is reconstructed (the original
# indentation is not visible here) — confirm that the whole teardown is
# meant to run only when the timeout was hit.
if options.timeout is None:
    subprocess.run(cmd)
else:
    process = subprocess.Popen(cmd)
    if not wait_for_process_timeout(process):
        log(f"QUITING d8 processes after {options.timeout}s timeout")
        current_process = psutil.Process()
        children = current_process.children(recursive=True)
        for child in children:
            try:
                if "d8" in child.name():
                    print(f" quitting PID={child.pid}")
                    child.send_signal(signal.SIGQUIT)
            except psutil.NoSuchProcess:
                # The child exited between enumeration and signalling;
                # there is nothing left to stop.
                continue
        # Wait for linux-perf to write out files
        time.sleep(1)
        process.send_signal(signal.SIGQUIT)
        process.wait()
|
||||
|
||||
# ==============================================================================
|
||||
log("POST PROCESSING: Injecting JS symbols")
|
||||
|
||||
|
||||
def inject_v8_symbols(perf_dat_file):
    """Run ``perf inject --jit`` on *perf_dat_file*.

    Args:
        perf_dat_file: ``pathlib.Path`` of the recorded perf data file.

    Returns:
        The path of the generated ``*.data.jitted`` file, or None when perf
        could not be started or exited with a non-zero status.
    """
    output_file = perf_dat_file.with_suffix(".data.jitted")
    cmd = [
        "perf", "inject", "--jit", f"--input={perf_dat_file}",
        f"--output={output_file}"
    ]
    try:
        # check=True turns a failing perf run into an exception; without it
        # a failed injection would be reported as processed.
        subprocess.run(cmd, check=True)
    except (OSError, subprocess.CalledProcessError) as error:
        print(f"Failed ({error}): {shlex.join(cmd)}")
        return None
    print(f"Processed: {output_file}")
    return output_file
|
||||
|
||||
|
||||
# Post-process the recorded profile and print where the result ended up.
result = inject_v8_symbols(perf_data_file)
if result is None:
    # The second literal needs its own f-prefix, otherwise the placeholder
    # is printed verbatim instead of the directory.
    print("No perf files were successfully processed."
          f" Check for errors or partial results in '{options.perf_data_dir}'")
    # SystemExit works even when the site-provided exit() helper is absent.
    raise SystemExit(1)
log(f"RESULTS in '{options.perf_data_dir}'")
BYTES_TO_MIB = 1 / 1024 / 1024
print(f"{result.name:67}{(result.stat().st_size*BYTES_TO_MIB):10.2f}MiB")

log("PPROF")
print(f"pprof -flame {result}")
|
@ -1,58 +0,0 @@
|
||||
#! /bin/sh
|
||||
#
|
||||
# Copyright 2016 the V8 project authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
#
|
||||
|
||||
########## Global variable definitions

# Ensure that <your CPU clock> / $SAMPLE_EVERY_N_CYCLES < $MAXIMUM_SAMPLE_RATE.
MAXIMUM_SAMPLE_RATE=10000000
SAMPLE_EVERY_N_CYCLES=10000
SAMPLE_RATE_CONFIG_FILE="/proc/sys/kernel/perf_event_max_sample_rate"
KERNEL_MAP_CONFIG_FILE="/proc/sys/kernel/kptr_restrict"
CALL_GRAPH_METHOD="fp"  # dwarf does not play nice with JITted objects.
# The sampling event can be overridden through the environment.
EVENT_TYPE=${EVENT_TYPE:=cycles:u}

########## Usage

# Print a short usage message to stdout.
usage() {
cat << EOF
usage: $0 <benchmark_command>

Executes <benchmark_command> under observation by Linux perf.
Sampling event is cycles in user space, call graphs are recorded.
EOF
}

if [ $# -eq 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then
  usage
  exit 1
fi

########## Actual script execution

# All expansions below are quoted so that values containing whitespace or
# glob characters are passed through as single words.
ACTUAL_SAMPLE_RATE=$(cat "$SAMPLE_RATE_CONFIG_FILE")
if [ "$ACTUAL_SAMPLE_RATE" -lt "$MAXIMUM_SAMPLE_RATE" ] ; then
  echo "Setting appropriate maximum sample rate..."
  echo "$MAXIMUM_SAMPLE_RATE" | sudo tee "$SAMPLE_RATE_CONFIG_FILE"
fi

ACTUAL_KERNEL_MAP_RESTRICTION=$(cat "$KERNEL_MAP_CONFIG_FILE")
if [ "$ACTUAL_KERNEL_MAP_RESTRICTION" -ne "0" ] ; then
  echo "Disabling kernel address map restriction..."
  echo 0 | sudo tee "$KERNEL_MAP_CONFIG_FILE"
fi

# Extract the command being perfed, so that we can prepend arguments to the
# arguments that the user supplied.
COMMAND=$1
shift 1

echo "Running..."
perf record -R \
  -e "$EVENT_TYPE" \
  -c "$SAMPLE_EVERY_N_CYCLES" \
  --call-graph "$CALL_GRAPH_METHOD" \
  -i "$COMMAND" --perf-basic-prof "$@"
|
Loading…
Reference in New Issue
Block a user