llprof improvements
Review URL: https://codereview.chromium.org/11444031 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13168 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
fe7a68463a
commit
125b1c29cd
@ -45,7 +45,7 @@ USAGE="""usage: %prog [OPTION]...
|
||||
Analyses V8 and perf logs to produce profiles.
|
||||
|
||||
Perf logs can be collected using a command like:
|
||||
$ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
|
||||
$ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
|
||||
# -R: collect all data
|
||||
# -e cycles: use cpu-cycles event (run "perf list" for details)
|
||||
# -c 10000: write a sample after each 10000 events
|
||||
@ -54,6 +54,16 @@ Perf logs can be collected using a command like:
|
||||
# --ll-prof shell flag enables the right V8 logs
|
||||
This will produce a binary trace file (perf.data) that %prog can analyse.
|
||||
|
||||
IMPORTANT:
|
||||
The kernel has an internal maximum for events per second, it is 100K by
|
||||
default. That's not enough for "-c 10000". Set it to some higher value:
|
||||
$ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
|
||||
You can also make the warning about kernel address maps go away:
|
||||
$ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
|
||||
|
||||
We have a convenience script that handles all of the above for you:
|
||||
$ tools/run-llprof.sh ./d8 bench.js
|
||||
|
||||
Examples:
|
||||
# Print flat profile with annotated disassembly for the 10 top
|
||||
# symbols. Use default log names and include the snapshot log.
|
||||
@ -75,6 +85,10 @@ class Code(object):
|
||||
"""Code object."""
|
||||
|
||||
_id = 0
|
||||
UNKNOWN = 0
|
||||
V8INTERNAL = 1
|
||||
FULL_CODEGEN = 2
|
||||
OPTIMIZED = 3
|
||||
|
||||
def __init__(self, name, start_address, end_address, origin, origin_offset):
|
||||
self.id = Code._id
|
||||
@ -88,6 +102,14 @@ class Code(object):
|
||||
self.self_ticks = 0
|
||||
self.self_ticks_map = None
|
||||
self.callee_ticks = None
|
||||
if name.startswith("LazyCompile:*"):
|
||||
self.codetype = Code.OPTIMIZED
|
||||
elif name.startswith("LazyCompile:"):
|
||||
self.codetype = Code.FULL_CODEGEN
|
||||
elif name.startswith("v8::internal::"):
|
||||
self.codetype = Code.V8INTERNAL
|
||||
else:
|
||||
self.codetype = Code.UNKNOWN
|
||||
|
||||
def AddName(self, name):
|
||||
assert self.name != name
|
||||
@ -185,7 +207,7 @@ class Code(object):
|
||||
class CodePage(object):
|
||||
"""Group of adjacent code objects."""
|
||||
|
||||
SHIFT = 12 # 4K pages
|
||||
SHIFT = 20 # 1M pages
|
||||
SIZE = (1 << SHIFT)
|
||||
MASK = ~(SIZE - 1)
|
||||
|
||||
@ -507,6 +529,7 @@ class Descriptor(object):
|
||||
# for the gory details.
|
||||
|
||||
|
||||
# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
|
||||
TRACE_HEADER_DESC = Descriptor([
|
||||
("magic", "u64"),
|
||||
("size", "u64"),
|
||||
@ -520,6 +543,7 @@ TRACE_HEADER_DESC = Descriptor([
|
||||
])
|
||||
|
||||
|
||||
# Reference: /usr/include/linux/perf_event.h
|
||||
PERF_EVENT_ATTR_DESC = Descriptor([
|
||||
("type", "u32"),
|
||||
("size", "u32"),
|
||||
@ -529,12 +553,13 @@ PERF_EVENT_ATTR_DESC = Descriptor([
|
||||
("read_format", "u64"),
|
||||
("flags", "u64"),
|
||||
("wakeup_events_or_watermark", "u32"),
|
||||
("bt_type", "u32"),
|
||||
("bp_type", "u32"),
|
||||
("bp_addr", "u64"),
|
||||
("bp_len", "u64"),
|
||||
("bp_len", "u64")
|
||||
])
|
||||
|
||||
|
||||
# Reference: /usr/include/linux/perf_event.h
|
||||
PERF_EVENT_HEADER_DESC = Descriptor([
|
||||
("type", "u32"),
|
||||
("misc", "u16"),
|
||||
@ -542,6 +567,7 @@ PERF_EVENT_HEADER_DESC = Descriptor([
|
||||
])
|
||||
|
||||
|
||||
# Reference: kernel/events/core.c
|
||||
PERF_MMAP_EVENT_BODY_DESC = Descriptor([
|
||||
("pid", "u32"),
|
||||
("tid", "u32"),
|
||||
@ -566,6 +592,7 @@ PERF_SAMPLE_STREAM_ID = 1 << 9
|
||||
PERF_SAMPLE_RAW = 1 << 10
|
||||
|
||||
|
||||
# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
|
||||
PERF_SAMPLE_EVENT_BODY_FIELDS = [
|
||||
("ip", "u64", PERF_SAMPLE_IP),
|
||||
("pid", "u32", PERF_SAMPLE_TID),
|
||||
@ -702,8 +729,12 @@ class LibraryRepo(object):
|
||||
# Unfortunately, section headers span two lines, so we have to
|
||||
# keep the just seen section name (from the first line in each
|
||||
# section header) in the after_section variable.
|
||||
if mmap_info.filename.endswith(".ko"):
|
||||
dynamic_symbols = ""
|
||||
else:
|
||||
dynamic_symbols = "-T"
|
||||
process = subprocess.Popen(
|
||||
"%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
|
||||
"%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
|
||||
shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
pipe = process.stdout
|
||||
after_section = None
|
||||
@ -795,7 +826,7 @@ def PrintReport(code_map, library_repo, arch, ticks, options):
|
||||
code.PrintAnnotated(arch, options)
|
||||
print
|
||||
print "Ticks per library:"
|
||||
mmap_infos = [m for m in library_repo.infos]
|
||||
mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
|
||||
mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
|
||||
for mmap_info in mmap_infos:
|
||||
mmap_ticks = mmap_info.ticks
|
||||
@ -885,6 +916,9 @@ if __name__ == "__main__":
|
||||
ticks = 0
|
||||
missed_ticks = 0
|
||||
really_missed_ticks = 0
|
||||
optimized_ticks = 0
|
||||
generated_ticks = 0
|
||||
v8_internal_ticks = 0
|
||||
mmap_time = 0
|
||||
sample_time = 0
|
||||
|
||||
@ -928,6 +962,12 @@ if __name__ == "__main__":
|
||||
code = code_map.Find(sample.ip)
|
||||
if code:
|
||||
code.Tick(sample.ip)
|
||||
if code.codetype == Code.OPTIMIZED:
|
||||
optimized_ticks += 1
|
||||
elif code.codetype == Code.FULL_CODEGEN:
|
||||
generated_ticks += 1
|
||||
elif code.codetype == Code.V8INTERNAL:
|
||||
v8_internal_ticks += 1
|
||||
else:
|
||||
missed_ticks += 1
|
||||
if not library_repo.Tick(sample.ip) and not code:
|
||||
@ -947,12 +987,21 @@ if __name__ == "__main__":
|
||||
PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
|
||||
|
||||
if not options.quiet:
|
||||
def PrintTicks(number, total, description):
|
||||
print("%10d %5.1f%% ticks in %s" %
|
||||
(number, 100.0*number/total, description))
|
||||
print
|
||||
print "Stats:"
|
||||
print "%10d total trace events" % events
|
||||
print "%10d total ticks" % ticks
|
||||
print "%10d ticks not in symbols" % missed_ticks
|
||||
print "%10d unaccounted ticks" % really_missed_ticks
|
||||
unaccounted = "unaccounted ticks"
|
||||
if really_missed_ticks > 0:
|
||||
unaccounted += " (probably in the kernel, try --kernel)"
|
||||
PrintTicks(really_missed_ticks, ticks, unaccounted)
|
||||
PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
|
||||
PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
|
||||
PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
|
||||
print "%10d total symbols" % len([c for c in code_map.AllCode()])
|
||||
print "%10d used symbols" % len([c for c in code_map.UsedCode()])
|
||||
print "%9.2fs library processing time" % mmap_time
|
||||
|
69
tools/run-llprof.sh
Executable file
69
tools/run-llprof.sh
Executable file
@ -0,0 +1,69 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2012 the V8 project authors. All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
# * Neither the name of Google Inc. nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
########## Global variable definitions
|
||||
|
||||
# Ensure that <your CPU clock> / $SAMPLE_EVERY_N_CYCLES < $MAXIMUM_SAMPLE_RATE.
|
||||
MAXIMUM_SAMPLE_RATE=10000000
|
||||
SAMPLE_EVERY_N_CYCLES=10000
|
||||
SAMPLE_RATE_CONFIG_FILE="/proc/sys/kernel/perf_event_max_sample_rate"
|
||||
KERNEL_MAP_CONFIG_FILE="/proc/sys/kernel/kptr_restrict"
|
||||
|
||||
########## Usage
|
||||
|
||||
usage() {
|
||||
cat << EOF
|
||||
usage: $0 <benchmark_command>
|
||||
|
||||
Executes <benchmark_command> under observation by the kernel's "perf" \
|
||||
framework, then calls the low level tick processor to analyze the results.
|
||||
EOF
|
||||
}
|
||||
|
||||
if [ $# -eq 0 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ] ; then
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
########## Actual script execution
|
||||
|
||||
ACTUAL_SAMPLE_RATE=$(cat $SAMPLE_RATE_CONFIG_FILE)
|
||||
if [ "$ACTUAL_SAMPLE_RATE" -lt "$MAXIMUM_SAMPLE_RATE" ] ; then
|
||||
echo "Setting appropriate maximum sample rate..."
|
||||
echo $MAXIMUM_SAMPLE_RATE | sudo tee $SAMPLE_RATE_CONFIG_FILE
|
||||
fi
|
||||
|
||||
ACTUAL_KERNEL_MAP_RESTRICTION=$(cat $KERNEL_MAP_CONFIG_FILE)
|
||||
if [ "$ACTUAL_KERNEL_MAP_RESTRICTION" -ne "0" ] ; then
|
||||
echo "Disabling kernel address map restriction..."
|
||||
echo 0 | sudo tee $KERNEL_MAP_CONFIG_FILE
|
||||
fi
|
||||
|
||||
echo "Running benchmark..."
|
||||
perf record -R -e cycles -c $SAMPLE_EVERY_N_CYCLES -f -i $@ --ll-prof
|
Loading…
Reference in New Issue
Block a user