Added the simplest call stack sampling and call profile in tick processor output.
Currently only two stack frames are sampled (current function and its caller). Output of tick processor looks like this: [Call profile]: total call path 15.2% LazyCompile: am3 crypto.js:108 <- LazyCompile: montReduce crypto.js:583 6.5% LazyCompile: am3 crypto.js:108 <- LazyCompile: bnpSquareTo crypto.js:431 2.9% Builtin: KeyedStoreIC_Generic <- LazyCompile: montReduce crypto.js:583 2.3% LazyCompile: am3 crypto.js:108 <- LazyCompile: bnpMultiplyTo crypto.js:415 Tested under Windows, Linux and OS X. Review URL: http://codereview.chromium.org/21403 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1292 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
969d43e785
commit
a23fd3284a
32
src/log.cc
32
src/log.cc
@ -139,12 +139,14 @@ bool Profiler::paused_ = false;
|
||||
//
|
||||
class Ticker: public Sampler {
|
||||
public:
|
||||
explicit Ticker(int interval):
|
||||
Sampler(interval, FLAG_prof), window_(NULL), profiler_(NULL) {}
|
||||
explicit Ticker(int interval, unsigned int low_stack_bound):
|
||||
Sampler(interval, FLAG_prof), window_(NULL), profiler_(NULL),
|
||||
low_stack_bound_(low_stack_bound) {}
|
||||
|
||||
~Ticker() { if (IsActive()) Stop(); }
|
||||
|
||||
void Tick(TickSample* sample) {
|
||||
if (IsProfiling()) SampleStack(sample);
|
||||
if (profiler_) profiler_->Insert(sample);
|
||||
if (window_) window_->AddState(sample->state);
|
||||
}
|
||||
@ -170,8 +172,21 @@ class Ticker: public Sampler {
|
||||
}
|
||||
|
||||
private:
|
||||
void SampleStack(TickSample* sample) {
|
||||
// Assuming that stack grows from lower addresses
|
||||
if (sample->sp < sample->fp && sample->fp < low_stack_bound_) {
|
||||
sample->InitStack(1);
|
||||
sample->stack[0] = Memory::Address_at(
|
||||
(Address)(sample->fp + StandardFrameConstants::kCallerPCOffset));
|
||||
} else {
|
||||
// FP seems to be in some intermediate state, better discard this sample
|
||||
sample->InitStack(0);
|
||||
}
|
||||
}
|
||||
|
||||
SlidingStateWindow* window_;
|
||||
Profiler* profiler_;
|
||||
unsigned int low_stack_bound_;
|
||||
};
|
||||
|
||||
|
||||
@ -239,9 +254,6 @@ void Profiler::Disengage() {
|
||||
// the thread to terminate.
|
||||
running_ = false;
|
||||
TickSample sample;
|
||||
sample.pc = 0;
|
||||
sample.sp = 0;
|
||||
sample.state = OTHER;
|
||||
Insert(&sample);
|
||||
Join();
|
||||
|
||||
@ -900,6 +912,11 @@ void Logger::TickEvent(TickSample* sample, bool overflow) {
|
||||
if (overflow) {
|
||||
msg.Append(",overflow");
|
||||
}
|
||||
if (*(sample->stack)) {
|
||||
for (size_t i = 0; sample->stack[i]; ++i) {
|
||||
msg.Append(",0x%x", reinterpret_cast<unsigned int>(sample->stack[i]));
|
||||
}
|
||||
}
|
||||
msg.Append('\n');
|
||||
msg.WriteToLogFile();
|
||||
}
|
||||
@ -990,7 +1007,10 @@ bool Logger::Setup() {
|
||||
|
||||
current_state_ = new VMState(OTHER);
|
||||
|
||||
ticker_ = new Ticker(10);
|
||||
// as log is initialized early with V8, we can assume that JS execution
|
||||
// frames can never reach this point on stack
|
||||
int stack_var;
|
||||
ticker_ = new Ticker(10, reinterpret_cast<unsigned int>(&stack_var));
|
||||
|
||||
if (FLAG_sliding_state_window && sliding_state_window_ == NULL) {
|
||||
sliding_state_window_ = new SlidingStateWindow();
|
||||
|
@ -634,9 +634,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
|
||||
#if defined (__arm__) || defined(__thumb__)
|
||||
sample.pc = mcontext.mc_r15;
|
||||
sample.sp = mcontext.mc_r13;
|
||||
sample.fp = mcontext.mc_r11;
|
||||
#else
|
||||
sample.pc = mcontext.mc_eip;
|
||||
sample.sp = mcontext.mc_esp;
|
||||
sample.fp = mcontext.mc_ebp;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -617,9 +617,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
|
||||
#if defined (__arm__) || defined(__thumb__)
|
||||
sample.pc = mcontext.gregs[R15];
|
||||
sample.sp = mcontext.gregs[R13];
|
||||
sample.fp = mcontext.gregs[R11];
|
||||
#else
|
||||
sample.pc = mcontext.gregs[REG_EIP];
|
||||
sample.sp = mcontext.gregs[REG_ESP];
|
||||
sample.fp = mcontext.gregs[REG_EBP];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -583,9 +583,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
|
||||
#if __DARWIN_UNIX03
|
||||
sample.pc = mcontext->__ss.__eip;
|
||||
sample.sp = mcontext->__ss.__esp;
|
||||
sample.fp = mcontext->__ss.__ebp;
|
||||
#else // !__DARWIN_UNIX03
|
||||
sample.pc = mcontext->ss.eip;
|
||||
sample.sp = mcontext->ss.esp;
|
||||
sample.fp = mcontext->ss.ebp;
|
||||
#endif // __DARWIN_UNIX03
|
||||
}
|
||||
|
||||
|
@ -1585,6 +1585,7 @@ class Sampler::PlatformData : public Malloced {
|
||||
// Invoke tick handler with program counter and stack pointer.
|
||||
sample.pc = context.Eip;
|
||||
sample.sp = context.Esp;
|
||||
sample.fp = context.Ebp;
|
||||
}
|
||||
|
||||
// We always sample the VM state.
|
||||
|
@ -422,10 +422,29 @@ class Semaphore {
|
||||
// TickSample captures the information collected for each sample.
|
||||
class TickSample {
|
||||
public:
|
||||
TickSample() : pc(0), sp(0), state(OTHER) {}
|
||||
TickSample() : pc(0), sp(0), fp(0), state(OTHER) {}
|
||||
unsigned int pc; // Instruction pointer.
|
||||
unsigned int sp; // Stack pointer.
|
||||
unsigned int fp; // Frame pointer.
|
||||
StateTag state; // The state of the VM.
|
||||
SmartPointer<Address> stack; // Call stack, null-terminated.
|
||||
|
||||
inline TickSample& operator=(const TickSample& rhs) {
|
||||
if (this == &rhs) return *this;
|
||||
pc = rhs.pc;
|
||||
sp = rhs.sp;
|
||||
fp = rhs.fp;
|
||||
state = rhs.state;
|
||||
DeleteArray(stack.Detach());
|
||||
stack = rhs.stack;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline void InitStack(int depth) {
|
||||
stack = SmartPointer<Address>(NewArray<Address>(depth + 1));
|
||||
// null-terminate
|
||||
stack[depth] = 0;
|
||||
}
|
||||
};
|
||||
|
||||
class Sampler {
|
||||
|
@ -26,6 +26,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import csv, splaytree, sys
|
||||
from operator import itemgetter
|
||||
|
||||
|
||||
class CodeEntry(object):
|
||||
@ -34,9 +35,14 @@ class CodeEntry(object):
|
||||
self.start_addr = start_addr
|
||||
self.tick_count = 0
|
||||
self.name = name
|
||||
self.stacks = {}
|
||||
|
||||
def Tick(self, pc):
|
||||
def Tick(self, pc, stack):
|
||||
self.tick_count += 1
|
||||
if len(stack) > 0:
|
||||
stack.insert(0, self.ToString())
|
||||
stack_key = tuple(stack)
|
||||
self.stacks[stack_key] = self.stacks.setdefault(stack_key, 0) + 1
|
||||
|
||||
def RegionTicks(self):
|
||||
return None
|
||||
@ -69,8 +75,8 @@ class JSCodeEntry(CodeEntry):
|
||||
self.assembler = assembler
|
||||
self.region_ticks = None
|
||||
|
||||
def Tick(self, pc):
|
||||
super(JSCodeEntry, self).Tick(pc)
|
||||
def Tick(self, pc, stack):
|
||||
super(JSCodeEntry, self).Tick(pc, stack)
|
||||
if not pc is None:
|
||||
offset = pc - self.start_addr
|
||||
seen = []
|
||||
@ -162,7 +168,7 @@ class TickProcessor(object):
|
||||
logreader = csv.reader(logfile)
|
||||
for row in logreader:
|
||||
if row[0] == 'tick':
|
||||
self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3]))
|
||||
self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3]), row[4:])
|
||||
elif row[0] == 'code-creation':
|
||||
self.ProcessCodeCreation(row[1], int(row[2], 16), int(row[3]), row[4])
|
||||
elif row[0] == 'code-move':
|
||||
@ -237,25 +243,41 @@ class TickProcessor(object):
|
||||
def IncludeTick(self, pc, sp, state):
|
||||
return (self.included_state is None) or (self.included_state == state)
|
||||
|
||||
def ProcessTick(self, pc, sp, state):
|
||||
def FindEntry(self, pc):
|
||||
page = pc >> 12
|
||||
if page in self.vm_extent:
|
||||
entry = self.cpp_entries.FindGreatestsLessThan(pc)
|
||||
if entry != None:
|
||||
return entry.value
|
||||
else:
|
||||
return entry
|
||||
max = self.js_entries.FindMax()
|
||||
min = self.js_entries.FindMin()
|
||||
if max != None and pc < max.key and pc > min.key:
|
||||
return self.js_entries.FindGreatestsLessThan(pc).value
|
||||
return None
|
||||
|
||||
def ProcessStack(self, stack):
|
||||
result = []
|
||||
for frame in stack:
|
||||
if frame.startswith('0x'):
|
||||
entry = self.FindEntry(int(frame, 16))
|
||||
if entry != None:
|
||||
result.append(entry.ToString())
|
||||
return result
|
||||
|
||||
def ProcessTick(self, pc, sp, state, stack):
|
||||
if not self.IncludeTick(pc, sp, state):
|
||||
self.excluded_number_of_ticks += 1;
|
||||
return
|
||||
self.total_number_of_ticks += 1
|
||||
page = pc >> 12
|
||||
if page in self.vm_extent:
|
||||
entry = self.cpp_entries.FindGreatestsLessThan(pc).value
|
||||
if entry.IsSharedLibraryEntry():
|
||||
self.number_of_library_ticks += 1
|
||||
entry.Tick(None)
|
||||
entry = self.FindEntry(pc)
|
||||
if entry == None:
|
||||
self.unaccounted_number_of_ticks += 1
|
||||
return
|
||||
max = self.js_entries.FindMax()
|
||||
min = self.js_entries.FindMin()
|
||||
if max != None and pc < max.key and pc > min.key:
|
||||
code_obj = self.js_entries.FindGreatestsLessThan(pc).value
|
||||
code_obj.Tick(pc)
|
||||
return
|
||||
self.unaccounted_number_of_ticks += 1
|
||||
if entry.IsSharedLibraryEntry():
|
||||
self.number_of_library_ticks += 1
|
||||
entry.Tick(pc, self.ProcessStack(stack))
|
||||
|
||||
def PrintResults(self):
|
||||
print('Statistical profiling result from %s, (%d ticks, %d unaccounted, %d excluded).' %
|
||||
@ -276,6 +298,11 @@ class TickProcessor(object):
|
||||
# Print the C++ ticks.
|
||||
self.PrintHeader('C++')
|
||||
self.PrintEntries(cpp_entries, lambda e:not e.IsSharedLibraryEntry())
|
||||
# Print call profile.
|
||||
print('\n [Call profile]:')
|
||||
print(' total call path')
|
||||
js_entries.extend(cpp_entries)
|
||||
self.PrintCallProfile(js_entries)
|
||||
|
||||
def PrintHeader(self, header_title):
|
||||
print('\n [%s]:' % header_title)
|
||||
@ -309,5 +336,21 @@ class TickProcessor(object):
|
||||
'name': name
|
||||
})
|
||||
|
||||
def PrintCallProfile(self, entries):
|
||||
all_stacks = {}
|
||||
total_stacks = 0
|
||||
for entry in entries:
|
||||
all_stacks.update(entry.stacks)
|
||||
for count in entry.stacks.itervalues():
|
||||
total_stacks += count
|
||||
all_stacks_items = all_stacks.items();
|
||||
all_stacks_items.sort(key = itemgetter(1), reverse=True)
|
||||
for stack, count in all_stacks_items:
|
||||
total_percentage = count * 100.0 / total_stacks
|
||||
print(' %(total)5.1f%% %(call_path)s' % {
|
||||
'total' : total_percentage,
|
||||
'call_path' : stack[0] + ' <- ' + stack[1]
|
||||
})
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')
|
||||
|
Loading…
Reference in New Issue
Block a user