Added the simplest call stack sampling and call profile in tick processor output.

Currently only two stack frames are sampled (current function and its caller).

Output of tick processor looks like this:

 [Call profile]:
   total  call path
   15.2%  LazyCompile: am3 crypto.js:108  <-  LazyCompile: montReduce crypto.js:583
    6.5%  LazyCompile: am3 crypto.js:108  <-  LazyCompile: bnpSquareTo crypto.js:431
    2.9%  Builtin: KeyedStoreIC_Generic  <-  LazyCompile: montReduce crypto.js:583
    2.3%  LazyCompile: am3 crypto.js:108  <-  LazyCompile: bnpMultiplyTo crypto.js:415

Tested under Windows, Linux and OS X.

Review URL: http://codereview.chromium.org/21403


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1292 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
mikhail.naganov@gmail.com 2009-02-17 14:45:45 +00:00
parent 969d43e785
commit a23fd3284a
7 changed files with 114 additions and 25 deletions

View File

@ -139,12 +139,14 @@ bool Profiler::paused_ = false;
//
class Ticker: public Sampler {
public:
explicit Ticker(int interval):
Sampler(interval, FLAG_prof), window_(NULL), profiler_(NULL) {}
explicit Ticker(int interval, unsigned int low_stack_bound):
Sampler(interval, FLAG_prof), window_(NULL), profiler_(NULL),
low_stack_bound_(low_stack_bound) {}
~Ticker() { if (IsActive()) Stop(); }
void Tick(TickSample* sample) {
if (IsProfiling()) SampleStack(sample);
if (profiler_) profiler_->Insert(sample);
if (window_) window_->AddState(sample->state);
}
@ -170,8 +172,21 @@ class Ticker: public Sampler {
}
private:
void SampleStack(TickSample* sample) {
// Assuming that stack grows from lower addresses
if (sample->sp < sample->fp && sample->fp < low_stack_bound_) {
sample->InitStack(1);
sample->stack[0] = Memory::Address_at(
(Address)(sample->fp + StandardFrameConstants::kCallerPCOffset));
} else {
// FP seems to be in some intermediate state, better discard this sample
sample->InitStack(0);
}
}
SlidingStateWindow* window_;
Profiler* profiler_;
unsigned int low_stack_bound_;
};
@ -239,9 +254,6 @@ void Profiler::Disengage() {
// the thread to terminate.
running_ = false;
TickSample sample;
sample.pc = 0;
sample.sp = 0;
sample.state = OTHER;
Insert(&sample);
Join();
@ -900,6 +912,11 @@ void Logger::TickEvent(TickSample* sample, bool overflow) {
if (overflow) {
msg.Append(",overflow");
}
if (*(sample->stack)) {
for (size_t i = 0; sample->stack[i]; ++i) {
msg.Append(",0x%x", reinterpret_cast<unsigned int>(sample->stack[i]));
}
}
msg.Append('\n');
msg.WriteToLogFile();
}
@ -990,7 +1007,10 @@ bool Logger::Setup() {
current_state_ = new VMState(OTHER);
ticker_ = new Ticker(10);
// as log is initialized early with V8, we can assume that JS execution
// frames can never reach this point on stack
int stack_var;
ticker_ = new Ticker(10, reinterpret_cast<unsigned int>(&stack_var));
if (FLAG_sliding_state_window && sliding_state_window_ == NULL) {
sliding_state_window_ = new SlidingStateWindow();

View File

@ -634,9 +634,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
#if defined (__arm__) || defined(__thumb__)
sample.pc = mcontext.mc_r15;
sample.sp = mcontext.mc_r13;
sample.fp = mcontext.mc_r11;
#else
sample.pc = mcontext.mc_eip;
sample.sp = mcontext.mc_esp;
sample.fp = mcontext.mc_ebp;
#endif
}

View File

@ -617,9 +617,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
#if defined (__arm__) || defined(__thumb__)
sample.pc = mcontext.gregs[R15];
sample.sp = mcontext.gregs[R13];
sample.fp = mcontext.gregs[R11];
#else
sample.pc = mcontext.gregs[REG_EIP];
sample.sp = mcontext.gregs[REG_ESP];
sample.fp = mcontext.gregs[REG_EBP];
#endif
}

View File

@ -583,9 +583,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
#if __DARWIN_UNIX03
sample.pc = mcontext->__ss.__eip;
sample.sp = mcontext->__ss.__esp;
sample.fp = mcontext->__ss.__ebp;
#else // !__DARWIN_UNIX03
sample.pc = mcontext->ss.eip;
sample.sp = mcontext->ss.esp;
sample.fp = mcontext->ss.ebp;
#endif // __DARWIN_UNIX03
}

View File

@ -1585,6 +1585,7 @@ class Sampler::PlatformData : public Malloced {
// Invoke tick handler with program counter and stack pointer.
sample.pc = context.Eip;
sample.sp = context.Esp;
sample.fp = context.Ebp;
}
// We always sample the VM state.

View File

@ -422,10 +422,29 @@ class Semaphore {
// TickSample captures the information collected for each sample.
class TickSample {
public:
TickSample() : pc(0), sp(0), state(OTHER) {}
TickSample() : pc(0), sp(0), fp(0), state(OTHER) {}
unsigned int pc; // Instruction pointer.
unsigned int sp; // Stack pointer.
unsigned int fp; // Frame pointer.
StateTag state; // The state of the VM.
SmartPointer<Address> stack; // Call stack, null-terminated.
inline TickSample& operator=(const TickSample& rhs) {
if (this == &rhs) return *this;
pc = rhs.pc;
sp = rhs.sp;
fp = rhs.fp;
state = rhs.state;
DeleteArray(stack.Detach());
stack = rhs.stack;
return *this;
}
inline void InitStack(int depth) {
stack = SmartPointer<Address>(NewArray<Address>(depth + 1));
// null-terminate
stack[depth] = 0;
}
};
class Sampler {

View File

@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import csv, splaytree, sys
from operator import itemgetter
class CodeEntry(object):
@ -34,9 +35,14 @@ class CodeEntry(object):
self.start_addr = start_addr
self.tick_count = 0
self.name = name
self.stacks = {}
def Tick(self, pc):
def Tick(self, pc, stack):
self.tick_count += 1
if len(stack) > 0:
stack.insert(0, self.ToString())
stack_key = tuple(stack)
self.stacks[stack_key] = self.stacks.setdefault(stack_key, 0) + 1
def RegionTicks(self):
return None
@ -69,8 +75,8 @@ class JSCodeEntry(CodeEntry):
self.assembler = assembler
self.region_ticks = None
def Tick(self, pc):
super(JSCodeEntry, self).Tick(pc)
def Tick(self, pc, stack):
super(JSCodeEntry, self).Tick(pc, stack)
if not pc is None:
offset = pc - self.start_addr
seen = []
@ -162,7 +168,7 @@ class TickProcessor(object):
logreader = csv.reader(logfile)
for row in logreader:
if row[0] == 'tick':
self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3]))
self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3]), row[4:])
elif row[0] == 'code-creation':
self.ProcessCodeCreation(row[1], int(row[2], 16), int(row[3]), row[4])
elif row[0] == 'code-move':
@ -237,25 +243,41 @@ class TickProcessor(object):
def IncludeTick(self, pc, sp, state):
return (self.included_state is None) or (self.included_state == state)
def ProcessTick(self, pc, sp, state):
def FindEntry(self, pc):
page = pc >> 12
if page in self.vm_extent:
entry = self.cpp_entries.FindGreatestsLessThan(pc)
if entry != None:
return entry.value
else:
return entry
max = self.js_entries.FindMax()
min = self.js_entries.FindMin()
if max != None and pc < max.key and pc > min.key:
return self.js_entries.FindGreatestsLessThan(pc).value
return None
def ProcessStack(self, stack):
result = []
for frame in stack:
if frame.startswith('0x'):
entry = self.FindEntry(int(frame, 16))
if entry != None:
result.append(entry.ToString())
return result
def ProcessTick(self, pc, sp, state, stack):
if not self.IncludeTick(pc, sp, state):
self.excluded_number_of_ticks += 1;
return
self.total_number_of_ticks += 1
page = pc >> 12
if page in self.vm_extent:
entry = self.cpp_entries.FindGreatestsLessThan(pc).value
if entry.IsSharedLibraryEntry():
self.number_of_library_ticks += 1
entry.Tick(None)
entry = self.FindEntry(pc)
if entry == None:
self.unaccounted_number_of_ticks += 1
return
max = self.js_entries.FindMax()
min = self.js_entries.FindMin()
if max != None and pc < max.key and pc > min.key:
code_obj = self.js_entries.FindGreatestsLessThan(pc).value
code_obj.Tick(pc)
return
self.unaccounted_number_of_ticks += 1
if entry.IsSharedLibraryEntry():
self.number_of_library_ticks += 1
entry.Tick(pc, self.ProcessStack(stack))
def PrintResults(self):
print('Statistical profiling result from %s, (%d ticks, %d unaccounted, %d excluded).' %
@ -276,6 +298,11 @@ class TickProcessor(object):
# Print the C++ ticks.
self.PrintHeader('C++')
self.PrintEntries(cpp_entries, lambda e:not e.IsSharedLibraryEntry())
# Print call profile.
print('\n [Call profile]:')
print(' total call path')
js_entries.extend(cpp_entries)
self.PrintCallProfile(js_entries)
def PrintHeader(self, header_title):
print('\n [%s]:' % header_title)
@ -309,5 +336,21 @@ class TickProcessor(object):
'name': name
})
def PrintCallProfile(self, entries):
all_stacks = {}
total_stacks = 0
for entry in entries:
all_stacks.update(entry.stacks)
for count in entry.stacks.itervalues():
total_stacks += count
all_stacks_items = all_stacks.items();
all_stacks_items.sort(key = itemgetter(1), reverse=True)
for stack, count in all_stacks_items:
total_percentage = count * 100.0 / total_stacks
print(' %(total)5.1f%% %(call_path)s' % {
'total' : total_percentage,
'call_path' : stack[0] + ' <- ' + stack[1]
})
if __name__ == '__main__':
sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')