Added support in the profiler for creating 'regions' that cover part

of the generated code.  These can be used by the profiler to
categorize the ticks that occur within generated code and thereby show
more detailed information about where time is spent in generated code.
For instance, this is what the profiler displayed for a simple regexp
benchmark with irregexp-native before:

[JavaScript]:
   total  nonlib   name
   87.2%   87.2%   RegExp: (?:\w*\W+)*

This is what we can display now:

[JavaScript]:
   total  nonlib   name
   87.2%   87.2%   RegExp: (?:\w*\W+)*
                   -  53.0%  56.7% BranchOrBacktrack
                   -  14.9%  59.8% CheckCharacterLT
                   -  13.7%  20.4% CheckStackLimit
                   -   6.7%   6.7% SafeCall
                   -   2.7%   7.0% CheckCharacterGT
                   -   2.4%   2.4% SafeReturn
                   -   2.1%   2.1% LoadCurrentCharacter
                   -   1.8%   1.8% PushRegister
                   -   0.9%   0.9% PopRegister
                   -   0.9%   0.9% AdvanceRegister
                   -   0.3%   0.3% PopCurrentPosition
                   -   0.3%   0.3% CheckGreedyLoop
                   -   0.0%  20.4% PushBacktrack
                   -   0.0%  22.3% CheckCharacter
                   -   0.0%   2.4% IfRegisterLT


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1010 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
christian.plesner.hansen@gmail.com 2008-12-19 13:12:43 +00:00
parent 3dc722b555
commit d7c933e7ae
8 changed files with 176 additions and 5 deletions

View File

@ -344,6 +344,7 @@ void Assembler::GetCode(CodeDesc* desc) {
desc->buffer_size = buffer_size_;
desc->instr_size = pc_offset();
desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
desc->origin = this;
Counters::reloc_info_size.Increment(desc->reloc_size);
}

View File

@ -382,6 +382,22 @@ class RelocIterator: public Malloced {
};
// A stack-allocated code region logs a name for the code generated
// while the region is in effect. This information is used by the
// profiler to categorize ticks within generated code.
class CodeRegion BASE_EMBEDDED {
public:
inline CodeRegion(Assembler* assm, const char *name) : assm_(assm) {
LOG(BeginCodeRegionEvent(this, assm, name));
}
inline ~CodeRegion() {
LOG(EndCodeRegionEvent(this, assm_));
}
private:
Assembler* assm_;
};
//------------------------------------------------------------------------------
// External function

View File

@ -145,6 +145,7 @@ class Assembler;
class BreakableStatement;
class Code;
class CodeGenerator;
class CodeRegion;
class CodeStub;
class Context;
class Debug;
@ -259,6 +260,7 @@ struct CodeDesc {
int buffer_size;
int instr_size;
int reloc_size;
Assembler* origin;
};

View File

@ -1663,6 +1663,7 @@ Object* Heap::CreateCode(const CodeDesc& desc,
// through the self_reference parameter.
code->CopyFrom(desc);
if (sinfo != NULL) sinfo->Serialize(code); // write scope info
LOG(CodeAllocateEvent(code, desc.origin));
#ifdef DEBUG
code->Verify();

View File

@ -32,6 +32,7 @@
#include "log.h"
#include "platform.h"
#include "string-stream.h"
#include "macro-assembler.h"
namespace v8 { namespace internal {
@ -556,6 +557,18 @@ void Logger::CodeCreateEvent(const char* tag, Code* code, int args_count) {
}
void Logger::CodeAllocateEvent(Code* code, Assembler* assem) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log_code) return;
ScopedLock sl(mutex_);
fprintf(logfile_, "code-allocate,0x%x,0x%x\n",
reinterpret_cast<unsigned int>(code->address()),
reinterpret_cast<unsigned int>(assem));
#endif
}
void Logger::CodeMoveEvent(Address from, Address to) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log_code) return;
@ -576,6 +589,33 @@ void Logger::CodeDeleteEvent(Address from) {
}
void Logger::BeginCodeRegionEvent(CodeRegion* region,
Assembler* masm,
const char* name) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log_code) return;
ScopedLock sl(mutex_);
fprintf(logfile_, "begin-code-region,0x%x,0x%x,0x%x,%s\n",
reinterpret_cast<unsigned int>(region),
reinterpret_cast<unsigned int>(masm),
masm->pc_offset(),
name);
#endif
}
void Logger::EndCodeRegionEvent(CodeRegion* region, Assembler* masm) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log_code) return;
ScopedLock sl(mutex_);
fprintf(logfile_, "end-code-region,0x%x,0x%x,0x%x\n",
reinterpret_cast<unsigned int>(region),
reinterpret_cast<unsigned int>(masm),
masm->pc_offset());
#endif
}
void Logger::ResourceEvent(const char* name, const char* tag) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log) return;

View File

@ -160,10 +160,16 @@ class Logger {
static void CodeCreateEvent(const char* tag, Code* code, const char* source);
static void CodeCreateEvent(const char* tag, Code* code, String* name);
static void CodeCreateEvent(const char* tag, Code* code, int args_count);
static void CodeAllocateEvent(Code* code, Assembler* assem);
// Emits a code move event.
static void CodeMoveEvent(Address from, Address to);
// Emits a code delete event.
static void CodeDeleteEvent(Address from);
// Emits region delimiters
static void BeginCodeRegionEvent(CodeRegion* region,
Assembler* masm,
const char* name);
static void EndCodeRegionEvent(CodeRegion* region, Assembler* masm);
// ==== Events logged by --log-gc. ====
// Heap sampling events: start, end, and individual types.

View File

@ -305,6 +305,7 @@ class CodePatcher {
MacroAssembler masm_; // Macro assembler used to generate the code.
};
// -----------------------------------------------------------------------------
// Static helper functions.

View File

@ -35,9 +35,12 @@ class CodeEntry(object):
self.tick_count = 0
self.name = name
def IncrementTickCount(self):
def Tick(self, pc):
self.tick_count += 1
def RegionTicks(self):
return None
def SetStartAddress(self, start_addr):
self.start_addr = start_addr
@ -59,10 +62,45 @@ class SharedLibraryEntry(CodeEntry):
class JSCodeEntry(CodeEntry):
def __init__(self, start_addr, name, type, size):
def __init__(self, start_addr, name, type, size, assembler):
CodeEntry.__init__(self, start_addr, name)
self.type = type
self.size = size
self.assembler = assembler
self.region_ticks = None
def Tick(self, pc):
super(JSCodeEntry, self).Tick(pc)
if not pc is None:
offset = pc - self.start_addr
seen = []
narrowest = None
narrowest_width = None
for region in self.Regions():
if region.Contains(offset):
if (not region.name in seen):
seen.append(region.name)
if narrowest is None or region.Width() < narrowest.Width():
narrowest = region
if len(seen) == 0:
return
if self.region_ticks is None:
self.region_ticks = {}
for name in seen:
if not name in self.region_ticks:
self.region_ticks[name] = [0, 0]
self.region_ticks[name][0] += 1
if name == narrowest.name:
self.region_ticks[name][1] += 1
def RegionTicks(self):
return self.region_ticks
def Regions(self):
if self.assembler:
return self.assembler.regions
else:
return []
def ToString(self):
name = self.name
@ -70,12 +108,39 @@ class JSCodeEntry(CodeEntry):
return self.type + ': ' + name
class CodeRegion(object):
def __init__(self, start_offset, name):
self.start_offset = start_offset
self.name = name
self.end_offset = None
def Contains(self, pc):
return (self.start_offset <= pc) and (pc <= self.end_offset)
def Width(self):
return self.end_offset - self.start_offset
class Assembler(object):
def __init__(self):
# Mapping from region ids to open regions
self.pending_regions = {}
self.regions = []
class TickProcessor(object):
def __init__(self):
self.log_file = ''
self.deleted_code = []
self.vm_extent = {}
# Map from assembler ids to the pending assembler objects
self.pending_assemblers = {}
# Map from code addresses the have been allocated but not yet officially
# created to their assemblers.
self.assemblers = {}
self.js_entries = splaytree.SplayTree()
self.cpp_entries = splaytree.SplayTree()
self.total_number_of_ticks = 0
@ -104,6 +169,12 @@ class TickProcessor(object):
elif row[0] == 'shared-library':
self.AddSharedLibraryEntry(row[1], int(row[2], 16), int(row[3], 16))
self.ParseVMSymbols(row[1], int(row[2], 16), int(row[3], 16))
elif row[0] == 'begin-code-region':
self.ProcessBeginCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16), row[4])
elif row[0] == 'end-code-region':
self.ProcessEndCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16))
elif row[0] == 'code-allocate':
self.ProcessCodeAllocate(int(row[1], 16), int(row[2], 16))
finally:
logfile.close()
@ -121,8 +192,17 @@ class TickProcessor(object):
def ParseVMSymbols(self, filename, start, end):
return
def ProcessCodeAllocate(self, addr, assem):
if assem in self.pending_assemblers:
assembler = self.pending_assemblers.pop(assem)
self.assemblers[addr] = assembler
def ProcessCodeCreation(self, type, addr, size, name):
self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size))
if addr in self.assemblers:
assembler = self.assemblers.pop(addr)
else:
assembler = None
self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size, assembler))
def ProcessCodeMove(self, from_addr, to_addr):
try:
@ -139,6 +219,18 @@ class TickProcessor(object):
except 'KeyNotFound':
print('Code delete event for unknown code: 0x%x' % from_addr)
def ProcessBeginCodeRegion(self, id, assm, start, name):
if not assm in self.pending_assemblers:
self.pending_assemblers[assm] = Assembler()
assembler = self.pending_assemblers[assm]
assembler.pending_regions[id] = CodeRegion(start, name)
def ProcessEndCodeRegion(self, id, assm, end):
assm = self.pending_assemblers[assm]
region = assm.pending_regions.pop(id)
region.end_offset = end
assm.regions.append(region)
def IncludeTick(self, pc, sp, state):
return (self.included_state is None) or (self.included_state == state)
@ -152,12 +244,13 @@ class TickProcessor(object):
entry = self.cpp_entries.FindGreatestsLessThan(pc).value
if entry.IsSharedLibraryEntry():
self.number_of_library_ticks += 1
entry.IncrementTickCount()
entry.Tick(None)
return
max = self.js_entries.FindMax()
min = self.js_entries.FindMin()
if max != None and pc < max.key and pc > min.key:
self.js_entries.FindGreatestsLessThan(pc).value.IncrementTickCount()
code_obj = self.js_entries.FindGreatestsLessThan(pc).value
code_obj.Tick(pc)
return
self.unaccounted_number_of_ticks += 1
@ -201,6 +294,17 @@ class TickProcessor(object):
'nonlib' : non_library_percentage,
'name' : entry.ToString()
})
region_ticks = entry.RegionTicks()
if not region_ticks is None:
items = region_ticks.items()
items.sort(key=lambda e: e[1][1], reverse=True)
for (name, ticks) in items:
print(' flat cum')
print(' %(flat)5.1f%% %(accum)5.1f%% %(name)s' % {
'flat' : ticks[1] * 100.0 / entry.tick_count,
'accum' : ticks[0] * 100.0 / entry.tick_count,
'name': name
})
if __name__ == '__main__':
sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')