From d7c933e7aec30b6455de7672a7fae3bbbb9bfb77 Mon Sep 17 00:00:00 2001 From: "christian.plesner.hansen@gmail.com" Date: Fri, 19 Dec 2008 13:12:43 +0000 Subject: [PATCH] Added support in the profiler for creating 'regions' that cover part of the generated code. These can be used by the profiler to categorize the ticks that occur within generated code and thereby show more detailed information about where time is spent in generated code. For instance, this is what the profiler displayed for a simple regexp benchmark with irregexp-native before: [JavaScript]: total nonlib name 87.2% 87.2% RegExp: (?:\w*\W+)* This is what we can display now: [JavaScript]: total nonlib name 87.2% 87.2% RegExp: (?:\w*\W+)* - 53.0% 56.7% BranchOrBacktrack - 14.9% 59.8% CheckCharacterLT - 13.7% 20.4% CheckStackLimit - 6.7% 6.7% SafeCall - 2.7% 7.0% CheckCharacterGT - 2.4% 2.4% SafeReturn - 2.1% 2.1% LoadCurrentCharacter - 1.8% 1.8% PushRegister - 0.9% 0.9% PopRegister - 0.9% 0.9% AdvanceRegister - 0.3% 0.3% PopCurrentPosition - 0.3% 0.3% CheckGreedyLoop - 0.0% 20.4% PushBacktrack - 0.0% 22.3% CheckCharacter - 0.0% 2.4% IfRegisterLT git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1010 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/assembler-ia32.cc | 1 + src/assembler.h | 16 ++++++ src/globals.h | 2 + src/heap.cc | 1 + src/log.cc | 40 +++++++++++++ src/log.h | 6 ++ src/macro-assembler-ia32.h | 1 + tools/tickprocessor.py | 114 +++++++++++++++++++++++++++++++++++-- 8 files changed, 176 insertions(+), 5 deletions(-) diff --git a/src/assembler-ia32.cc b/src/assembler-ia32.cc index c04fd5758b..f487c418d9 100644 --- a/src/assembler-ia32.cc +++ b/src/assembler-ia32.cc @@ -344,6 +344,7 @@ void Assembler::GetCode(CodeDesc* desc) { desc->buffer_size = buffer_size_; desc->instr_size = pc_offset(); desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos(); + desc->origin = this; Counters::reloc_info_size.Increment(desc->reloc_size); } diff --git a/src/assembler.h b/src/assembler.h index 971b950227..7bd117cdc9 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -382,6 +382,22 @@ class RelocIterator: public Malloced { }; +// A stack-allocated code region logs a name for the code generated +// while the region is in effect. This information is used by the +// profiler to categorize ticks within generated code. +class CodeRegion BASE_EMBEDDED { + public: + inline CodeRegion(Assembler* assm, const char *name) : assm_(assm) { + LOG(BeginCodeRegionEvent(this, assm, name)); + } + inline ~CodeRegion() { + LOG(EndCodeRegionEvent(this, assm_)); + } + private: + Assembler* assm_; +}; + + //------------------------------------------------------------------------------ // External function diff --git a/src/globals.h b/src/globals.h index e174bf303e..2b5f3acf43 100644 --- a/src/globals.h +++ b/src/globals.h @@ -145,6 +145,7 @@ class Assembler; class BreakableStatement; class Code; class CodeGenerator; +class CodeRegion; class CodeStub; class Context; class Debug; @@ -259,6 +260,7 @@ struct CodeDesc { int buffer_size; int instr_size; int reloc_size; + Assembler* origin; }; diff --git a/src/heap.cc b/src/heap.cc index a10e3f9978..c4a860bedc 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -1663,6 +1663,7 @@ Object* Heap::CreateCode(const CodeDesc& desc, // through the self_reference parameter. code->CopyFrom(desc); if (sinfo != NULL) sinfo->Serialize(code); // write scope info + LOG(CodeAllocateEvent(code, desc.origin)); #ifdef DEBUG code->Verify(); diff --git a/src/log.cc b/src/log.cc index 4fa398c8e2..fbefec9829 100644 --- a/src/log.cc +++ b/src/log.cc @@ -32,6 +32,7 @@ #include "log.h" #include "platform.h" #include "string-stream.h" +#include "macro-assembler.h" namespace v8 { namespace internal { @@ -556,6 +557,18 @@ void Logger::CodeCreateEvent(const char* tag, Code* code, int args_count) { } +void Logger::CodeAllocateEvent(Code* code, Assembler* assem) { +#ifdef ENABLE_LOGGING_AND_PROFILING + if (logfile_ == NULL || !FLAG_log_code) return; + ScopedLock sl(mutex_); + + fprintf(logfile_, "code-allocate,0x%x,0x%x\n", + reinterpret_cast(code->address()), + reinterpret_cast(assem)); +#endif +} + + void Logger::CodeMoveEvent(Address from, Address to) { #ifdef ENABLE_LOGGING_AND_PROFILING if (logfile_ == NULL || !FLAG_log_code) return; @@ -576,6 +589,33 @@ void Logger::CodeDeleteEvent(Address from) { } +void Logger::BeginCodeRegionEvent(CodeRegion* region, + Assembler* masm, + const char* name) { +#ifdef ENABLE_LOGGING_AND_PROFILING + if (logfile_ == NULL || !FLAG_log_code) return; + ScopedLock sl(mutex_); + fprintf(logfile_, "begin-code-region,0x%x,0x%x,0x%x,%s\n", + reinterpret_cast(region), + reinterpret_cast(masm), + masm->pc_offset(), + name); +#endif +} + + +void Logger::EndCodeRegionEvent(CodeRegion* region, Assembler* masm) { +#ifdef ENABLE_LOGGING_AND_PROFILING + if (logfile_ == NULL || !FLAG_log_code) return; + ScopedLock sl(mutex_); + fprintf(logfile_, "end-code-region,0x%x,0x%x,0x%x\n", + reinterpret_cast(region), + reinterpret_cast(masm), + masm->pc_offset()); +#endif +} + + void Logger::ResourceEvent(const char* name, const char* tag) { #ifdef ENABLE_LOGGING_AND_PROFILING if (logfile_ == NULL || !FLAG_log) return; diff --git a/src/log.h b/src/log.h index 89d3e8a3d0..62571733f9 100644 --- a/src/log.h +++ b/src/log.h @@ -160,10 +160,16 @@ class Logger { static void CodeCreateEvent(const char* tag, Code* code, const char* source); static void CodeCreateEvent(const char* tag, Code* code, String* name); static void CodeCreateEvent(const char* tag, Code* code, int args_count); + static void CodeAllocateEvent(Code* code, Assembler* assem); // Emits a code move event. static void CodeMoveEvent(Address from, Address to); // Emits a code delete event. static void CodeDeleteEvent(Address from); + // Emits region delimiters + static void BeginCodeRegionEvent(CodeRegion* region, + Assembler* masm, + const char* name); + static void EndCodeRegionEvent(CodeRegion* region, Assembler* masm); // ==== Events logged by --log-gc. ==== // Heap sampling events: start, end, and individual types. diff --git a/src/macro-assembler-ia32.h b/src/macro-assembler-ia32.h index 8bcc651c15..b8fb3b90b5 100644 --- a/src/macro-assembler-ia32.h +++ b/src/macro-assembler-ia32.h @@ -305,6 +305,7 @@ class CodePatcher { MacroAssembler masm_; // Macro assembler used to generate the code. }; + // ----------------------------------------------------------------------------- // Static helper functions. diff --git a/tools/tickprocessor.py b/tools/tickprocessor.py index e3df951ef0..5ef6b5f4fe 100644 --- a/tools/tickprocessor.py +++ b/tools/tickprocessor.py @@ -35,9 +35,12 @@ class CodeEntry(object): self.tick_count = 0 self.name = name - def IncrementTickCount(self): + def Tick(self, pc): self.tick_count += 1 + def RegionTicks(self): + return None + def SetStartAddress(self, start_addr): self.start_addr = start_addr @@ -59,10 +62,45 @@ class SharedLibraryEntry(CodeEntry): class JSCodeEntry(CodeEntry): - def __init__(self, start_addr, name, type, size): + def __init__(self, start_addr, name, type, size, assembler): CodeEntry.__init__(self, start_addr, name) self.type = type self.size = size + self.assembler = assembler + self.region_ticks = None + + def Tick(self, pc): + super(JSCodeEntry, self).Tick(pc) + if not pc is None: + offset = pc - self.start_addr + seen = [] + narrowest = None + narrowest_width = None + for region in self.Regions(): + if region.Contains(offset): + if (not region.name in seen): + seen.append(region.name) + if narrowest is None or region.Width() < narrowest.Width(): + narrowest = region + if len(seen) == 0: + return + if self.region_ticks is None: + self.region_ticks = {} + for name in seen: + if not name in self.region_ticks: + self.region_ticks[name] = [0, 0] + self.region_ticks[name][0] += 1 + if name == narrowest.name: + self.region_ticks[name][1] += 1 + + def RegionTicks(self): + return self.region_ticks + + def Regions(self): + if self.assembler: + return self.assembler.regions + else: + return [] def ToString(self): name = self.name @@ -70,12 +108,39 @@ class JSCodeEntry(CodeEntry): return self.type + ': ' + name +class CodeRegion(object): + + def __init__(self, start_offset, name): + self.start_offset = start_offset + self.name = name + self.end_offset = None + + def Contains(self, pc): + return (self.start_offset <= pc) and (pc <= self.end_offset) + + def Width(self): + return self.end_offset - self.start_offset + + +class Assembler(object): + + def __init__(self): + # Mapping from region ids to open regions + self.pending_regions = {} + self.regions = [] + + class TickProcessor(object): def __init__(self): self.log_file = '' self.deleted_code = [] self.vm_extent = {} + # Map from assembler ids to the pending assembler objects + self.pending_assemblers = {} + # Map from code addresses the have been allocated but not yet officially + # created to their assemblers. + self.assemblers = {} self.js_entries = splaytree.SplayTree() self.cpp_entries = splaytree.SplayTree() self.total_number_of_ticks = 0 @@ -104,6 +169,12 @@ class TickProcessor(object): elif row[0] == 'shared-library': self.AddSharedLibraryEntry(row[1], int(row[2], 16), int(row[3], 16)) self.ParseVMSymbols(row[1], int(row[2], 16), int(row[3], 16)) + elif row[0] == 'begin-code-region': + self.ProcessBeginCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16), row[4]) + elif row[0] == 'end-code-region': + self.ProcessEndCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16)) + elif row[0] == 'code-allocate': + self.ProcessCodeAllocate(int(row[1], 16), int(row[2], 16)) finally: logfile.close() @@ -121,8 +192,17 @@ class TickProcessor(object): def ParseVMSymbols(self, filename, start, end): return + def ProcessCodeAllocate(self, addr, assem): + if assem in self.pending_assemblers: + assembler = self.pending_assemblers.pop(assem) + self.assemblers[addr] = assembler + def ProcessCodeCreation(self, type, addr, size, name): - self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size)) + if addr in self.assemblers: + assembler = self.assemblers.pop(addr) + else: + assembler = None + self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size, assembler)) def ProcessCodeMove(self, from_addr, to_addr): try: @@ -139,6 +219,18 @@ class TickProcessor(object): except 'KeyNotFound': print('Code delete event for unknown code: 0x%x' % from_addr) + def ProcessBeginCodeRegion(self, id, assm, start, name): + if not assm in self.pending_assemblers: + self.pending_assemblers[assm] = Assembler() + assembler = self.pending_assemblers[assm] + assembler.pending_regions[id] = CodeRegion(start, name) + + def ProcessEndCodeRegion(self, id, assm, end): + assm = self.pending_assemblers[assm] + region = assm.pending_regions.pop(id) + region.end_offset = end + assm.regions.append(region) + def IncludeTick(self, pc, sp, state): return (self.included_state is None) or (self.included_state == state) @@ -152,12 +244,13 @@ class TickProcessor(object): entry = self.cpp_entries.FindGreatestsLessThan(pc).value if entry.IsSharedLibraryEntry(): self.number_of_library_ticks += 1 - entry.IncrementTickCount() + entry.Tick(None) return max = self.js_entries.FindMax() min = self.js_entries.FindMin() if max != None and pc < max.key and pc > min.key: - self.js_entries.FindGreatestsLessThan(pc).value.IncrementTickCount() + code_obj = self.js_entries.FindGreatestsLessThan(pc).value + code_obj.Tick(pc) return self.unaccounted_number_of_ticks += 1 @@ -201,6 +294,17 @@ class TickProcessor(object): 'nonlib' : non_library_percentage, 'name' : entry.ToString() }) + region_ticks = entry.RegionTicks() + if not region_ticks is None: + items = region_ticks.items() + items.sort(key=lambda e: e[1][1], reverse=True) + for (name, ticks) in items: + print(' flat cum') + print(' %(flat)5.1f%% %(accum)5.1f%% %(name)s' % { + 'flat' : ticks[1] * 100.0 / entry.tick_count, + 'accum' : ticks[0] * 100.0 / entry.tick_count, + 'name': name + }) if __name__ == '__main__': sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')