diff --git a/src/profiler/cpu-profiler.cc b/src/profiler/cpu-profiler.cc index 75f6f8a037..50558f77d5 100644 --- a/src/profiler/cpu-profiler.cc +++ b/src/profiler/cpu-profiler.cc @@ -220,6 +220,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag, tag, profiles_->GetFunctionName(name), CodeEntry::kEmptyNamePrefix, CodeEntry::kEmptyResourceName, CpuProfileNode::kNoLineNumberInfo, CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start()); + RecordInliningInfo(rec->entry, code); rec->size = code->ExecutableSize(); processor_->Enqueue(evt_rec); } @@ -233,6 +234,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag, tag, profiles_->GetFunctionName(name), CodeEntry::kEmptyNamePrefix, CodeEntry::kEmptyResourceName, CpuProfileNode::kNoLineNumberInfo, CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start()); + RecordInliningInfo(rec->entry, code); rec->size = code->ExecutableSize(); processor_->Enqueue(evt_rec); } @@ -249,6 +251,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag, CodeEntry::kEmptyNamePrefix, profiles_->GetName(script_name), CpuProfileNode::kNoLineNumberInfo, CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start()); + RecordInliningInfo(rec->entry, code); if (info) { rec->entry->set_inlined_function_infos(info->inlined_function_infos()); } @@ -292,6 +295,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag, tag, profiles_->GetFunctionName(shared->DebugName()), CodeEntry::kEmptyNamePrefix, profiles_->GetName(script_name), line, column, line_table, abstract_code->instruction_start()); + RecordInliningInfo(rec->entry, abstract_code); if (info) { rec->entry->set_inlined_function_infos(info->inlined_function_infos()); } @@ -309,6 +313,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag, tag, profiles_->GetName(args_count), "args_count: ", CodeEntry::kEmptyResourceName, CpuProfileNode::kNoLineNumberInfo, CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start()); + RecordInliningInfo(rec->entry, code); rec->size = code->ExecutableSize(); processor_->Enqueue(evt_rec); } @@ -379,6 +384,53 @@ void CpuProfiler::SetterCallbackEvent(Name* name, Address entry_point) { processor_->Enqueue(evt_rec); } +void CpuProfiler::RecordInliningInfo(CodeEntry* entry, + AbstractCode* abstract_code) { + if (!abstract_code->IsCode()) return; + Code* code = abstract_code->GetCode(); + if (code->kind() != Code::OPTIMIZED_FUNCTION) return; + DeoptimizationInputData* deopt_input_data = + DeoptimizationInputData::cast(code->deoptimization_data()); + int deopt_count = deopt_input_data->DeoptCount(); + for (int i = 0; i < deopt_count; i++) { + int pc_offset = deopt_input_data->Pc(i)->value(); + if (pc_offset == -1) continue; + int translation_index = deopt_input_data->TranslationIndex(i)->value(); + TranslationIterator it(deopt_input_data->TranslationByteArray(), + translation_index); + Translation::Opcode opcode = static_cast(it.Next()); + DCHECK_EQ(Translation::BEGIN, opcode); + it.Skip(Translation::NumberOfOperandsFor(opcode)); + int depth = 0; + std::vector inline_stack; + while (it.HasNext() && + Translation::BEGIN != + (opcode = static_cast(it.Next()))) { + if (opcode != Translation::JS_FRAME && + opcode != Translation::INTERPRETED_FRAME) { + it.Skip(Translation::NumberOfOperandsFor(opcode)); + continue; + } + it.Next(); // Skip ast_id + int shared_info_id = it.Next(); + it.Next(); // Skip height + SharedFunctionInfo* shared_info = SharedFunctionInfo::cast( + deopt_input_data->LiteralArray()->get(shared_info_id)); + if (!depth++) continue; // Skip the current function itself. + CodeEntry* inline_entry = new CodeEntry( + entry->tag(), profiles_->GetFunctionName(shared_info->DebugName()), + CodeEntry::kEmptyNamePrefix, entry->resource_name(), + CpuProfileNode::kNoLineNumberInfo, + CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start()); + inline_entry->FillFunctionInfo(shared_info); + inline_stack.push_back(inline_entry); + } + if (!inline_stack.empty()) { + entry->AddInlineStack(pc_offset, inline_stack); + DCHECK(inline_stack.empty()); + } + } +} CpuProfiler::CpuProfiler(Isolate* isolate) : isolate_(isolate), diff --git a/src/profiler/cpu-profiler.h b/src/profiler/cpu-profiler.h index cf5ed5165e..86db04608a 100644 --- a/src/profiler/cpu-profiler.h +++ b/src/profiler/cpu-profiler.h @@ -258,6 +258,7 @@ class CpuProfiler : public CodeEventListener { void StopProcessor(); void ResetProfiles(); void LogBuiltins(); + void RecordInliningInfo(CodeEntry* entry, AbstractCode* abstract_code); Isolate* isolate_; base::TimeDelta sampling_interval_; diff --git a/src/profiler/profile-generator.cc b/src/profiler/profile-generator.cc index e9465ba542..70a5a4c423 100644 --- a/src/profiler/profile-generator.cc +++ b/src/profiler/profile-generator.cc @@ -50,6 +50,11 @@ const char* const CodeEntry::kNoDeoptReason = ""; CodeEntry::~CodeEntry() { delete line_info_; + for (auto location : inline_locations_) { + for (auto entry : location.second) { + delete entry; + } + } } @@ -100,6 +105,18 @@ int CodeEntry::GetSourceLine(int pc_offset) const { return v8::CpuProfileNode::kNoLineNumberInfo; } +void CodeEntry::AddInlineStack(int pc_offset, + std::vector& inline_stack) { + // It's better to use std::move to place the vector into the map, + // but it's not supported by the current stdlibc++ on MacOS. + inline_locations_.insert(std::make_pair(pc_offset, std::vector())) + .first->second.swap(inline_stack); +} + +const std::vector* CodeEntry::GetInlineStack(int pc_offset) const { + auto it = inline_locations_.find(pc_offset); + return it != inline_locations_.end() ? &it->second : NULL; +} void CodeEntry::FillFunctionInfo(SharedFunctionInfo* shared) { if (!shared->script()->IsScript()) return; @@ -109,7 +126,6 @@ void CodeEntry::FillFunctionInfo(SharedFunctionInfo* shared) { set_bailout_reason(GetBailoutReason(shared->disable_optimization_reason())); } - CpuProfileDeoptInfo CodeEntry::GetDeoptInfo() { DCHECK(has_deopt_info()); @@ -274,17 +290,14 @@ unsigned ProfileTree::GetFunctionId(const ProfileNode* node) { return static_cast(reinterpret_cast(entry->value)); } -ProfileNode* ProfileTree::AddPathFromEnd(const Vector& path, +ProfileNode* ProfileTree::AddPathFromEnd(const std::vector& path, int src_line, bool update_stats) { ProfileNode* node = root_; CodeEntry* last_entry = NULL; - for (CodeEntry** entry = path.start() + path.length() - 1; - entry != path.start() - 1; - --entry) { - if (*entry != NULL) { - node = node->FindOrAddChild(*entry); - last_entry = *entry; - } + for (auto it = path.rbegin(); it != path.rend(); ++it) { + if (*it == NULL) continue; + last_entry = *it; + node = node->FindOrAddChild(*it); } if (last_entry && last_entry->has_deopt_info()) { node->CollectDeoptInfo(last_entry); @@ -356,7 +369,7 @@ CpuProfile::CpuProfile(Isolate* isolate, const char* title, bool record_samples) top_down_(isolate) {} void CpuProfile::AddPath(base::TimeTicks timestamp, - const Vector& path, int src_line, + const std::vector& path, int src_line, bool update_stats) { ProfileNode* top_frame_node = top_down_.AddPathFromEnd(path, src_line, update_stats); @@ -525,8 +538,8 @@ void CpuProfilesCollection::RemoveProfile(CpuProfile* profile) { } void CpuProfilesCollection::AddPathToCurrentProfiles( - base::TimeTicks timestamp, const Vector& path, int src_line, - bool update_stats) { + base::TimeTicks timestamp, const std::vector& path, + int src_line, bool update_stats) { // As starting / stopping profiles is rare relatively to this // method, we don't bother minimizing the duration of lock holding, // e.g. copying contents of the list to a local vector. @@ -576,12 +589,10 @@ ProfileGenerator::ProfileGenerator(CpuProfilesCollection* profiles) void ProfileGenerator::RecordTickSample(const TickSample& sample) { - // Allocate space for stack frames + pc + function + vm-state. - ScopedVector entries(sample.frames_count + 3); - // As actual number of decoded code entries may vary, initialize - // entries vector with NULL values. - CodeEntry** entry = entries.start(); - memset(entry, 0, entries.length() * sizeof(*entry)); + std::vector entries; + // Conservatively reserve space for stack frames + pc + function + vm-state. + // There could in fact be more of them because of inlined entries. + entries.reserve(sample.frames_count + 3); // The ProfileNode knows nothing about all versions of generated code for // the same JS function. The line number information associated with @@ -597,7 +608,7 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) { // Don't use PC when in external callback code, as it can point // inside callback's code, and we will erroneously report // that a callback calls itself. - *entry++ = code_map_.FindEntry(sample.external_callback); + entries.push_back(code_map_.FindEntry(sample.external_callback)); } else { CodeEntry* pc_entry = code_map_.FindEntry(sample.pc); // If there is no pc_entry we're likely in native code. @@ -620,7 +631,7 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) { src_line = pc_entry->line_number(); } src_line_not_found = false; - *entry++ = pc_entry; + entries.push_back(pc_entry); if (pc_entry->builtin_id() == Builtins::kFunctionPrototypeApply || pc_entry->builtin_id() == Builtins::kFunctionPrototypeCall) { @@ -631,7 +642,7 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) { // former case we don't so we simply replace the frame with // 'unresolved' entry. if (sample.top_frame_type == StackFrame::JAVA_SCRIPT) { - *entry++ = unresolved_entry_; + entries.push_back(unresolved_entry_); } } } @@ -640,35 +651,43 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) { for (const Address *stack_pos = sample.stack, *stack_end = stack_pos + sample.frames_count; stack_pos != stack_end; ++stack_pos) { - *entry = code_map_.FindEntry(*stack_pos); + CodeEntry* entry = code_map_.FindEntry(*stack_pos); - // Skip unresolved frames (e.g. internal frame) and get source line of - // the first JS caller. - if (src_line_not_found && *entry) { + if (entry) { + // Find out if the entry has an inlining stack associated. int pc_offset = - static_cast(*stack_pos - (*entry)->instruction_start()); - src_line = (*entry)->GetSourceLine(pc_offset); - if (src_line == v8::CpuProfileNode::kNoLineNumberInfo) { - src_line = (*entry)->line_number(); + static_cast(*stack_pos - entry->instruction_start()); + const std::vector* inline_stack = + entry->GetInlineStack(pc_offset); + if (inline_stack) { + entries.insert(entries.end(), inline_stack->rbegin(), + inline_stack->rend()); + } + // Skip unresolved frames (e.g. internal frame) and get source line of + // the first JS caller. + if (src_line_not_found) { + src_line = entry->GetSourceLine(pc_offset); + if (src_line == v8::CpuProfileNode::kNoLineNumberInfo) { + src_line = entry->line_number(); + } + src_line_not_found = false; } - src_line_not_found = false; } - - entry++; + entries.push_back(entry); } } if (FLAG_prof_browser_mode) { bool no_symbolized_entries = true; - for (CodeEntry** e = entries.start(); e != entry; ++e) { - if (*e != NULL) { + for (auto e : entries) { + if (e != NULL) { no_symbolized_entries = false; break; } } // If no frames were symbolized, put the VM state entry in. if (no_symbolized_entries) { - *entry++ = EntryForVMState(sample.state); + entries.push_back(EntryForVMState(sample.state)); } } diff --git a/src/profiler/profile-generator.h b/src/profiler/profile-generator.h index 3c976d6292..194b490929 100644 --- a/src/profiler/profile-generator.h +++ b/src/profiler/profile-generator.h @@ -99,7 +99,11 @@ class CodeEntry { int GetSourceLine(int pc_offset) const; + void AddInlineStack(int pc_offset, std::vector& inline_stack); + const std::vector* GetInlineStack(int pc_offset) const; + Address instruction_start() const { return instruction_start_; } + Logger::LogEventsAndTags tag() const { return TagField::decode(bit_field_); } static const char* const kEmptyNamePrefix; static const char* const kEmptyResourceName; @@ -109,7 +113,6 @@ class CodeEntry { private: class TagField : public BitField {}; class BuiltinIdField : public BitField {}; - Logger::LogEventsAndTags tag() const { return TagField::decode(bit_field_); } uint32_t bit_field_; const char* name_prefix_; @@ -125,6 +128,8 @@ class CodeEntry { size_t pc_offset_; JITLineInfoTable* line_info_; Address instruction_start_; + // Should be an unordered_map, but it doesn't currently work on Win & MacOS. + std::map> inline_locations_; std::vector inlined_function_infos_; @@ -191,7 +196,7 @@ class ProfileTree { ~ProfileTree(); ProfileNode* AddPathFromEnd( - const Vector& path, + const std::vector& path, int src_line = v8::CpuProfileNode::kNoLineNumberInfo, bool update_stats = true); ProfileNode* root() const { return root_; } @@ -225,7 +230,7 @@ class CpuProfile { CpuProfile(Isolate* isolate, const char* title, bool record_samples); // Add pc -> ... -> main() call path to the profile. - void AddPath(base::TimeTicks timestamp, const Vector& path, + void AddPath(base::TimeTicks timestamp, const std::vector& path, int src_line, bool update_stats); void CalculateTotalTicksAndSamplingRate(); @@ -334,8 +339,8 @@ class CpuProfilesCollection { // Called from profile generator thread. void AddPathToCurrentProfiles(base::TimeTicks timestamp, - const Vector& path, int src_line, - bool update_stats); + const std::vector& path, + int src_line, bool update_stats); // Limits the number of profiles that can be simultaneously collected. static const int kMaxSimultaneousProfiles = 100; diff --git a/test/cctest/test-cpu-profiler.cc b/test/cctest/test-cpu-profiler.cc index e1f7e6adb9..79ac32e4e2 100644 --- a/test/cctest/test-cpu-profiler.cc +++ b/test/cctest/test-cpu-profiler.cc @@ -1504,6 +1504,68 @@ TEST(JsNativeJsRuntimeJsSampleMultiple) { profile->Delete(); } +static const char* inlining_test_source = + "%NeverOptimizeFunction(action);\n" + "%NeverOptimizeFunction(start);\n" + "%OptimizeFunctionOnNextCall(level1);\n" + "%OptimizeFunctionOnNextCall(level2);\n" + "%OptimizeFunctionOnNextCall(level3);\n" + "var finish = false;\n" + "function action(n) {\n" + " var s = 0;\n" + " for (var i = 0; i < n; ++i) s += i*i*i;\n" + " if (finish)\n" + " startProfiling('my_profile');\n" + " return s;\n" + "}\n" + "function level3() { return action(100); }\n" + "function level2() { return level3() * 2; }\n" + "function level1() { return level2(); }\n" + "function start() {\n" + " var n = 100;\n" + " while (--n)\n" + " level1();\n" + " finish = true;\n" + " level1();\n" + "}"; + +// The test check multiple entrances/exits between JS and native code. +// +// [Top down]: +// (root) #0 1 +// start #16 3 +// level1 #0 4 +// level2 #16 5 +// level3 #16 6 +// action #16 7 +// (program) #0 2 +TEST(Inlining) { + i::FLAG_allow_natives_syntax = true; + v8::HandleScope scope(CcTest::isolate()); + v8::Local env = CcTest::NewContext(PROFILER_EXTENSION); + v8::Context::Scope context_scope(env); + + CompileRun(inlining_test_source); + v8::Local function = GetFunction(env, "start"); + + v8::CpuProfiler* cpu_profiler = env->GetIsolate()->GetCpuProfiler(); + v8::Local profile_name = v8_str("my_profile"); + function->Call(env, env->Global(), 0, NULL).ToLocalChecked(); + v8::CpuProfile* profile = cpu_profiler->StopProfiling(profile_name); + CHECK(profile); + // Dump collected profile to have a better diagnostic in case of failure. + reinterpret_cast(profile)->Print(); + + const v8::CpuProfileNode* root = profile->GetTopDownRoot(); + const v8::CpuProfileNode* start_node = GetChild(env, root, "start"); + const v8::CpuProfileNode* level1_node = GetChild(env, start_node, "level1"); + const v8::CpuProfileNode* level2_node = GetChild(env, level1_node, "level2"); + const v8::CpuProfileNode* level3_node = GetChild(env, level2_node, "level3"); + GetChild(env, level3_node, "action"); + + profile->Delete(); +} + // [Top down]: // 0 (root) #0 1 // 2 (program) #0 2 diff --git a/test/cctest/test-profile-generator.cc b/test/cctest/test-profile-generator.cc index 3ab80e2e5f..a048ca72cb 100644 --- a/test/cctest/test-profile-generator.cc +++ b/test/cctest/test-profile-generator.cc @@ -132,7 +132,7 @@ TEST(ProfileTreeAddPathFromEnd) { CHECK(!helper.Walk(&entry3)); CodeEntry* path[] = {NULL, &entry3, NULL, &entry2, NULL, NULL, &entry1, NULL}; - Vector path_vec(path, sizeof(path) / sizeof(path[0])); + std::vector path_vec(path, path + arraysize(path)); tree.AddPathFromEnd(path_vec); CHECK(!helper.Walk(&entry2)); CHECK(!helper.Walk(&entry3)); @@ -162,7 +162,7 @@ TEST(ProfileTreeAddPathFromEnd) { CHECK_EQ(2u, node3->self_ticks()); CodeEntry* path2[] = {&entry2, &entry2, &entry1}; - Vector path2_vec(path2, sizeof(path2) / sizeof(path2[0])); + std::vector path2_vec(path2, path2 + arraysize(path2)); tree.AddPathFromEnd(path2_vec); CHECK(!helper.Walk(&entry2)); CHECK(!helper.Walk(&entry3)); @@ -189,8 +189,7 @@ TEST(ProfileTreeCalculateTotalTicks) { CodeEntry entry1(i::Logger::FUNCTION_TAG, "aaa"); CodeEntry* e1_path[] = {&entry1}; - Vector e1_path_vec( - e1_path, sizeof(e1_path) / sizeof(e1_path[0])); + std::vector e1_path_vec(e1_path, e1_path + arraysize(e1_path)); ProfileTree single_child_tree(CcTest::i_isolate()); single_child_tree.AddPathFromEnd(e1_path_vec); @@ -204,8 +203,8 @@ TEST(ProfileTreeCalculateTotalTicks) { CodeEntry entry2(i::Logger::FUNCTION_TAG, "bbb"); CodeEntry* e2_e1_path[] = {&entry2, &entry1}; - Vector e2_e1_path_vec(e2_e1_path, - sizeof(e2_e1_path) / sizeof(e2_e1_path[0])); + std::vector e2_e1_path_vec(e2_e1_path, + e2_e1_path + arraysize(e2_e1_path)); ProfileTree flat_tree(CcTest::i_isolate()); ProfileTreeTestHelper flat_helper(&flat_tree); @@ -227,12 +226,10 @@ TEST(ProfileTreeCalculateTotalTicks) { CHECK_EQ(2u, node1->self_ticks()); CodeEntry* e2_path[] = {&entry2}; - Vector e2_path_vec( - e2_path, sizeof(e2_path) / sizeof(e2_path[0])); + std::vector e2_path_vec(e2_path, e2_path + arraysize(e2_path)); CodeEntry entry3(i::Logger::FUNCTION_TAG, "ccc"); CodeEntry* e3_path[] = {&entry3}; - Vector e3_path_vec( - e3_path, sizeof(e3_path) / sizeof(e3_path[0])); + std::vector e3_path_vec(e3_path, e3_path + arraysize(e3_path)); ProfileTree wide_tree(CcTest::i_isolate()); ProfileTreeTestHelper wide_helper(&wide_tree);