Make CPU profiler unwind the inlined functions stack.

BUG=575466
LOG=N

Review URL: https://codereview.chromium.org/1740073002

Cr-Commit-Position: refs/heads/master@{#34376}
This commit is contained in:
alph 2016-02-29 21:59:32 -08:00 committed by Commit bot
parent 4537f09374
commit b01cd96eb5
6 changed files with 186 additions and 50 deletions

View File

@ -220,6 +220,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag,
tag, profiles_->GetFunctionName(name), CodeEntry::kEmptyNamePrefix,
CodeEntry::kEmptyResourceName, CpuProfileNode::kNoLineNumberInfo,
CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start());
RecordInliningInfo(rec->entry, code);
rec->size = code->ExecutableSize();
processor_->Enqueue(evt_rec);
}
@ -233,6 +234,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag,
tag, profiles_->GetFunctionName(name), CodeEntry::kEmptyNamePrefix,
CodeEntry::kEmptyResourceName, CpuProfileNode::kNoLineNumberInfo,
CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start());
RecordInliningInfo(rec->entry, code);
rec->size = code->ExecutableSize();
processor_->Enqueue(evt_rec);
}
@ -249,6 +251,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag,
CodeEntry::kEmptyNamePrefix, profiles_->GetName(script_name),
CpuProfileNode::kNoLineNumberInfo, CpuProfileNode::kNoColumnNumberInfo,
NULL, code->instruction_start());
RecordInliningInfo(rec->entry, code);
if (info) {
rec->entry->set_inlined_function_infos(info->inlined_function_infos());
}
@ -292,6 +295,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag,
tag, profiles_->GetFunctionName(shared->DebugName()),
CodeEntry::kEmptyNamePrefix, profiles_->GetName(script_name), line,
column, line_table, abstract_code->instruction_start());
RecordInliningInfo(rec->entry, abstract_code);
if (info) {
rec->entry->set_inlined_function_infos(info->inlined_function_infos());
}
@ -309,6 +313,7 @@ void CpuProfiler::CodeCreateEvent(Logger::LogEventsAndTags tag,
tag, profiles_->GetName(args_count), "args_count: ",
CodeEntry::kEmptyResourceName, CpuProfileNode::kNoLineNumberInfo,
CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start());
RecordInliningInfo(rec->entry, code);
rec->size = code->ExecutableSize();
processor_->Enqueue(evt_rec);
}
@ -379,6 +384,53 @@ void CpuProfiler::SetterCallbackEvent(Name* name, Address entry_point) {
processor_->Enqueue(evt_rec);
}
void CpuProfiler::RecordInliningInfo(CodeEntry* entry,
AbstractCode* abstract_code) {
if (!abstract_code->IsCode()) return;
Code* code = abstract_code->GetCode();
if (code->kind() != Code::OPTIMIZED_FUNCTION) return;
DeoptimizationInputData* deopt_input_data =
DeoptimizationInputData::cast(code->deoptimization_data());
int deopt_count = deopt_input_data->DeoptCount();
for (int i = 0; i < deopt_count; i++) {
int pc_offset = deopt_input_data->Pc(i)->value();
if (pc_offset == -1) continue;
int translation_index = deopt_input_data->TranslationIndex(i)->value();
TranslationIterator it(deopt_input_data->TranslationByteArray(),
translation_index);
Translation::Opcode opcode = static_cast<Translation::Opcode>(it.Next());
DCHECK_EQ(Translation::BEGIN, opcode);
it.Skip(Translation::NumberOfOperandsFor(opcode));
int depth = 0;
std::vector<CodeEntry*> inline_stack;
while (it.HasNext() &&
Translation::BEGIN !=
(opcode = static_cast<Translation::Opcode>(it.Next()))) {
if (opcode != Translation::JS_FRAME &&
opcode != Translation::INTERPRETED_FRAME) {
it.Skip(Translation::NumberOfOperandsFor(opcode));
continue;
}
it.Next(); // Skip ast_id
int shared_info_id = it.Next();
it.Next(); // Skip height
SharedFunctionInfo* shared_info = SharedFunctionInfo::cast(
deopt_input_data->LiteralArray()->get(shared_info_id));
if (!depth++) continue; // Skip the current function itself.
CodeEntry* inline_entry = new CodeEntry(
entry->tag(), profiles_->GetFunctionName(shared_info->DebugName()),
CodeEntry::kEmptyNamePrefix, entry->resource_name(),
CpuProfileNode::kNoLineNumberInfo,
CpuProfileNode::kNoColumnNumberInfo, NULL, code->instruction_start());
inline_entry->FillFunctionInfo(shared_info);
inline_stack.push_back(inline_entry);
}
if (!inline_stack.empty()) {
entry->AddInlineStack(pc_offset, inline_stack);
DCHECK(inline_stack.empty());
}
}
}
CpuProfiler::CpuProfiler(Isolate* isolate)
: isolate_(isolate),

View File

@ -258,6 +258,7 @@ class CpuProfiler : public CodeEventListener {
void StopProcessor();
void ResetProfiles();
void LogBuiltins();
void RecordInliningInfo(CodeEntry* entry, AbstractCode* abstract_code);
Isolate* isolate_;
base::TimeDelta sampling_interval_;

View File

@ -50,6 +50,11 @@ const char* const CodeEntry::kNoDeoptReason = "";
CodeEntry::~CodeEntry() {
delete line_info_;
for (auto location : inline_locations_) {
for (auto entry : location.second) {
delete entry;
}
}
}
@ -100,6 +105,18 @@ int CodeEntry::GetSourceLine(int pc_offset) const {
return v8::CpuProfileNode::kNoLineNumberInfo;
}
void CodeEntry::AddInlineStack(int pc_offset,
std::vector<CodeEntry*>& inline_stack) {
// It's better to use std::move to place the vector into the map,
// but it's not supported by the current stdlibc++ on MacOS.
inline_locations_.insert(std::make_pair(pc_offset, std::vector<CodeEntry*>()))
.first->second.swap(inline_stack);
}
const std::vector<CodeEntry*>* CodeEntry::GetInlineStack(int pc_offset) const {
auto it = inline_locations_.find(pc_offset);
return it != inline_locations_.end() ? &it->second : NULL;
}
void CodeEntry::FillFunctionInfo(SharedFunctionInfo* shared) {
if (!shared->script()->IsScript()) return;
@ -109,7 +126,6 @@ void CodeEntry::FillFunctionInfo(SharedFunctionInfo* shared) {
set_bailout_reason(GetBailoutReason(shared->disable_optimization_reason()));
}
CpuProfileDeoptInfo CodeEntry::GetDeoptInfo() {
DCHECK(has_deopt_info());
@ -274,17 +290,14 @@ unsigned ProfileTree::GetFunctionId(const ProfileNode* node) {
return static_cast<unsigned>(reinterpret_cast<uintptr_t>(entry->value));
}
ProfileNode* ProfileTree::AddPathFromEnd(const Vector<CodeEntry*>& path,
ProfileNode* ProfileTree::AddPathFromEnd(const std::vector<CodeEntry*>& path,
int src_line, bool update_stats) {
ProfileNode* node = root_;
CodeEntry* last_entry = NULL;
for (CodeEntry** entry = path.start() + path.length() - 1;
entry != path.start() - 1;
--entry) {
if (*entry != NULL) {
node = node->FindOrAddChild(*entry);
last_entry = *entry;
}
for (auto it = path.rbegin(); it != path.rend(); ++it) {
if (*it == NULL) continue;
last_entry = *it;
node = node->FindOrAddChild(*it);
}
if (last_entry && last_entry->has_deopt_info()) {
node->CollectDeoptInfo(last_entry);
@ -356,7 +369,7 @@ CpuProfile::CpuProfile(Isolate* isolate, const char* title, bool record_samples)
top_down_(isolate) {}
void CpuProfile::AddPath(base::TimeTicks timestamp,
const Vector<CodeEntry*>& path, int src_line,
const std::vector<CodeEntry*>& path, int src_line,
bool update_stats) {
ProfileNode* top_frame_node =
top_down_.AddPathFromEnd(path, src_line, update_stats);
@ -525,8 +538,8 @@ void CpuProfilesCollection::RemoveProfile(CpuProfile* profile) {
}
void CpuProfilesCollection::AddPathToCurrentProfiles(
base::TimeTicks timestamp, const Vector<CodeEntry*>& path, int src_line,
bool update_stats) {
base::TimeTicks timestamp, const std::vector<CodeEntry*>& path,
int src_line, bool update_stats) {
// As starting / stopping profiles is rare relatively to this
// method, we don't bother minimizing the duration of lock holding,
// e.g. copying contents of the list to a local vector.
@ -576,12 +589,10 @@ ProfileGenerator::ProfileGenerator(CpuProfilesCollection* profiles)
void ProfileGenerator::RecordTickSample(const TickSample& sample) {
// Allocate space for stack frames + pc + function + vm-state.
ScopedVector<CodeEntry*> entries(sample.frames_count + 3);
// As actual number of decoded code entries may vary, initialize
// entries vector with NULL values.
CodeEntry** entry = entries.start();
memset(entry, 0, entries.length() * sizeof(*entry));
std::vector<CodeEntry*> entries;
// Conservatively reserve space for stack frames + pc + function + vm-state.
// There could in fact be more of them because of inlined entries.
entries.reserve(sample.frames_count + 3);
// The ProfileNode knows nothing about all versions of generated code for
// the same JS function. The line number information associated with
@ -597,7 +608,7 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) {
// Don't use PC when in external callback code, as it can point
// inside callback's code, and we will erroneously report
// that a callback calls itself.
*entry++ = code_map_.FindEntry(sample.external_callback);
entries.push_back(code_map_.FindEntry(sample.external_callback));
} else {
CodeEntry* pc_entry = code_map_.FindEntry(sample.pc);
// If there is no pc_entry we're likely in native code.
@ -620,7 +631,7 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) {
src_line = pc_entry->line_number();
}
src_line_not_found = false;
*entry++ = pc_entry;
entries.push_back(pc_entry);
if (pc_entry->builtin_id() == Builtins::kFunctionPrototypeApply ||
pc_entry->builtin_id() == Builtins::kFunctionPrototypeCall) {
@ -631,7 +642,7 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) {
// former case we don't so we simply replace the frame with
// 'unresolved' entry.
if (sample.top_frame_type == StackFrame::JAVA_SCRIPT) {
*entry++ = unresolved_entry_;
entries.push_back(unresolved_entry_);
}
}
}
@ -640,35 +651,43 @@ void ProfileGenerator::RecordTickSample(const TickSample& sample) {
for (const Address *stack_pos = sample.stack,
*stack_end = stack_pos + sample.frames_count;
stack_pos != stack_end; ++stack_pos) {
*entry = code_map_.FindEntry(*stack_pos);
CodeEntry* entry = code_map_.FindEntry(*stack_pos);
// Skip unresolved frames (e.g. internal frame) and get source line of
// the first JS caller.
if (src_line_not_found && *entry) {
if (entry) {
// Find out if the entry has an inlining stack associated.
int pc_offset =
static_cast<int>(*stack_pos - (*entry)->instruction_start());
src_line = (*entry)->GetSourceLine(pc_offset);
if (src_line == v8::CpuProfileNode::kNoLineNumberInfo) {
src_line = (*entry)->line_number();
static_cast<int>(*stack_pos - entry->instruction_start());
const std::vector<CodeEntry*>* inline_stack =
entry->GetInlineStack(pc_offset);
if (inline_stack) {
entries.insert(entries.end(), inline_stack->rbegin(),
inline_stack->rend());
}
// Skip unresolved frames (e.g. internal frame) and get source line of
// the first JS caller.
if (src_line_not_found) {
src_line = entry->GetSourceLine(pc_offset);
if (src_line == v8::CpuProfileNode::kNoLineNumberInfo) {
src_line = entry->line_number();
}
src_line_not_found = false;
}
src_line_not_found = false;
}
entry++;
entries.push_back(entry);
}
}
if (FLAG_prof_browser_mode) {
bool no_symbolized_entries = true;
for (CodeEntry** e = entries.start(); e != entry; ++e) {
if (*e != NULL) {
for (auto e : entries) {
if (e != NULL) {
no_symbolized_entries = false;
break;
}
}
// If no frames were symbolized, put the VM state entry in.
if (no_symbolized_entries) {
*entry++ = EntryForVMState(sample.state);
entries.push_back(EntryForVMState(sample.state));
}
}

View File

@ -99,7 +99,11 @@ class CodeEntry {
int GetSourceLine(int pc_offset) const;
void AddInlineStack(int pc_offset, std::vector<CodeEntry*>& inline_stack);
const std::vector<CodeEntry*>* GetInlineStack(int pc_offset) const;
Address instruction_start() const { return instruction_start_; }
Logger::LogEventsAndTags tag() const { return TagField::decode(bit_field_); }
static const char* const kEmptyNamePrefix;
static const char* const kEmptyResourceName;
@ -109,7 +113,6 @@ class CodeEntry {
private:
class TagField : public BitField<Logger::LogEventsAndTags, 0, 8> {};
class BuiltinIdField : public BitField<Builtins::Name, 8, 8> {};
Logger::LogEventsAndTags tag() const { return TagField::decode(bit_field_); }
uint32_t bit_field_;
const char* name_prefix_;
@ -125,6 +128,8 @@ class CodeEntry {
size_t pc_offset_;
JITLineInfoTable* line_info_;
Address instruction_start_;
// Should be an unordered_map, but it doesn't currently work on Win & MacOS.
std::map<int, std::vector<CodeEntry*>> inline_locations_;
std::vector<InlinedFunctionInfo> inlined_function_infos_;
@ -191,7 +196,7 @@ class ProfileTree {
~ProfileTree();
ProfileNode* AddPathFromEnd(
const Vector<CodeEntry*>& path,
const std::vector<CodeEntry*>& path,
int src_line = v8::CpuProfileNode::kNoLineNumberInfo,
bool update_stats = true);
ProfileNode* root() const { return root_; }
@ -225,7 +230,7 @@ class CpuProfile {
CpuProfile(Isolate* isolate, const char* title, bool record_samples);
// Add pc -> ... -> main() call path to the profile.
void AddPath(base::TimeTicks timestamp, const Vector<CodeEntry*>& path,
void AddPath(base::TimeTicks timestamp, const std::vector<CodeEntry*>& path,
int src_line, bool update_stats);
void CalculateTotalTicksAndSamplingRate();
@ -334,8 +339,8 @@ class CpuProfilesCollection {
// Called from profile generator thread.
void AddPathToCurrentProfiles(base::TimeTicks timestamp,
const Vector<CodeEntry*>& path, int src_line,
bool update_stats);
const std::vector<CodeEntry*>& path,
int src_line, bool update_stats);
// Limits the number of profiles that can be simultaneously collected.
static const int kMaxSimultaneousProfiles = 100;

View File

@ -1504,6 +1504,68 @@ TEST(JsNativeJsRuntimeJsSampleMultiple) {
profile->Delete();
}
static const char* inlining_test_source =
"%NeverOptimizeFunction(action);\n"
"%NeverOptimizeFunction(start);\n"
"%OptimizeFunctionOnNextCall(level1);\n"
"%OptimizeFunctionOnNextCall(level2);\n"
"%OptimizeFunctionOnNextCall(level3);\n"
"var finish = false;\n"
"function action(n) {\n"
" var s = 0;\n"
" for (var i = 0; i < n; ++i) s += i*i*i;\n"
" if (finish)\n"
" startProfiling('my_profile');\n"
" return s;\n"
"}\n"
"function level3() { return action(100); }\n"
"function level2() { return level3() * 2; }\n"
"function level1() { return level2(); }\n"
"function start() {\n"
" var n = 100;\n"
" while (--n)\n"
" level1();\n"
" finish = true;\n"
" level1();\n"
"}";
// The test check multiple entrances/exits between JS and native code.
//
// [Top down]:
// (root) #0 1
// start #16 3
// level1 #0 4
// level2 #16 5
// level3 #16 6
// action #16 7
// (program) #0 2
TEST(Inlining) {
i::FLAG_allow_natives_syntax = true;
v8::HandleScope scope(CcTest::isolate());
v8::Local<v8::Context> env = CcTest::NewContext(PROFILER_EXTENSION);
v8::Context::Scope context_scope(env);
CompileRun(inlining_test_source);
v8::Local<v8::Function> function = GetFunction(env, "start");
v8::CpuProfiler* cpu_profiler = env->GetIsolate()->GetCpuProfiler();
v8::Local<v8::String> profile_name = v8_str("my_profile");
function->Call(env, env->Global(), 0, NULL).ToLocalChecked();
v8::CpuProfile* profile = cpu_profiler->StopProfiling(profile_name);
CHECK(profile);
// Dump collected profile to have a better diagnostic in case of failure.
reinterpret_cast<i::CpuProfile*>(profile)->Print();
const v8::CpuProfileNode* root = profile->GetTopDownRoot();
const v8::CpuProfileNode* start_node = GetChild(env, root, "start");
const v8::CpuProfileNode* level1_node = GetChild(env, start_node, "level1");
const v8::CpuProfileNode* level2_node = GetChild(env, level1_node, "level2");
const v8::CpuProfileNode* level3_node = GetChild(env, level2_node, "level3");
GetChild(env, level3_node, "action");
profile->Delete();
}
// [Top down]:
// 0 (root) #0 1
// 2 (program) #0 2

View File

@ -132,7 +132,7 @@ TEST(ProfileTreeAddPathFromEnd) {
CHECK(!helper.Walk(&entry3));
CodeEntry* path[] = {NULL, &entry3, NULL, &entry2, NULL, NULL, &entry1, NULL};
Vector<CodeEntry*> path_vec(path, sizeof(path) / sizeof(path[0]));
std::vector<CodeEntry*> path_vec(path, path + arraysize(path));
tree.AddPathFromEnd(path_vec);
CHECK(!helper.Walk(&entry2));
CHECK(!helper.Walk(&entry3));
@ -162,7 +162,7 @@ TEST(ProfileTreeAddPathFromEnd) {
CHECK_EQ(2u, node3->self_ticks());
CodeEntry* path2[] = {&entry2, &entry2, &entry1};
Vector<CodeEntry*> path2_vec(path2, sizeof(path2) / sizeof(path2[0]));
std::vector<CodeEntry*> path2_vec(path2, path2 + arraysize(path2));
tree.AddPathFromEnd(path2_vec);
CHECK(!helper.Walk(&entry2));
CHECK(!helper.Walk(&entry3));
@ -189,8 +189,7 @@ TEST(ProfileTreeCalculateTotalTicks) {
CodeEntry entry1(i::Logger::FUNCTION_TAG, "aaa");
CodeEntry* e1_path[] = {&entry1};
Vector<CodeEntry*> e1_path_vec(
e1_path, sizeof(e1_path) / sizeof(e1_path[0]));
std::vector<CodeEntry*> e1_path_vec(e1_path, e1_path + arraysize(e1_path));
ProfileTree single_child_tree(CcTest::i_isolate());
single_child_tree.AddPathFromEnd(e1_path_vec);
@ -204,8 +203,8 @@ TEST(ProfileTreeCalculateTotalTicks) {
CodeEntry entry2(i::Logger::FUNCTION_TAG, "bbb");
CodeEntry* e2_e1_path[] = {&entry2, &entry1};
Vector<CodeEntry*> e2_e1_path_vec(e2_e1_path,
sizeof(e2_e1_path) / sizeof(e2_e1_path[0]));
std::vector<CodeEntry*> e2_e1_path_vec(e2_e1_path,
e2_e1_path + arraysize(e2_e1_path));
ProfileTree flat_tree(CcTest::i_isolate());
ProfileTreeTestHelper flat_helper(&flat_tree);
@ -227,12 +226,10 @@ TEST(ProfileTreeCalculateTotalTicks) {
CHECK_EQ(2u, node1->self_ticks());
CodeEntry* e2_path[] = {&entry2};
Vector<CodeEntry*> e2_path_vec(
e2_path, sizeof(e2_path) / sizeof(e2_path[0]));
std::vector<CodeEntry*> e2_path_vec(e2_path, e2_path + arraysize(e2_path));
CodeEntry entry3(i::Logger::FUNCTION_TAG, "ccc");
CodeEntry* e3_path[] = {&entry3};
Vector<CodeEntry*> e3_path_vec(
e3_path, sizeof(e3_path) / sizeof(e3_path[0]));
std::vector<CodeEntry*> e3_path_vec(e3_path, e3_path + arraysize(e3_path));
ProfileTree wide_tree(CcTest::i_isolate());
ProfileTreeTestHelper wide_helper(&wide_tree);