v8/test/cctest/compiler/test-basic-block-profiler.cc

142 lines
3.7 KiB
C++
Raw Normal View History

// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/diagnostics/basic-block-profiler.h"
#include "src/objects/objects-inl.h"
#include "test/cctest/cctest.h"
#include "test/cctest/compiler/codegen-tester.h"
namespace v8 {
namespace internal {
namespace compiler {
// Test fixture that assembles machine code with --turbo-profiling switched on
// and inspects the per-basic-block execution counters afterwards.
//
// The counters are read from (and written to) the most recently added entry in
// the data list of the singleton BasicBlockProfiler. For runtime-generated
// code such as the code built here, the generated instructions increment
// those counters through raw pointers (see v8:10470 / v8:9119).
class BasicBlockProfilerTest : public RawMachineAssemblerTester<int32_t> {
 public:
  // Constructs the tester with MachineType::Int32() and enables block
  // profiling. The flag is left set after construction.
  BasicBlockProfilerTest()
      : RawMachineAssemblerTester<int32_t>(MachineType::Int32()) {
    FLAG_turbo_profiling = true;
  }

  // Asks the profiler to reset its recorded counts for the test isolate.
  void ResetCounts() {
    BasicBlockProfiler::Get()->ResetCounts(CcTest::i_isolate());
  }

  // Checks that the newest profiler entry has exactly |size| blocks and that
  // block i has been counted expected[i] times, for every i in [0, size).
  void Expect(size_t size, const uint32_t* expected) {
    const BasicBlockProfilerData* data = LatestData(size);
    const uint32_t* counts = data->counts();
    for (size_t i = 0; i < size; ++i) {
      CHECK_EQ(expected[i], counts[i]);
    }
  }

  // Overwrites the counters of the newest profiler entry with
  // new_counts[0..size). The entry must have exactly |size| blocks.
  void SetCounts(size_t size, const uint32_t* new_counts) {
    BasicBlockProfilerData* data = LatestData(size);
    // counts() only hands out a read-only pointer; the test writes through it
    // on purpose so that it can place the counters just below their maximum.
    uint32_t* counts = const_cast<uint32_t*>(data->counts());
    for (size_t i = 0; i < size; ++i) {
      counts[i] = new_counts[i];
    }
  }

 private:
  // Returns the most recently added profiler entry after checking that the
  // list is non-empty and that the entry has |expected_blocks| blocks.
  static BasicBlockProfilerData* LatestData(size_t expected_blocks) {
    const BasicBlockProfiler::DataList* l =
        BasicBlockProfiler::Get()->data_list();
    CHECK_NE(0, static_cast<int>(l->size()));
    BasicBlockProfilerData* data = l->back().get();
    CHECK_EQ(static_cast<int>(expected_blocks),
             static_cast<int>(data->n_blocks()));
    return data;
  }
};
// Profiles a diamond: a branch on the parameter into one of two arms that
// both jump to a common block returning 0. Verifies the six block counters
// before any call, after calls taking each arm, after a reset, and when the
// counters are close to UINT32_MAX.
TEST(ProfileDiamond) {
  BasicBlockProfilerTest m;

  RawMachineLabel first_arm;
  RawMachineLabel second_arm;
  RawMachineLabel merge;
  m.Branch(m.Parameter(0), &first_arm, &second_arm);
  m.Bind(&first_arm);
  m.Goto(&merge);
  m.Bind(&second_arm);
  m.Goto(&merge);
  m.Bind(&merge);
  m.Return(m.Int32Constant(0));

  m.GenerateCode();

  // Nothing has run yet, so every counter is still zero.
  uint32_t untouched[] = {0, 0, 0, 0, 0, 0};
  m.Expect(arraysize(untouched), untouched);

  m.Call(0);
  uint32_t after_call_with_zero[] = {1, 1, 1, 0, 0, 1};
  m.Expect(arraysize(after_call_with_zero), after_call_with_zero);

  m.ResetCounts();

  m.Call(1);
  uint32_t after_call_with_one[] = {1, 0, 0, 1, 1, 1};
  m.Expect(arraysize(after_call_with_one), after_call_with_one);

  // Counts accumulate across calls when no reset happens in between.
  m.Call(0);
  uint32_t accumulated[] = {2, 1, 1, 1, 1, 2};
  m.Expect(arraysize(accumulated), accumulated);

  // Set the counters very high, to verify that they saturate rather than
  // overflowing.
  constexpr uint32_t kOneBelowMax = UINT32_MAX - 1;
  uint32_t near_overflow[] = {kOneBelowMax, kOneBelowMax, kOneBelowMax,
                              kOneBelowMax, kOneBelowMax, kOneBelowMax};
  m.SetCounts(arraysize(near_overflow), near_overflow);
  m.Expect(arraysize(near_overflow), near_overflow);

  // Two more executions: the counters that are hit must stop at UINT32_MAX,
  // while the two that are not hit keep their value.
  m.Call(0);
  m.Call(0);
  uint32_t saturated[] = {UINT32_MAX,   UINT32_MAX,   UINT32_MAX,
                          kOneBelowMax, kOneBelowMax, UINT32_MAX};
  m.Expect(arraysize(saturated), saturated);
}
// Profiles a counting loop: a phi starts at the parameter, the header
// branches on it to either the body or the exit, and the body feeds
// (phi - 1) back into the phi. For an input of n the six counters are
// expected to read {1, n + 1, n, n, 1, 1}.
TEST(ProfileLoop) {
  BasicBlockProfilerTest m;

  RawMachineLabel loop_header;
  RawMachineLabel loop_body;
  RawMachineLabel loop_exit;
  Node* one = m.Int32Constant(1);

  m.Goto(&loop_header);

  m.Bind(&loop_header);
  // The second phi input is a placeholder until the body has been built.
  Node* remaining =
      m.Phi(MachineRepresentation::kWord32, m.Parameter(0), one);
  m.Branch(remaining, &loop_body, &loop_exit);

  m.Bind(&loop_body);
  remaining->ReplaceInput(1, m.Int32Sub(remaining, one));
  m.Goto(&loop_header);

  m.Bind(&loop_exit);
  m.Return(one);

  m.GenerateCode();

  // Before the first call all counters are zero.
  uint32_t untouched[] = {0, 0, 0, 0, 0, 0};
  m.Expect(arraysize(untouched), untouched);

  uint32_t runs[] = {0, 1, 500, 10000};
  for (uint32_t iterations : runs) {
    m.ResetCounts();
    CHECK_EQ(1, m.Call(static_cast<int>(iterations)));
    uint32_t expected[] = {1, iterations + 1, iterations, iterations, 1, 1};
    m.Expect(arraysize(expected), expected);
  }
}
} // namespace compiler
} // namespace internal
} // namespace v8