2014-09-29 07:29:14 +00:00
|
|
|
// Copyright 2014 the V8 project authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
|
|
|
|
2019-05-20 09:15:06 +00:00
|
|
|
#include "src/diagnostics/basic-block-profiler.h"
|
2019-05-23 08:51:46 +00:00
|
|
|
#include "src/objects/objects-inl.h"
|
2014-09-29 07:29:14 +00:00
|
|
|
#include "test/cctest/cctest.h"
|
|
|
|
#include "test/cctest/compiler/codegen-tester.h"
|
|
|
|
|
2015-10-30 09:16:26 +00:00
|
|
|
namespace v8 {
|
|
|
|
namespace internal {
|
|
|
|
namespace compiler {
|
2014-09-29 07:29:14 +00:00
|
|
|
|
|
|
|
// Fixture for the basic-block profiler tests. It is a raw machine assembler
// tester for a function returning int32 and taking one Int32 parameter, with
// helpers to read, overwrite and reset the counters of the most recently
// registered profile.
class BasicBlockProfilerTest : public RawMachineAssemblerTester<int32_t> {
 public:
  // Declares the single Int32 parameter and turns on the turbo_profiling flag.
  // The flag is left enabled; nothing in this class restores it.
  BasicBlockProfilerTest()
      : RawMachineAssemblerTester<int32_t>(MachineType::Int32()) {
    FLAG_turbo_profiling = true;
  }

  // Asks the global profiler to reset its counts for the cctest isolate.
  void ResetCounts() {
    BasicBlockProfiler::Get()->ResetCounts(CcTest::i_isolate());
  }

  // Checks that the newest profile has exactly |size| blocks and that the
  // counter of every block equals the matching entry of |expected|.
  void Expect(size_t size, uint32_t* expected) {
    const BasicBlockProfilerData* profile = NewestProfile();
    CHECK_EQ(static_cast<int>(size), static_cast<int>(profile->n_blocks()));
    const uint32_t* actual = profile->counts();
    for (size_t block = 0; block != size; ++block) {
      CHECK_EQ(expected[block], actual[block]);
    }
  }

  // Overwrites the counters of the newest profile with |new_counts| after
  // checking that the profile has exactly |size| blocks.
  void SetCounts(size_t size, uint32_t* new_counts) {
    BasicBlockProfilerData* profile = NewestProfile();
    CHECK_EQ(static_cast<int>(size), static_cast<int>(profile->n_blocks()));
    // counts() is used as the read accessor in Expect(); the const is cast
    // away here so the test can write the counters directly.
    uint32_t* slots = const_cast<uint32_t*>(profile->counts());
    for (size_t block = 0; block != size; ++block) {
      slots[block] = new_counts[block];
    }
  }

 private:
  // Returns the last entry of the profiler's data list, after checking that
  // the list is not empty.
  static BasicBlockProfilerData* NewestProfile() {
    const BasicBlockProfiler::DataList* profiles =
        BasicBlockProfiler::Get()->data_list();
    CHECK_NE(0, static_cast<int>(profiles->size()));
    return profiles->back().get();
  }
};
|
|
|
|
|
|
|
|
|
|
|
|
// Builds a diamond (branch on the parameter, two arms, common join) and
// checks the per-block counters after calls that take either arm, after a
// reset, and when the counters start just below UINT32_MAX.
TEST(ProfileDiamond) {
  BasicBlockProfilerTest m;

  RawMachineLabel first_arm, second_arm, join;
  m.Branch(m.Parameter(0), &first_arm, &second_arm);
  m.Bind(&first_arm);
  m.Goto(&join);
  m.Bind(&second_arm);
  m.Goto(&join);
  m.Bind(&join);
  m.Return(m.Int32Constant(0));

  m.GenerateCode();

  // Nothing has run yet, so all six counters are zero.
  uint32_t untouched[] = {0, 0, 0, 0, 0, 0};
  m.Expect(arraysize(untouched), untouched);

  // Argument 0: counters 1 and 2 are bumped, counters 3 and 4 stay at zero.
  m.Call(0);
  uint32_t after_zero_arg[] = {1, 1, 1, 0, 0, 1};
  m.Expect(arraysize(after_zero_arg), after_zero_arg);

  m.ResetCounts();

  // Argument 1 after a reset: now counters 3 and 4 are bumped instead.
  m.Call(1);
  uint32_t after_one_arg[] = {1, 0, 0, 1, 1, 1};
  m.Expect(arraysize(after_one_arg), after_one_arg);

  // No reset this time, so the counts of this call add to the previous ones.
  m.Call(0);
  uint32_t accumulated[] = {2, 1, 1, 1, 1, 2};
  m.Expect(arraysize(accumulated), accumulated);

  // Set the counters very high, to verify that they saturate rather than
  // overflowing.
  uint32_t near_overflow[] = {UINT32_MAX - 1, UINT32_MAX - 1, UINT32_MAX - 1,
                              UINT32_MAX - 1, UINT32_MAX - 1, UINT32_MAX - 1};
  m.SetCounts(arraysize(near_overflow), near_overflow);
  m.Expect(arraysize(near_overflow), near_overflow);

  // Two more calls would push the executed blocks past UINT32_MAX; they are
  // expected to stop at UINT32_MAX while the unexecuted blocks are unchanged.
  m.Call(0);
  m.Call(0);
  uint32_t saturated[] = {UINT32_MAX,     UINT32_MAX,     UINT32_MAX,
                          UINT32_MAX - 1, UINT32_MAX - 1, UINT32_MAX};
  m.Expect(arraysize(saturated), saturated);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Builds a loop that counts the parameter down to zero and then returns 1,
// and checks the per-block counters for several iteration counts.
TEST(ProfileLoop) {
  BasicBlockProfilerTest m;

  RawMachineLabel loop_head, loop_body, loop_exit;
  Node* one = m.Int32Constant(1);
  m.Goto(&loop_head);

  m.Bind(&loop_head);
  // The second phi input is a placeholder; it is replaced with the
  // decremented counter once the loop body has been built.
  Node* remaining =
      m.Phi(MachineRepresentation::kWord32, m.Parameter(0), one);
  m.Branch(remaining, &loop_body, &loop_exit);

  m.Bind(&loop_body);
  remaining->ReplaceInput(1, m.Int32Sub(remaining, one));
  m.Goto(&loop_head);

  m.Bind(&loop_exit);
  m.Return(one);

  m.GenerateCode();

  // Nothing has run yet, so all six counters are zero.
  uint32_t untouched[] = {0, 0, 0, 0, 0, 0};
  m.Expect(arraysize(untouched), untouched);

  uint32_t runs[] = {0, 1, 500, 10000};
  for (uint32_t iterations : runs) {
    m.ResetCounts();
    CHECK_EQ(1, m.Call(static_cast<int>(iterations)));
    // Counter 1 is hit once more than the iteration count, counters 2 and 3
    // once per iteration, and counters 0, 4 and 5 exactly once per call.
    uint32_t per_block[] = {1, iterations + 1, iterations, iterations, 1, 1};
    m.Expect(arraysize(per_block), per_block);
  }
}
|
2015-10-30 09:16:26 +00:00
|
|
|
|
|
|
|
} // namespace compiler
|
|
|
|
} // namespace internal
|
|
|
|
} // namespace v8
|