Don't use floating-point values in basic block instrumentation

Previously in https://chromium-review.googlesource.com/c/v8/v8/+/2545573
I updated BasicBlockInstrumentor to use 64-bit floating-point values
rather than 32-bit integers, so that it could never overflow. However,
I've now learned that some builtins (particularly RecordWrite) are not
allowed to use floating-point registers, and so running with
basic block instrumentation enabled could produce incorrect results.
This change switches back to 32-bit integers, but adds branchless
saturation logic so that counters stick at UINT32_MAX instead of
wrapping around.

Bug: chromium:1170776
Change-Id: Icbd93919fb05f50d615ec479263142addbe15c9e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2685617
Reviewed-by: Michael Stanton <mvstanton@chromium.org>
Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#72626}
Authored by Seth Brenith on 2021-02-09 15:24:38 -08:00, committed by Commit Bot
parent ef89782a5b
commit a66f2b00cd
7 changed files with 70 additions and 30 deletions

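As background for the diffs below: an unsigned 32-bit counter wraps silently
back to zero on overflow, which is exactly what the new saturation logic
prevents. A minimal standalone C++ sketch of the intended behavior (the
function name is illustrative only; this is not code from the change):

#include <cassert>
#include <cstdint>

// Illustrative saturating increment: never wraps past UINT32_MAX.
uint32_t SaturatingIncrement(uint32_t count) {
  return count == UINT32_MAX ? count : count + 1;
}

int main() {
  uint32_t wrapping = UINT32_MAX;
  wrapping += 1;  // a plain increment wraps to 0, turning a huge count
  assert(wrapping == 0);  // into a tiny, misleading one

  assert(SaturatingIncrement(UINT32_MAX - 1) == UINT32_MAX);
  assert(SaturatingIncrement(UINT32_MAX) == UINT32_MAX);  // sticks at max
  return 0;
}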
View File

@@ -92,7 +92,8 @@ BasicBlockProfilerData* BasicBlockInstrumentor::Instrument(
   } else {
     counters_array = graph->NewNode(PointerConstant(&common, data->counts()));
   }
-  Node* one = graph->NewNode(common.Float64Constant(1));
+  Node* zero = graph->NewNode(common.Int32Constant(0));
+  Node* one = graph->NewNode(common.Int32Constant(1));
   BasicBlockVector* blocks = schedule->rpo_order();
   size_t block_number = 0;
   for (BasicBlockVector::iterator it = blocks->begin(); block_number < n_blocks;
@@ -104,26 +105,37 @@ BasicBlockProfilerData* BasicBlockInstrumentor::Instrument(
     // It is unnecessary to wire effect and control deps for load and store
     // since this happens after scheduling.
     // Construct increment operation.
-    int offset_to_counter_value = static_cast<int>(block_number) * kDoubleSize;
+    int offset_to_counter_value = static_cast<int>(block_number) * kInt32Size;
     if (on_heap_counters) {
       offset_to_counter_value += ByteArray::kHeaderSize - kHeapObjectTag;
     }
     Node* offset_to_counter =
         graph->NewNode(IntPtrConstant(&common, offset_to_counter_value));
     Node* load =
-        graph->NewNode(machine.Load(MachineType::Float64()), counters_array,
+        graph->NewNode(machine.Load(MachineType::Uint32()), counters_array,
                        offset_to_counter, graph->start(), graph->start());
-    Node* inc = graph->NewNode(machine.Float64Add(), load, one);
-    Node* store = graph->NewNode(
-        machine.Store(StoreRepresentation(MachineRepresentation::kFloat64,
-                                          kNoWriteBarrier)),
-        counters_array, offset_to_counter, inc, graph->start(), graph->start());
+    Node* inc = graph->NewNode(machine.Int32Add(), load, one);
+    // Branchless saturation, because we've already run the scheduler, so
+    // introducing extra control flow here would be surprising.
+    Node* overflow = graph->NewNode(machine.Uint32LessThan(), inc, load);
+    Node* overflow_mask = graph->NewNode(machine.Int32Sub(), zero, overflow);
+    Node* saturated_inc =
+        graph->NewNode(machine.Word32Or(), inc, overflow_mask);
+    Node* store =
+        graph->NewNode(machine.Store(StoreRepresentation(
+                           MachineRepresentation::kWord32, kNoWriteBarrier)),
+                       counters_array, offset_to_counter, saturated_inc,
+                       graph->start(), graph->start());
     // Insert the new nodes.
-    static const int kArraySize = 6;
-    Node* to_insert[kArraySize] = {counters_array, one, offset_to_counter,
-                                   load, inc, store};
-    // The first two Nodes are constant across all blocks.
-    int insertion_start = block_number == 0 ? 0 : 2;
+    static const int kArraySize = 10;
+    Node* to_insert[kArraySize] = {
+        counters_array, zero,     one,      offset_to_counter,
+        load,           inc,      overflow, overflow_mask,
+        saturated_inc,  store};
+    // The first three Nodes are constant across all blocks.
+    int insertion_start = block_number == 0 ? 0 : 3;
     NodeVector::iterator insertion_point = FindInsertionPoint(block);
     block->InsertNodes(insertion_point, &to_insert[insertion_start],
                        &to_insert[kArraySize]);

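The increment above cannot use the obvious "if (count != UINT32_MAX)" form:
the instrumentor runs after scheduling, so it must not introduce new control
flow. Read as straight-line C++, the new node sequence computes the
following (a sketch under ordinary uint32_t arithmetic; the function name is
illustrative, not V8 API):

#include <cstdint>

// One line per graph node emitted above. On overflow, inc wraps to a value
// below load, so the unsigned comparison yields 1; negating that produces
// an all-ones mask, and OR-ing the mask in pins the result at UINT32_MAX.
// Without overflow the mask is 0 and the sum passes through unchanged.
uint32_t SaturatingIncrementBranchless(uint32_t load) {
  uint32_t inc = load + 1;                   // Int32Add(load, one)
  uint32_t overflow = inc < load ? 1u : 0u;  // Uint32LessThan(inc, load)
  uint32_t overflow_mask = 0u - overflow;    // Int32Sub(zero, overflow)
  return inc | overflow_mask;                // Word32Or(inc, overflow_mask)
}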
View File

@@ -60,7 +60,7 @@ Handle<String> CopyStringToJSHeap(const std::string& source, Isolate* isolate) {
 }
 
 constexpr int kBlockIdSlotSize = kInt32Size;
-constexpr int kBlockCountSlotSize = kDoubleSize;
+constexpr int kBlockCountSlotSize = kInt32Size;
 
 }  // namespace
 
 BasicBlockProfilerData::BasicBlockProfilerData(
@@ -81,8 +81,7 @@ void BasicBlockProfilerData::CopyFromJSHeap(
   code_ = js_heap_data.code().ToCString().get();
   ByteArray counts(js_heap_data.counts());
   for (int i = 0; i < counts.length() / kBlockCountSlotSize; ++i) {
-    counts_.push_back(
-        reinterpret_cast<double*>(counts.GetDataStartAddress())[i]);
+    counts_.push_back(counts.get_uint32(i));
   }
   ByteArray block_ids(js_heap_data.block_ids());
   for (int i = 0; i < block_ids.length() / kBlockIdSlotSize; ++i) {
@@ -112,7 +111,7 @@ Handle<OnHeapBasicBlockProfilerData> BasicBlockProfilerData::CopyToJSHeap(
   Handle<ByteArray> counts = isolate->factory()->NewByteArray(
       counts_array_size_in_bytes, AllocationType::kOld);
   for (int i = 0; i < static_cast<int>(n_blocks()); ++i) {
-    reinterpret_cast<double*>(counts->GetDataStartAddress())[i] = counts_[i];
+    counts->set_uint32(i, counts_[i]);
   }
   Handle<String> name = CopyStringToJSHeap(function_name_, isolate);
   Handle<String> schedule = CopyStringToJSHeap(schedule_, isolate);
@@ -133,7 +132,7 @@ void BasicBlockProfiler::ResetCounts(Isolate* isolate) {
     Handle<ByteArray> counts(
         OnHeapBasicBlockProfilerData::cast(list->Get(i)).counts(), isolate);
     for (int j = 0; j < counts->length() / kBlockCountSlotSize; ++j) {
-      reinterpret_cast<double*>(counts->GetDataStartAddress())[j] = 0;
+      counts->set_uint32(j, 0);
     }
   }
 }
@@ -197,9 +196,11 @@ void BasicBlockProfilerData::Log(Isolate* isolate) {
 }
 
 std::ostream& operator<<(std::ostream& os, const BasicBlockProfilerData& d) {
-  double block_count_sum =
-      std::accumulate(d.counts_.begin(), d.counts_.end(), 0);
-  if (block_count_sum == 0) return os;
+  if (std::all_of(d.counts_.cbegin(), d.counts_.cend(),
+                  [](uint32_t count) { return count == 0; })) {
+    // No data was collected for this function.
+    return os;
+  }
   const char* name = "unknown function";
   if (!d.function_name_.empty()) {
     name = d.function_name_.c_str();
@@ -210,14 +211,14 @@ std::ostream& operator<<(std::ostream& os, const BasicBlockProfilerData& d) {
     os << d.schedule_.c_str() << std::endl;
   }
   os << "block counts for " << name << ":" << std::endl;
-  std::vector<std::pair<size_t, double>> pairs;
+  std::vector<std::pair<size_t, uint32_t>> pairs;
   pairs.reserve(d.n_blocks());
   for (size_t i = 0; i < d.n_blocks(); ++i) {
     pairs.push_back(std::make_pair(i, d.counts_[i]));
   }
   std::sort(
       pairs.begin(), pairs.end(),
-      [=](std::pair<size_t, double> left, std::pair<size_t, double> right) {
+      [=](std::pair<size_t, uint32_t> left, std::pair<size_t, uint32_t> right) {
        if (right.second == left.second) return left.first < right.first;
        return right.second < left.second;
      });

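The heap-side storage above packs one counter per 4-byte slot of a
ByteArray, indexed through get_uint32/set_uint32. Outside V8 the same layout
can be sketched with an ordinary byte buffer; ReadSlot/WriteSlot below are
illustrative stand-ins for those accessors, not real V8 API:

#include <cstdint>
#include <cstring>
#include <vector>

constexpr int kBlockCountSlotSize = sizeof(uint32_t);  // 4 bytes per counter

// Stand-ins for ByteArray::get_uint32 / set_uint32: slot i of the counter
// array lives at byte offset i * kBlockCountSlotSize.
uint32_t ReadSlot(const std::vector<uint8_t>& bytes, int i) {
  uint32_t value;
  std::memcpy(&value, bytes.data() + i * kBlockCountSlotSize, sizeof(value));
  return value;
}

void WriteSlot(std::vector<uint8_t>& bytes, int i, uint32_t value) {
  std::memcpy(bytes.data() + i * kBlockCountSlotSize, &value, sizeof(value));
}

// A buffer for n counters is n * kBlockCountSlotSize bytes, which is why
// the loops above run counts.length() / kBlockCountSlotSize times.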
View File

@@ -36,7 +36,7 @@ class BasicBlockProfilerData {
     DCHECK_EQ(block_ids_.size(), counts_.size());
     return block_ids_.size();
   }
-  const double* counts() const { return &counts_[0]; }
+  const uint32_t* counts() const { return &counts_[0]; }
 
   void SetCode(const std::ostringstream& os);
   void SetFunctionName(std::unique_ptr<char[]> name);
@@ -62,7 +62,7 @@ class BasicBlockProfilerData {
   // These vectors are indexed by reverse post-order block number.
   std::vector<int32_t> block_ids_;
-  std::vector<double> counts_;
+  std::vector<uint32_t> counts_;
   std::string function_name_;
   std::string schedule_;
   std::string code_;

View File

@@ -1088,7 +1088,7 @@ void Logger::TimerEvent(Logger::StartEnd se, const char* name) {
 }
 
 void Logger::BasicBlockCounterEvent(const char* name, int block_id,
-                                    double count) {
+                                    uint32_t count) {
   if (!FLAG_turbo_profiling_log_builtins) return;
   MSG_BUILDER();
   msg << ProfileDataFromFileConstants::kBlockCounterMarker << kNext << name

View File

@@ -243,7 +243,7 @@ class Logger : public CodeEventListener {
   V8_EXPORT_PRIVATE void TimerEvent(StartEnd se, const char* name);
 
-  void BasicBlockCounterEvent(const char* name, int block_id, double count);
+  void BasicBlockCounterEvent(const char* name, int block_id, uint32_t count);
 
   void BuiltinHashEvent(const char* name, int hash);

View File

@@ -97,7 +97,7 @@ class UncompiledDataWithPreparseData extends UncompiledData {
 @export
 class OnHeapBasicBlockProfilerData extends HeapObject {
   block_ids: ByteArray;  // Stored as 4-byte ints
-  counts: ByteArray;  // Stored as 8-byte floats
+  counts: ByteArray;  // Stored as 4-byte unsigned ints
   name: String;
   schedule: String;
   code: String;

View File

@@ -28,9 +28,21 @@ class BasicBlockProfilerTest : public RawMachineAssemblerTester<int32_t> {
     CHECK_NE(0, static_cast<int>(l->size()));
     const BasicBlockProfilerData* data = l->back().get();
     CHECK_EQ(static_cast<int>(size), static_cast<int>(data->n_blocks()));
-    const double* counts = data->counts();
+    const uint32_t* counts = data->counts();
     for (size_t i = 0; i < size; ++i) {
-      CHECK_EQ(static_cast<double>(expected[i]), counts[i]);
+      CHECK_EQ(expected[i], counts[i]);
     }
   }
+
+  void SetCounts(size_t size, uint32_t* new_counts) {
+    const BasicBlockProfiler::DataList* l =
+        BasicBlockProfiler::Get()->data_list();
+    CHECK_NE(0, static_cast<int>(l->size()));
+    BasicBlockProfilerData* data = l->back().get();
+    CHECK_EQ(static_cast<int>(size), static_cast<int>(data->n_blocks()));
+    uint32_t* counts = const_cast<uint32_t*>(data->counts());
+    for (size_t i = 0; i < size; ++i) {
+      counts[i] = new_counts[i];
+    }
+  }
 };
@@ -73,6 +85,21 @@ TEST(ProfileDiamond) {
     uint32_t expected[] = {2, 1, 1, 1, 1, 2};
     m.Expect(arraysize(expected), expected);
   }
+
+  // Set the counters very high, to verify that they saturate rather than
+  // overflowing.
+  uint32_t near_overflow[] = {UINT32_MAX - 1, UINT32_MAX - 1, UINT32_MAX - 1,
+                              UINT32_MAX - 1, UINT32_MAX - 1, UINT32_MAX - 1};
+  m.SetCounts(arraysize(near_overflow), near_overflow);
+  m.Expect(arraysize(near_overflow), near_overflow);
+
+  m.Call(0);
+  m.Call(0);
+  {
+    uint32_t expected[] = {UINT32_MAX, UINT32_MAX, UINT32_MAX,
+                           UINT32_MAX - 1, UINT32_MAX - 1, UINT32_MAX};
+    m.Expect(arraysize(expected), expected);
+  }
 }