[compiler] Cache StateValue processing in InstructionSelector.

Processing StateValues into operands is one of the most costly
parts of instruction selection. As it happens, StateValues are
shared by many nodes, and so we are unnecessarily reprocessing
the same StateValues multiple times. This CL introduces caching
for the processed StateValues enabling very fast emitting of
operands for subsequent instructions with the same StateValue.
The hit rate for the cache is higher than 90% on most optimizations.

BUG=v8:9684

Change-Id: I45db86dcbf22ab972b892f11c608b825aeb3ecf3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2749634
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Ross McIlroy <rmcilroy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73342}
This commit is contained in:
Ross McIlroy 2021-03-11 10:00:42 +00:00 committed by Commit Bot
parent d38f84acc4
commit 812eb26456
3 changed files with 136 additions and 13 deletions

View File

@ -60,6 +60,7 @@ InstructionSelector::InstructionSelector(
enable_scheduling_(enable_scheduling),
enable_roots_relative_addressing_(enable_roots_relative_addressing),
enable_switch_jump_table_(enable_switch_jump_table),
state_values_cache_(zone),
poisoning_level_(poisoning_level),
frame_(frame),
instruction_selection_failed_(false),
@ -652,24 +653,88 @@ size_t InstructionSelector::AddOperandToStateValueDescriptor(
}
}
// A zone-allocated snapshot of the instruction operands and state-value
// descriptors produced by processing one StateValues node, so that later
// frame states sharing the same StateValues node can re-emit them cheaply
// via Emit() instead of reprocessing the node.
struct InstructionSelector::CachedStateValues : public ZoneObject {
public:
// Captures the operands appended to |inputs| from |inputs_start| onwards and
// the (non-nested) descriptors appended to |values| from |values_start|
// onwards.
// NOTE(review): values_ is a Slice holding an iterator into the
// StateValueList's backing vector — presumably that storage stays valid (and
// is not reallocated) for the cache's lifetime; confirm against
// StateValueList's allocation behavior.
CachedStateValues(Zone* zone, StateValueList* values, size_t values_start,
InstructionOperandVector* inputs, size_t inputs_start)
: inputs_(inputs->begin() + inputs_start, inputs->end(), zone),
values_(values->MakeSlice(values_start)) {}

// Appends the cached operands to |inputs| and the cached descriptor slice to
// |values|. Returns the number of operands appended.
size_t Emit(InstructionOperandVector* inputs, StateValueList* values) {
inputs->insert(inputs->end(), inputs_.begin(), inputs_.end());
values->PushCachedSlice(values_);
return inputs_.size();
}

private:
InstructionOperandVector inputs_;
StateValueList::Slice values_;
};
// Records the state of |values| and |inputs| at construction time so that,
// after a StateValues node has been processed, the newly appended portion can
// be captured into a CachedStateValues (provided caching is possible).
class InstructionSelector::CachedStateValuesBuilder {
public:
explicit CachedStateValuesBuilder(StateValueList* values,
InstructionOperandVector* inputs)
: values_(values),
inputs_(inputs),
values_start_(values->size()),
nested_start_(values->nested_count()),
inputs_start_(inputs->size()) {}

// We can only build a CachedStateValues for a StateValue if its processing
// does not create any nested StateValueLists.
bool CanCache() const { return values_->nested_count() == nested_start_; }

// Captures everything appended since construction into a zone-allocated
// CachedStateValues. Only valid to call when CanCache() is true.
InstructionSelector::CachedStateValues* Build(Zone* zone) {
DCHECK(CanCache());
return zone->New<InstructionSelector::CachedStateValues>(
zone, values_, values_start_, inputs_, inputs_start_);
}

private:
StateValueList* values_;
InstructionOperandVector* inputs_;
// Sizes/counts at construction; the delta against these identifies what the
// processed StateValue appended.
size_t values_start_;
size_t nested_start_;
size_t inputs_start_;
};
// Adds the inputs described by |node|'s StateValues to |inputs| and the
// corresponding descriptors to |values|, consulting (and populating) the
// per-selector StateValues cache. Returns the number of instruction operands
// added to |inputs|.
//
// NOTE(review): the rendered diff interleaved the removed pre-change loop
// with the added cached implementation, leaving a duplicate
// `size_t entries = 0;`, a duplicated processing loop, and unbalanced
// braces; this is the reconstructed post-change function.
size_t InstructionSelector::AddInputsToFrameStateDescriptor(
    StateValueList* values, InstructionOperandVector* inputs,
    OperandGenerator* g, StateObjectDeduplicator* deduplicator, Node* node,
    FrameStateInputKind kind, Zone* zone) {
  // StateValues are often shared across different nodes, and processing them
  // is expensive, so cache the result of processing a StateValue so that we
  // can quickly copy the result if we see it again.
  FrameStateInput key(node, kind);
  auto cache_entry = state_values_cache_.find(key);
  if (cache_entry != state_values_cache_.end()) {
    // Entry found in cache, emit cached version.
    return cache_entry->second->Emit(inputs, values);
  } else {
    // Not found in cache, generate and then store in cache if possible.
    size_t entries = 0;
    CachedStateValuesBuilder cache_builder(values, inputs);
    StateValuesAccess::iterator it = StateValuesAccess(node).begin();
    // Take advantage of sparse nature of StateValuesAccess to skip over
    // multiple empty nodes at once pushing repeated OptimizedOuts all in one
    // go.
    while (!it.done()) {
      values->PushOptimizedOut(it.AdvanceTillNotEmpty());
      if (it.done()) break;
      StateValuesAccess::TypedNode input_node = *it;
      entries += AddOperandToStateValueDescriptor(values, inputs, g,
                                                  deduplicator, input_node.node,
                                                  input_node.type, kind, zone);
      ++it;
    }
    if (cache_builder.CanCache()) {
      // Use this->zone() to build the cache entry in the instruction
      // selector's zone rather than the more long-lived instruction zone.
      state_values_cache_.emplace(key, cache_builder.Build(this->zone()));
    }
    return entries;
  }
}
// Returns the number of instruction operands added to inputs.

View File

@ -700,6 +700,31 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
};
#endif // V8_TARGET_ARCH_64_BIT
// Cache key identifying one processed StateValues input: the StateValues
// node plus the kind of frame-state input being generated. Hash and Equal
// make it usable as a ZoneUnorderedMap key.
struct FrameStateInput {
  FrameStateInput(Node* n, FrameStateInputKind k) : node(n), kind(k) {}

  Node* node;
  FrameStateInputKind kind;

  // Hash functor: combines the node pointer with the input kind.
  struct Hash {
    size_t operator()(FrameStateInput const& input) const {
      return base::hash_combine(input.node, static_cast<size_t>(input.kind));
    }
  };

  // Equality functor: two keys match when both node and kind match.
  struct Equal {
    bool operator()(FrameStateInput const& a,
                    FrameStateInput const& b) const {
      return a.node == b.node && a.kind == b.kind;
    }
  };
};
struct CachedStateValues;
class CachedStateValuesBuilder;
// ===========================================================================
Zone* const zone_;
@ -723,6 +748,9 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
EnableScheduling enable_scheduling_;
EnableRootsRelativeAddressing enable_roots_relative_addressing_;
EnableSwitchJumpTable enable_switch_jump_table_;
ZoneUnorderedMap<FrameStateInput, CachedStateValues*, FrameStateInput::Hash,
FrameStateInput::Equal>
state_values_cache_;
PoisoningMitigationLevel poisoning_level_;
Frame* frame_;

View File

@ -1229,6 +1229,8 @@ class StateValueList {
size_t size() { return fields_.size(); }
size_t nested_count() { return nested_.size(); }
struct Value {
StateValueDescriptor* desc;
StateValueList* nested;
@ -1270,6 +1272,14 @@ class StateValueList {
ZoneVector<StateValueList*>::iterator nested_iterator;
};
// A lightweight view of a contiguous run of (non-nested) fields inside a
// StateValueList, identified by a start iterator into the backing vector and
// a field count. Produced by MakeSlice() and consumed by PushCachedSlice().
// NOTE(review): holds a raw iterator — presumably only valid while the
// backing fields_ vector is not reallocated; confirm with callers.
struct Slice {
Slice(ZoneVector<StateValueDescriptor>::iterator start, size_t fields)
: start_position(start), fields_count(fields) {}

ZoneVector<StateValueDescriptor>::iterator start_position;
size_t fields_count;
};
// Pre-reserves capacity for |size| field descriptors to avoid reallocations.
void ReserveSize(size_t size) { fields_.reserve(size); }
StateValueList* PushRecursiveField(Zone* zone, size_t id) {
@ -1293,11 +1303,31 @@ class StateValueList {
// Appends |num| OptimizedOut placeholder descriptors in a single insertion.
void PushOptimizedOut(size_t num = 1) {
fields_.insert(fields_.end(), num, StateValueDescriptor::OptimizedOut());
}
// Copies the field descriptors described by a previously made Slice onto the
// end of this list's fields.
void PushCachedSlice(const Slice& cached) {
fields_.insert(fields_.end(), cached.start_position,
cached.start_position + cached.fields_count);
}
// Builds a Slice covering the (non-nested) fields appended to this
// StateValueList between values_start and the current end position.
Slice MakeSlice(size_t values_start) {
  DCHECK(!HasNestedFieldsAfter(values_start));
  return Slice(fields_.begin() + values_start, fields_.size() - values_start);
}
// Iterates fields and their associated nested lists in tandem.
iterator begin() { return iterator(fields_.begin(), nested_.begin()); }
iterator end() { return iterator(fields_.end(), nested_.end()); }
private:
// Returns true if any field appended at or after |values_start| is a nested
// field — in which case that tail cannot be represented as a flat Slice.
bool HasNestedFieldsAfter(size_t values_start) {
  auto cursor = fields_.begin() + values_start;
  while (cursor != fields_.end()) {
    if (cursor->IsNested()) return true;
    ++cursor;
  }
  return false;
}
ZoneVector<StateValueDescriptor> fields_;
ZoneVector<StateValueList*> nested_;
};