[wasm][liftoff] Spill multi-used registers before branches

This extends the idea already used by "MaterializeMergedConstants":
certain values have to be processed by every br*, so to protect against
cascades of conditional jumps causing lots of repeated work, it makes
sense to do such processing just once.
For the module in the linked bug, this reduces Liftoff-generated code
size from 69MB to 181KB.

Fixed: v8:13072
Change-Id: Ie9f98240e93751988067d4774d4a09b2b39bdad6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3760444
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81722}

This commit is contained in:
parent
e66547b774
commit
0ef8ce7dca
@ -717,18 +717,36 @@ void LiftoffAssembler::PrepareLoopArgs(int num) {
|
||||
}
|
||||
}
|
||||
|
||||
void LiftoffAssembler::MaterializeMergedConstants(uint32_t arity) {
|
||||
// Materialize constants on top of the stack ({arity} many), and locals.
|
||||
void LiftoffAssembler::PrepareForBranch(uint32_t arity, LiftoffRegList pinned) {
|
||||
VarState* stack_base = cache_state_.stack_state.data();
|
||||
for (auto slots :
|
||||
{base::VectorOf(stack_base + cache_state_.stack_state.size() - arity,
|
||||
arity),
|
||||
base::VectorOf(stack_base, num_locals())}) {
|
||||
for (VarState& slot : slots) {
|
||||
if (slot.is_reg()) {
|
||||
// Registers used more than once can't be used for merges.
|
||||
if (cache_state_.get_use_count(slot.reg()) > 1) {
|
||||
RegClass rc = reg_class_for(slot.kind());
|
||||
if (cache_state_.has_unused_register(rc, pinned)) {
|
||||
LiftoffRegister dst_reg = cache_state_.unused_register(rc, pinned);
|
||||
Move(dst_reg, slot.reg(), slot.kind());
|
||||
cache_state_.inc_used(dst_reg);
|
||||
cache_state_.dec_used(slot.reg());
|
||||
slot.MakeRegister(dst_reg);
|
||||
} else {
|
||||
Spill(slot.offset(), slot.reg(), slot.kind());
|
||||
cache_state_.dec_used(slot.reg());
|
||||
slot.MakeStack();
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Materialize constants.
|
||||
if (!slot.is_const()) continue;
|
||||
RegClass rc = reg_class_for(slot.kind());
|
||||
if (cache_state_.has_unused_register(rc)) {
|
||||
LiftoffRegister reg = cache_state_.unused_register(rc);
|
||||
if (cache_state_.has_unused_register(rc, pinned)) {
|
||||
LiftoffRegister reg = cache_state_.unused_register(rc, pinned);
|
||||
LoadConstant(reg, slot.constant());
|
||||
cache_state_.inc_used(reg);
|
||||
slot.MakeRegister(reg);
|
||||
|
@ -636,7 +636,10 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
return SpillOneRegister(candidates);
|
||||
}
|
||||
|
||||
void MaterializeMergedConstants(uint32_t arity);
|
||||
// Performs operations on locals and the top {arity} value stack entries
|
||||
// that would (very likely) have to be done by branches. Doing this up front
|
||||
// avoids making each subsequent (conditional) branch repeat this work.
|
||||
void PrepareForBranch(uint32_t arity, LiftoffRegList pinned);
|
||||
|
||||
enum JumpDirection { kForwardJump, kBackwardJump };
|
||||
void MergeFullStackWith(CacheState& target, const CacheState& source);
|
||||
|
@ -2703,12 +2703,9 @@ class LiftoffCompiler {
|
||||
}
|
||||
|
||||
void BrIf(FullDecoder* decoder, const Value& /* cond */, uint32_t depth) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// TODO(clemensb): Do the same for br_table.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label cont_false;
|
||||
@ -2791,12 +2788,21 @@ class LiftoffCompiler {
|
||||
}
|
||||
}
|
||||
if (need_temps) {
|
||||
LiftoffRegList pinned;
|
||||
tmp1 = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
|
||||
tmp2 = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// All targets must have the same arity (checked by validation), so
|
||||
// we can just sample any of them to find that arity.
|
||||
uint32_t ignored_length;
|
||||
uint32_t sample_depth = decoder->read_u32v<Decoder::kNoValidation>(
|
||||
imm.table, &ignored_length, "first depth");
|
||||
__ PrepareForBranch(decoder->control_at(sample_depth)->br_merge()->arity,
|
||||
pinned);
|
||||
}
|
||||
|
||||
BranchTableIterator<validate> table_iterator(decoder, imm);
|
||||
std::map<uint32_t, MovableLabel> br_targets;
|
||||
|
||||
@ -3487,11 +3493,9 @@ class LiftoffCompiler {
|
||||
void BrOnNull(FullDecoder* decoder, const Value& ref_object, uint32_t depth,
|
||||
bool pass_null_along_branch,
|
||||
Value* /* result_on_fallthrough */) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label cont_false;
|
||||
@ -3520,11 +3524,9 @@ class LiftoffCompiler {
|
||||
void BrOnNonNull(FullDecoder* decoder, const Value& ref_object,
|
||||
Value* /* result */, uint32_t depth,
|
||||
bool drop_null_on_fallthrough) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label cont_false;
|
||||
@ -5899,11 +5901,9 @@ class LiftoffCompiler {
|
||||
|
||||
void BrOnCast(FullDecoder* decoder, const Value& obj, const Value& rtt,
|
||||
Value* /* result_on_branch */, uint32_t depth) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label cont_false;
|
||||
@ -5927,11 +5927,9 @@ class LiftoffCompiler {
|
||||
|
||||
void BrOnCastFail(FullDecoder* decoder, const Value& obj, const Value& rtt,
|
||||
Value* /* result_on_fallthrough */, uint32_t depth) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label cont_branch, fallthrough;
|
||||
@ -6105,11 +6103,9 @@ class LiftoffCompiler {
|
||||
template <TypeChecker type_checker>
|
||||
void BrOnAbstractType(const Value& object, FullDecoder* decoder,
|
||||
uint32_t br_depth) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (br_depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(br_depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(br_depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label no_match;
|
||||
@ -6126,11 +6122,9 @@ class LiftoffCompiler {
|
||||
template <TypeChecker type_checker>
|
||||
void BrOnNonAbstractType(const Value& object, FullDecoder* decoder,
|
||||
uint32_t br_depth) {
|
||||
// Before branching, materialize all constants. This avoids repeatedly
|
||||
// materializing them for each conditional branch.
|
||||
// Avoid having sequences of branches do duplicate work.
|
||||
if (br_depth != decoder->control_depth() - 1) {
|
||||
__ MaterializeMergedConstants(
|
||||
decoder->control_at(br_depth)->br_merge()->arity);
|
||||
__ PrepareForBranch(decoder->control_at(br_depth)->br_merge()->arity, {});
|
||||
}
|
||||
|
||||
Label no_match, end;
|
||||
|
Loading…
Reference in New Issue
Block a user