[liftoff] Combine "InitMerge" with the actual merge

"InitMerge" did compute the state at the merge point, and a following
"MergeStackWith" or "MergeFullStackWith" would then generate the code to
merge the current state into the computed state.
As every "InitMerge" is followed by an actual merge, we can combine the
two and save one iteration over the two states.
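
For illustration, the typical call-site change (simplified from the
{FallThruTo} hunk below; "__" is the LiftoffCompiler assembler macro):

  // Before: build the merge state, then (unconditionally) emit the
  // code that transfers the current state into it.
  if (!c->end_merge.reached) {
    c->label_state.InitMerge(*__ cache_state(), __ num_locals(),
                             c->end_merge.arity,
                             c->stack_depth + c->num_exceptions);
  }
  __ MergeFullStackWith(c->label_state);

  // After: the first arrival builds the state and emits the transfers
  // in a single pass; later arrivals only emit the merge code.
  if (c->end_merge.reached) {
    __ MergeStackWith(c->label_state, c->br_merge()->arity,
                      LiftoffAssembler::kForwardJump);
  } else {
    c->label_state = __ MergeIntoNewState(
        __ num_locals(), c->end_merge.arity,
        c->stack_depth + c->num_exceptions);
  }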

The only change in generated code is that, after a one-armed if, we now
initialize the merge state from the if-state instead of the else-state.
This could make the if-branch slightly cheaper and the else-branch
slightly more expensive, but it will not negatively impact overall code
size.
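
Sketch of that change for the one-armed if (simplified from the
{PopControl} hunk below):

  // Before: init the merge point from the else-state, then merge the
  // if-state into it.
  c->label_state.InitMerge(c->else_state->state, __ num_locals(),
                           c->start_merge.arity,
                           c->stack_depth + c->num_exceptions);
  __ MergeFullStackWith(c->label_state);

  // After: init the merge point from the current (if) state; the
  // else-state is merged into it afterwards.
  c->label_state =
      __ MergeIntoNewState(__ num_locals(), c->start_merge.arity,
                           c->stack_depth + c->num_exceptions);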

This CL should save roughly 2% of Liftoff compilation time.

R=dlehmann@chromium.org

Bug: v8:13565, v8:13673
Change-Id: Id323a15e7fd765727f46830509fbaf7f5498c229
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4203380
Reviewed-by: Daniel Lehmann <dlehmann@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85616}
commit 656c74218f
parent 69dd71dabe
Author: Clemens Backes <clemensb@chromium.org>
Date:   2023-02-02 12:14:19 +01:00
Committed-by: V8 LUCI CQ

3 changed files with 155 additions and 101 deletions

src/wasm/baseline/liftoff-assembler.cc

@@ -102,6 +102,10 @@ class StackTransferRecipe {
DCHECK_EQ(dst.i32_const(), src.i32_const());
return;
}
TransferToStackSlot(dst, src);
}
void TransferToStackSlot(const VarState& dst, const VarState& src) {
DCHECK(dst.is_stack());
switch (src.loc()) {
case VarState::kStack:
@@ -396,25 +400,43 @@ enum ReuseRegisters : bool {
kReuseRegisters = true,
kNoReuseRegisters = false
};
// {InitMergeRegion} is a helper used by {MergeIntoNewState} to initialize
// a part of the target stack ([target, target+count]) from [source,
// source+count]. The parameters specify how to initialize the part. The goal is
// to set up the region such that later merges (via {MergeStackWith} /
// {MergeFullStackWith}) can successfully transfer their values to this new
// state.
void InitMergeRegion(LiftoffAssembler::CacheState* state,
const VarState* source, VarState* target, uint32_t count,
MergeKeepStackSlots keep_stack_slots,
MergeAllowConstants allow_constants,
MergeAllowRegisters allow_registers,
ReuseRegisters reuse_registers, LiftoffRegList used_regs) {
ReuseRegisters reuse_registers, LiftoffRegList used_regs,
int new_stack_offset, StackTransferRecipe& transfers) {
RegisterReuseMap register_reuse_map;
for (const VarState* source_end = source + count; source < source_end;
++source, ++target) {
if ((source->is_stack() && keep_stack_slots) ||
(source->is_const() && allow_constants)) {
*target = *source;
// If {new_stack_offset} is set, we want to recompute stack offsets for
// the region we are initializing such that they are contiguous. If
// {new_stack_offset} is zero (which is an illegal stack offset), we just
// keep the source offsets.
if (new_stack_offset) {
new_stack_offset =
LiftoffAssembler::NextSpillOffset(source->kind(), new_stack_offset);
target->set_offset(new_stack_offset);
}
continue;
}
base::Optional<LiftoffRegister> reg;
bool needs_reg_transfer = true;
if (allow_registers) {
// First try: Keep the same register, if it's free.
if (source->is_reg() && state->is_free(source->reg())) {
reg = source->reg();
needs_reg_transfer = false;
}
// Second try: Use the same register we used before (if we reuse
// registers).
@@ -427,51 +449,76 @@ void InitMergeRegion(LiftoffAssembler::CacheState* state,
reg = state->unused_register(rc, used_regs);
}
}
if (!reg) {
// No free register; make this a stack slot.
*target = VarState(source->kind(), source->offset());
continue;
// See above: Recompute the stack offset if requested.
int target_offset = source->offset();
if (new_stack_offset) {
new_stack_offset =
LiftoffAssembler::NextSpillOffset(source->kind(), new_stack_offset);
target_offset = new_stack_offset;
}
if (reg) {
if (needs_reg_transfer) transfers.LoadIntoRegister(*reg, *source);
if (reuse_registers) register_reuse_map.Add(source->reg(), *reg);
state->inc_used(*reg);
*target = VarState(source->kind(), *reg, target_offset);
} else {
// No free register; make this a stack slot.
*target = VarState(source->kind(), target_offset);
transfers.TransferToStackSlot(*target, *source);
}
if (reuse_registers) register_reuse_map.Add(source->reg(), *reg);
state->inc_used(*reg);
*target = VarState(source->kind(), *reg, source->offset());
}
}
} // namespace
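// Illustration (not part of this CL): the {new_stack_offset} threading in
// {InitMergeRegion} re-packs a region so that its spill slots become
// contiguous again after the "(discarded)" region is dropped. A toy model,
// assuming uniform 8-byte slots (the real {NextSpillOffset} also aligns by
// value kind):
//
//   int NextToySpillOffset(int offset) { return offset + 8; }
//
//   void RepackRegion(int* offsets, int count, int offset_before_region) {
//     // Source offsets may contain gaps; afterwards they are
//     // offset_before_region + 8, + 16, ...
//     int offset = offset_before_region;
//     for (int i = 0; i < count; ++i) {
//       offset = NextToySpillOffset(offset);
//       offsets[i] = offset;
//     }
//   }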
// TODO(clemensb): Don't copy the full parent state (this makes us N^2).
void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
uint32_t num_locals,
uint32_t arity,
uint32_t stack_depth) {
// |------locals------|---(in between)----|--(discarded)--|----merge----|
// <-- num_locals --> <-- stack_depth -->^stack_base <-- arity -->
LiftoffAssembler::CacheState LiftoffAssembler::MergeIntoNewState(
uint32_t num_locals, uint32_t arity, uint32_t stack_depth) {
CacheState target;
if (source.cached_instance != no_reg) {
SetInstanceCacheRegister(source.cached_instance);
// The source state looks like this:
// |------locals------|---(stack prefix)---|--(discarded)--|----merge----|
// <-- num_locals --> <-- stack_depth --> <-- arity -->
//
// We compute the following target state from it:
// |------locals------|---(stack prefix)----|----merge----|
// <-- num_locals --> <-- stack_depth --> <-- arity -->
//
// The target state will have dropped the "(discarded)" region, and the
// "locals" and "merge" regions have been modified to avoid any constants and
// avoid duplicate register uses. This ensures that later merges can
// successfully transfer into the target state.
// The "stack prefix" region will be identical for any source that merges into
// that state.
if (cache_state_.cached_instance != no_reg) {
target.SetInstanceCacheRegister(cache_state_.cached_instance);
}
if (source.cached_mem_start != no_reg) {
SetMemStartCacheRegister(source.cached_mem_start);
if (cache_state_.cached_mem_start != no_reg) {
target.SetMemStartCacheRegister(cache_state_.cached_mem_start);
}
uint32_t stack_base = stack_depth + num_locals;
uint32_t target_height = stack_base + arity;
uint32_t discarded = source.stack_height() - target_height;
DCHECK(stack_state.empty());
uint32_t target_height = num_locals + stack_depth + arity;
DCHECK_GE(source.stack_height(), stack_base);
stack_state.resize_no_init(target_height);
target.stack_state.resize_no_init(target_height);
const VarState* source_begin = source.stack_state.data();
VarState* target_begin = stack_state.data();
const VarState* source_begin = cache_state_.stack_state.data();
VarState* target_begin = target.stack_state.data();
// Try to keep locals and the merge region in their registers. Register used
// Compute the starts of the three different regions, for source and target
// (see pictograms above).
const VarState* locals_source = source_begin;
const VarState* stack_prefix_source = source_begin + num_locals;
const VarState* merge_source = cache_state_.stack_state.end() - arity;
VarState* locals_target = target_begin;
VarState* stack_prefix_target = target_begin + num_locals;
VarState* merge_target = target_begin + num_locals + stack_depth;
// Try to keep locals and the merge region in their registers. Registers used
// multiple times need to be copied to another free register. Compute the list
// of used registers.
LiftoffRegList used_regs;
for (auto& src : base::VectorOf(source_begin, num_locals)) {
for (auto& src : base::VectorOf(locals_source, num_locals)) {
if (src.is_reg()) used_regs.set(src.reg());
}
// If there is more than one operand in the merge region, a stack-to-stack
@@ -481,44 +528,54 @@ void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
MergeAllowRegisters allow_registers =
arity <= 1 ? kRegistersAllowed : kRegistersNotAllowed;
if (allow_registers) {
for (auto& src :
base::VectorOf(source_begin + stack_base + discarded, arity)) {
for (auto& src : base::VectorOf(merge_source, arity)) {
if (src.is_reg()) used_regs.set(src.reg());
}
}
// Initialize the merge region. If this region moves, try to turn stack slots
// into registers since we need to load the value anyways.
MergeKeepStackSlots keep_merge_stack_slots =
discarded == 0 ? kKeepStackSlots : kTurnStackSlotsIntoRegisters;
InitMergeRegion(this, source_begin + stack_base + discarded,
target_begin + stack_base, arity, keep_merge_stack_slots,
kConstantsNotAllowed, allow_registers, kNoReuseRegisters,
used_regs);
// Shift spill offsets down to keep slots contiguous.
int offset = stack_base == 0 ? StaticStackFrameSize()
: source.stack_state[stack_base - 1].offset();
auto merge_region = base::VectorOf(target_begin + stack_base, arity);
for (VarState& var : merge_region) {
offset = LiftoffAssembler::NextSpillOffset(var.kind(), offset);
var.set_offset(offset);
StackTransferRecipe transfers(this);
// The merge region is often empty, hence check for this before doing any
// work (even though not needed for correctness).
if (arity) {
// Initialize the merge region. If this region moves, try to turn stack
// slots into registers since we need to load the value anyways.
MergeKeepStackSlots keep_merge_stack_slots =
target_height == cache_state_.stack_height()
? kKeepStackSlots
: kTurnStackSlotsIntoRegisters;
// Shift spill offsets down to keep slots contiguous.
int merge_region_stack_offset = merge_target == target_begin
? StaticStackFrameSize()
: merge_source[-1].offset();
InitMergeRegion(&target, merge_source, merge_target, arity,
keep_merge_stack_slots, kConstantsNotAllowed,
allow_registers, kNoReuseRegisters, used_regs,
merge_region_stack_offset, transfers);
}
// Initialize the locals region. Here, stack slots stay stack slots (because
// they do not move). Try to keep registers in registers, but avoid duplicates.
InitMergeRegion(this, source_begin, target_begin, num_locals, kKeepStackSlots,
kConstantsNotAllowed, kRegistersAllowed, kNoReuseRegisters,
used_regs);
if (num_locals) {
InitMergeRegion(&target, locals_source, locals_target, num_locals,
kKeepStackSlots, kConstantsNotAllowed, kRegistersAllowed,
kNoReuseRegisters, used_regs, 0, transfers);
}
// Consistency check: All the {used_regs} are really in use now.
DCHECK_EQ(used_regs, used_registers & used_regs);
DCHECK_EQ(used_regs, target.used_registers & used_regs);
// Last, initialize the section in between. Here, constants are allowed, but
// registers which are already used for the merge region or locals must be
// Last, initialize the "stack prefix" region. Here, constants are allowed,
// but registers which are already used for the merge region or locals must be
// moved to other registers or spilled. If a register appears twice in the
// source region, ensure to use the same register twice in the target region.
InitMergeRegion(this, source_begin + num_locals, target_begin + num_locals,
stack_depth, kKeepStackSlots, kConstantsAllowed,
kRegistersAllowed, kReuseRegisters, used_regs);
if (stack_depth) {
InitMergeRegion(&target, stack_prefix_source, stack_prefix_target,
stack_depth, kKeepStackSlots, kConstantsAllowed,
kRegistersAllowed, kReuseRegisters, used_regs, 0,
transfers);
}
return target;
}
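// Worked example (illustrative numbers, not from this CL): with
// num_locals = 2, stack_depth = 3 and arity = 1, a source stack of height 7
// yields:
//   uint32_t target_height = num_locals + stack_depth + arity;  // 2+3+1 == 6
//   uint32_t discarded = 7 - target_height;                     // 1
// The merge region thus starts at index 6 in the source but at index 5 in
// the target, which is why its spill offsets are recomputed to stay
// contiguous.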
void LiftoffAssembler::CacheState::Steal(CacheState& source) {
@@ -785,10 +842,8 @@ void LiftoffAssembler::MergeFullStackWith(CacheState& target) {
for (uint32_t i = 0, e = cache_state_.stack_height(); i < e; ++i) {
transfers.TransferStackSlot(target.stack_state[i],
cache_state_.stack_state[i]);
DCHECK(!SlotInterference(
target.stack_state[i],
base::VectorOf(cache_state_.stack_state.data() + i + 1,
cache_state_.stack_height() - i - 1)));
DCHECK(!SlotInterference(target.stack_state[i],
base::VectorOf(cache_state_.stack_state) + i + 1));
}
// Full stack merging is only done for forward jumps, so we can just clear the

src/wasm/baseline/liftoff-assembler.h

@@ -453,10 +453,6 @@ class LiftoffAssembler : public MacroAssembler {
return reg;
}
// TODO(clemensb): Don't copy the full parent state (this makes us N^2).
void InitMerge(const CacheState& source, uint32_t num_locals,
uint32_t arity, uint32_t stack_depth);
void Steal(CacheState& source);
void Split(const CacheState& source);
@@ -639,8 +635,15 @@ class LiftoffAssembler : public MacroAssembler {
// avoids making each subsequent (conditional) branch repeat this work.
void PrepareForBranch(uint32_t arity, LiftoffRegList pinned);
enum JumpDirection { kForwardJump, kBackwardJump };
// These methods handle control-flow merges. {MergeIntoNewState} is used to
// generate a new {CacheState} for a merge point, and also emits code to
// transfer values from the current state to the new merge state.
// {MergeFullStackWith} and {MergeStackWith} then later generate the code for
// more merges into an existing state.
V8_NODISCARD CacheState MergeIntoNewState(uint32_t num_locals, uint32_t arity,
uint32_t stack_depth);
void MergeFullStackWith(CacheState& target);
enum JumpDirection { kForwardJump, kBackwardJump };
void MergeStackWith(CacheState& target, uint32_t arity, JumpDirection);
void Spill(VarState* slot);

src/wasm/baseline/liftoff-compiler.cc

@@ -1370,14 +1370,14 @@ class LiftoffCompiler {
MaybeOSR();
} else {
DCHECK(target->is_incomplete_try());
if (!target->try_info->catch_reached) {
target->try_info->catch_state.InitMerge(
*__ cache_state(), __ num_locals(), 1,
target->stack_depth + target->num_exceptions);
if (target->try_info->catch_reached) {
__ MergeStackWith(target->try_info->catch_state, 1,
LiftoffAssembler::kForwardJump);
} else {
target->try_info->catch_state = __ MergeIntoNewState(
__ num_locals(), 1, target->stack_depth + target->num_exceptions);
target->try_info->catch_reached = true;
}
__ MergeStackWith(target->try_info->catch_state, 1,
LiftoffAssembler::kForwardJump);
__ emit_jump(&target->try_info->catch_label);
}
}
@@ -1478,19 +1478,13 @@ class LiftoffCompiler {
}
void FallThruTo(FullDecoder* decoder, Control* c) {
if (!c->end_merge.reached) {
c->label_state.InitMerge(*__ cache_state(), __ num_locals(),
c->end_merge.arity,
c->stack_depth + c->num_exceptions);
}
DCHECK(!c->is_try_catchall());
if (c->is_try_catch()) {
// Drop the implicit exception ref if any. There may be none if this is a
// catch-less try block.
DCHECK_IMPLIES(c->is_try_catchall(), !c->end_merge.reached);
if (c->end_merge.reached) {
__ MergeStackWith(c->label_state, c->br_merge()->arity,
LiftoffAssembler::kForwardJump);
} else {
__ MergeFullStackWith(c->label_state);
c->label_state = __ MergeIntoNewState(__ num_locals(), c->end_merge.arity,
c->stack_depth + c->num_exceptions);
}
__ emit_jump(c->label.get());
TraceCacheState(decoder);
@@ -1513,13 +1507,12 @@ class LiftoffCompiler {
__ cache_state()->Steal(c->label_state);
} else if (c->reachable()) {
// No merge yet at the end of the if, but we need to create a merge for
// both arms of this if. Thus init the merge point from the else
// state, then merge the if state into that.
// both arms of this if. Thus init the merge point from the current
// state, then merge the else state into that.
DCHECK_EQ(c->start_merge.arity, c->end_merge.arity);
c->label_state.InitMerge(c->else_state->state, __ num_locals(),
c->start_merge.arity,
c->label_state =
__ MergeIntoNewState(__ num_locals(), c->start_merge.arity,
c->stack_depth + c->num_exceptions);
__ MergeFullStackWith(c->label_state);
__ emit_jump(c->label.get());
// Merge the else state into the end state. Set this state as the current
// state first so helper functions know which registers are in use.
@@ -2742,14 +2735,15 @@ class LiftoffCompiler {
// and found to not make a difference.
}
}
if (!target->br_merge()->reached) {
target->label_state.InitMerge(
*__ cache_state(), __ num_locals(), target->br_merge()->arity,
target->stack_depth + target->num_exceptions);
if (target->br_merge()->reached) {
__ MergeStackWith(target->label_state, target->br_merge()->arity,
target->is_loop() ? LiftoffAssembler::kBackwardJump
: LiftoffAssembler::kForwardJump);
} else {
target->label_state =
__ MergeIntoNewState(__ num_locals(), target->br_merge()->arity,
target->stack_depth + target->num_exceptions);
}
__ MergeStackWith(target->label_state, target->br_merge()->arity,
target->is_loop() ? LiftoffAssembler::kBackwardJump
: LiftoffAssembler::kForwardJump);
__ jmp(target->label.get());
}
@@ -2912,12 +2906,13 @@ class LiftoffCompiler {
void Else(FullDecoder* decoder, Control* c) {
if (c->reachable()) {
if (!c->end_merge.reached) {
c->label_state.InitMerge(*__ cache_state(), __ num_locals(),
c->end_merge.arity,
if (c->end_merge.reached) {
__ MergeFullStackWith(c->label_state);
} else {
c->label_state =
__ MergeIntoNewState(__ num_locals(), c->end_merge.arity,
c->stack_depth + c->num_exceptions);
}
__ MergeFullStackWith(c->label_state);
__ emit_jump(c->label.get());
}
__ bind(c->else_state->label.get());
@@ -4675,14 +4670,15 @@ class LiftoffCompiler {
Control* current_try =
decoder->control_at(decoder->control_depth_of_current_catch());
DCHECK_NOT_NULL(current_try->try_info);
if (!current_try->try_info->catch_reached) {
current_try->try_info->catch_state.InitMerge(
*__ cache_state(), __ num_locals(), 1,
if (current_try->try_info->catch_reached) {
__ MergeStackWith(current_try->try_info->catch_state, 1,
LiftoffAssembler::kForwardJump);
} else {
current_try->try_info->catch_state = __ MergeIntoNewState(
__ num_locals(), 1,
current_try->stack_depth + current_try->num_exceptions);
current_try->try_info->catch_reached = true;
}
__ MergeStackWith(current_try->try_info->catch_state, 1,
LiftoffAssembler::kForwardJump);
__ emit_jump(&current_try->try_info->catch_label);
__ bind(&skip_handler);