PPC/s390: [regalloc] Resolve tail-call gap moves

Port 2f4397d652

Original Commit Message:

    Tail calls generate parallel moves that don't preserve the stack
    layout, which is not always correctly handled by the current
    gap resolver algorithm. Introduce a more robust algorithm that
    handles any parallel move.

    Design doc:
    https://docs.google.com/document/d/1WbX5B4ej9MzsJ28JS4J5cF2eiJhKyX39vAUB7GSA5YE/edit?usp=sharing

    Also remove the move-splitting logic. The problem that it addresses
    (swapping aliasing FP registers of different widths on arm) is already
    handled by the new algorithm.

R=thibaudm@chromium.org, joransiu@ca.ibm.com, junyan@redhat.com, midawson@redhat.com
BUG=
LOG=N

Change-Id: I6122e9fa7543171eff91435de1804f3a788455f5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4035332
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Reviewed-by: Joran Siu <joransiu@ca.ibm.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#84394}
This commit is contained in:
Milad Fa 2022-11-18 14:11:41 -05:00 committed by V8 LUCI CQ
parent 2be56a5b4c
commit 3ecadd8184
2 changed files with 106 additions and 64 deletions

View File

@ -3566,6 +3566,58 @@ void CodeGenerator::PrepareForDeoptimizationExits(
__ CheckTrampolinePoolQuick(total_size);
}
// Copies {source} into a newly reserved temporary stack slot and returns the
// operand describing that slot. The number of reserved slots is added to
// {temp_slots_} and to the frame access state's SP delta.
AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
  const auto rep = LocationOperand::cast(source)->representation();
  const int slot_count = ElementSizeInPointers(rep);
  // Slot index of the temporary: last frame slot, plus the SP delta as it is
  // before this push, plus the slots reserved here.
  const int temp_slot_index =
      (frame_access_state()->frame()->GetTotalFrameSlotCount() - 1) +
      frame_access_state()->sp_delta() + slot_count;
  AllocatedOperand temp_slot(LocationOperand::STACK_SLOT, rep,
                             temp_slot_index);
  const bool source_is_fp_stack_slot =
      source->IsFloatStackSlot() || source->IsDoubleStackSlot();
  if (!source_is_fp_stack_slot) {
    // Lower sp first, record the new delta, then let AssembleMove write into
    // the reserved slot.
    __ addi(sp, sp, Operand(-(slot_count * kSystemPointerSize)));
    frame_access_state()->IncreaseSPDelta(slot_count);
    AssembleMove(source, &temp_slot);
  } else {
    // Load the source slot through r0 and push it. The source address is
    // formed before the SP delta is updated.
    PPCOperandConverter converter(this, nullptr);
    __ LoadU64(r0, converter.ToMemOperand(source), r0);
    __ Push(r0);
    frame_access_state()->IncreaseSPDelta(slot_count);
  }
  temp_slots_ += slot_count;
  return temp_slot;
}
// Moves the value held in the most recently reserved temporary stack slot(s)
// into {dest} and releases them. {rep} determines how many slots are released;
// the same count is subtracted from {temp_slots_} and from the SP delta.
void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
  const int slot_count = ElementSizeInPointers(rep);
  // The SP delta is lowered up front, before any operand is resolved.
  frame_access_state()->IncreaseSPDelta(-slot_count);
  const bool dest_is_fp_stack_slot =
      dest->IsFloatStackSlot() || dest->IsDoubleStackSlot();
  if (!dest_is_fp_stack_slot) {
    // Adding {slot_count} back yields the slot index that was in effect
    // before the delta was lowered above.
    const int temp_slot_index =
        (frame_access_state()->frame()->GetTotalFrameSlotCount() - 1) +
        frame_access_state()->sp_delta() + slot_count;
    AllocatedOperand temp_slot(LocationOperand::STACK_SLOT, rep,
                               temp_slot_index);
    AssembleMove(&temp_slot, dest);
    __ addi(sp, sp, Operand(slot_count * kSystemPointerSize));
  } else {
    // Pop into a scratch register and store it to the destination slot.
    PPCOperandConverter converter(this, nullptr);
    UseScratchRegisterScope temps(tasm());
    Register scratch = temps.Acquire();
    __ Pop(scratch);
    __ StoreU64(scratch, converter.ToMemOperand(dest), r0);
  }
  temp_slots_ -= slot_count;
}
// Releases every temporary stack slot still counted in {temp_slots_}: lowers
// the SP delta by that count, raises sp by the matching number of bytes and
// resets the counter. Does nothing when no temporary slots are outstanding.
void CodeGenerator::PopTempStackSlots() {
  if (temp_slots_ <= 0) return;
  frame_access_state()->IncreaseSPDelta(-temp_slots_);
  __ addi(sp, sp, Operand(temp_slots_ * kSystemPointerSize));
  temp_slots_ = 0;
}
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
// Must be kept in sync with {MoveTempLocationTo}.
auto rep = LocationOperand::cast(source)->representation();
@ -3581,22 +3633,7 @@ void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
} else {
DCHECK(!source->IsRegister() && !source->IsStackSlot());
// The scratch register is blocked by pending moves. Use the stack instead.
int new_slots = ElementSizeInPointers(rep);
PPCOperandConverter g(this, nullptr);
if (source->IsFloatStackSlot() || source->IsDoubleStackSlot()) {
__ LoadU64(r0, g.ToMemOperand(source), r0);
__ Push(r0);
} else {
// Bump the stack pointer and assemble the move.
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
__ addi(sp, sp, Operand(-(new_slots * kSystemPointerSize)));
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(source, &temp);
}
frame_access_state()->IncreaseSPDelta(new_slots);
Push(source);
}
}
@ -3612,23 +3649,7 @@ void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
AssembleMove(&scratch, dest);
} else {
DCHECK(!dest->IsRegister() && !dest->IsStackSlot());
PPCOperandConverter g(this, nullptr);
int new_slots = ElementSizeInPointers(rep);
frame_access_state()->IncreaseSPDelta(-new_slots);
if (dest->IsFloatStackSlot() || dest->IsDoubleStackSlot()) {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ Pop(scratch);
__ StoreU64(scratch, g.ToMemOperand(dest), r0);
} else {
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(&temp, dest);
__ addi(sp, sp, Operand(new_slots * kSystemPointerSize));
}
Pop(dest, rep);
}
move_cycle_ = MoveCycleState();
}

View File

@ -3596,6 +3596,56 @@ void CodeGenerator::FinishCode() {}
// Empty implementation: {exits} is not inspected, no code is emitted and no
// state is modified here.
void CodeGenerator::PrepareForDeoptimizationExits(
ZoneDeque<DeoptimizationExit*>* exits) {}
// Copies {source} into a newly reserved temporary stack slot and returns the
// operand describing that slot. The number of reserved slots is added to
// {temp_slots_} and to the frame access state's SP delta.
AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
  const auto rep = LocationOperand::cast(source)->representation();
  const int slot_count = ElementSizeInPointers(rep);
  // Slot index of the temporary: last frame slot, plus the SP delta as it is
  // before this push, plus the slots reserved here.
  const int temp_slot_index =
      (frame_access_state()->frame()->GetTotalFrameSlotCount() - 1) +
      frame_access_state()->sp_delta() + slot_count;
  AllocatedOperand temp_slot(LocationOperand::STACK_SLOT, rep,
                             temp_slot_index);
  const bool source_is_fp_stack_slot =
      source->IsFloatStackSlot() || source->IsDoubleStackSlot();
  if (!source_is_fp_stack_slot) {
    // Lower sp first, record the new delta, then let AssembleMove write into
    // the reserved slot.
    __ lay(sp, MemOperand(sp, -(slot_count * kSystemPointerSize)));
    frame_access_state()->IncreaseSPDelta(slot_count);
    AssembleMove(source, &temp_slot);
  } else {
    // Load the source slot through r1 and push it. The source address is
    // formed before the SP delta is updated.
    S390OperandConverter converter(this, nullptr);
    __ LoadU64(r1, converter.ToMemOperand(source));
    __ Push(r1);
    frame_access_state()->IncreaseSPDelta(slot_count);
  }
  temp_slots_ += slot_count;
  return temp_slot;
}
// Moves the value held in the most recently reserved temporary stack slot(s)
// into {dest} and releases them. {rep} determines how many slots are released;
// the same count is subtracted from {temp_slots_} and from the SP delta.
void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
  const int slot_count = ElementSizeInPointers(rep);
  // The SP delta is lowered up front, before any operand is resolved.
  frame_access_state()->IncreaseSPDelta(-slot_count);
  const bool dest_is_fp_stack_slot =
      dest->IsFloatStackSlot() || dest->IsDoubleStackSlot();
  if (!dest_is_fp_stack_slot) {
    // Adding {slot_count} back yields the slot index that was in effect
    // before the delta was lowered above.
    const int temp_slot_index =
        (frame_access_state()->frame()->GetTotalFrameSlotCount() - 1) +
        frame_access_state()->sp_delta() + slot_count;
    AllocatedOperand temp_slot(LocationOperand::STACK_SLOT, rep,
                               temp_slot_index);
    AssembleMove(&temp_slot, dest);
    __ lay(sp, MemOperand(sp, slot_count * kSystemPointerSize));
  } else {
    // Pop into r1 and store it to the destination slot.
    S390OperandConverter converter(this, nullptr);
    __ Pop(r1);
    __ StoreU64(r1, converter.ToMemOperand(dest));
  }
  temp_slots_ -= slot_count;
}
// Releases every temporary stack slot still counted in {temp_slots_}: lowers
// the SP delta by that count, raises sp by the matching number of bytes and
// resets the counter. Does nothing when no temporary slots are outstanding.
void CodeGenerator::PopTempStackSlots() {
  if (temp_slots_ <= 0) return;
  frame_access_state()->IncreaseSPDelta(-temp_slots_);
  __ lay(sp, MemOperand(sp, temp_slots_ * kSystemPointerSize));
  temp_slots_ = 0;
}
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
// Must be kept in sync with {MoveTempLocationTo}.
auto rep = LocationOperand::cast(source)->representation();
@ -3611,22 +3661,7 @@ void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
} else {
DCHECK(!source->IsRegister() && !source->IsStackSlot());
// The scratch register is blocked by pending moves. Use the stack instead.
int new_slots = ElementSizeInPointers(rep);
S390OperandConverter g(this, nullptr);
if (source->IsFloatStackSlot() || source->IsDoubleStackSlot()) {
__ LoadU64(r1, g.ToMemOperand(source));
__ Push(r1);
} else {
// Bump the stack pointer and assemble the move.
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
__ lay(sp, MemOperand(sp, -(new_slots * kSystemPointerSize)));
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(source, &temp);
}
frame_access_state()->IncreaseSPDelta(new_slots);
Push(source);
}
}
@ -3642,21 +3677,7 @@ void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
AssembleMove(&scratch, dest);
} else {
DCHECK(!dest->IsRegister() && !dest->IsStackSlot());
S390OperandConverter g(this, nullptr);
int new_slots = ElementSizeInPointers(rep);
frame_access_state()->IncreaseSPDelta(-new_slots);
if (dest->IsFloatStackSlot() || dest->IsDoubleStackSlot()) {
__ Pop(r1);
__ StoreU64(r1, g.ToMemOperand(dest));
} else {
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(&temp, dest);
__ lay(sp, MemOperand(sp, new_slots * kSystemPointerSize));
}
Pop(dest, rep);
}
move_cycle_ = MoveCycleState();
}