[arm][turbofan] Implement on-stack returns.

This is the implementation of crrev.com/c/766371 for arm.

Original description:

Add the ability to return (multiple) return values on the stack:

- Extend stack frames with a new buffer region for return slots.
  This region is located at the end of a caller's frame such that
  its slots can be indexed as caller frame slots in a callee
  (located beyond its parameters) and assigned return values.
- Adjust stack frame construction and deconstruction accordingly.
- Extend linkage computation to support register plus stack returns.
- Reserve return slots in caller frame when respective calls occur.
- Introduce and generate architecture instructions ('peek') for
  reading back results from return slots in the caller.
- Aggressive tests.
- Some minor clean-up.

R=v8-arm-ports@googlegroups.com

Change-Id: I7d61424a184d5778baf1d1270013f4e0c7ec68b4
Reviewed-on: https://chromium-review.googlesource.com/836608
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50237}
This commit is contained in:
Andreas Haas 2017-12-20 13:01:23 +01:00 committed by Commit Bot
parent d2ef40dd59
commit 9c7b6e1e8a
5 changed files with 52 additions and 3 deletions

View File

@ -1675,6 +1675,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmPeek: {
// Loads a value from a frame slot into this instruction's single output.
// The slot is identified by a "reverse" index stored in the opcode's
// MiscField; it is subtracted from the total frame slot count to obtain the
// frame slot, which is then converted to an fp-relative byte offset.
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
if (instr->OutputAt(0)->IsFPRegister()) {
// Floating-point output: choose the double or float destination register
// according to the output operand's machine representation.
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
if (op->representation() == MachineRepresentation::kFloat64) {
__ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
} else {
// Only kFloat32 is expected here besides kFloat64.
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
}
} else {
// Non-FP output: plain ldr into the output register.
__ ldr(i.OutputRegister(), MemOperand(fp, offset));
}
break;
}
case kArmF32x4Splat: {
int src_code = i.InputFloatRegister(0).code();
__ vdup(Neon32, i.OutputSimd128Register(),
@ -2902,8 +2919,9 @@ void CodeGenerator::AssembleConstructFrame() {
}
}
// Skip callee-saved slots, which are pushed below.
// Skip callee-saved and return slots, which are pushed below.
shrink_slots -= base::bits::CountPopulation(saves);
shrink_slots -= frame()->GetReturnSlotCount();
shrink_slots -= 2 * base::bits::CountPopulation(saves_fp);
if (shrink_slots > 0) {
__ sub(sp, sp, Operand(shrink_slots * kPointerSize));
@ -2919,16 +2937,29 @@ void CodeGenerator::AssembleConstructFrame() {
__ vstm(db_w, sp, DwVfpRegister::from_code(first),
DwVfpRegister::from_code(last));
}
if (saves != 0) {
// Save callee-saved registers.
__ stm(db_w, sp, saves);
}
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
// Create space for returns.
__ sub(sp, sp, Operand(returns * kPointerSize));
}
}
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
int pop_count = static_cast<int>(descriptor->StackParameterCount());
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
// Free space of returns.
__ add(sp, sp, Operand(returns * kPointerSize));
}
// Restore registers.
const RegList saves = descriptor->CalleeSavedRegisters();
if (saves != 0) {

View File

@ -124,6 +124,7 @@ namespace compiler {
V(ArmStr) \
V(ArmPush) \
V(ArmPoke) \
V(ArmPeek) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \

View File

@ -262,6 +262,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmLdrh:
case kArmLdrsh:
case kArmLdr:
case kArmPeek:
return kIsLoadOperation;
case kArmVstrF32:

View File

@ -1573,7 +1573,23 @@ void InstructionSelector::EmitPrepareArguments(
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
                                             const CallDescriptor* descriptor,
                                             Node* node) {
  // Emits one kArmPeek instruction for every call result that is located in a
  // caller frame slot, so the value can be read back into a register.
  // Results in other locations are ignored here.
  ArmOperandGenerator g(this);

  // Accumulated size (in pointers) of all caller-frame-slot results seen so
  // far. It is advanced *before* the peek is emitted, so the value encoded in
  // the opcode already includes the current result's own size.
  int slots_so_far = 0;
  for (PushParameter output : *results) {
    if (output.location.IsCallerFrameSlot()) {
      slots_so_far += output.location.GetSizeInPointers();

      // A null node marks an alignment hole: it occupies slot space (counted
      // above) but there is nothing to read back.
      if (output.node != nullptr) {
        DCHECK(!descriptor->IsCFunctionCall());

        // Record the floating-point representation of the result node, if any.
        if (output.location.GetType() == MachineType::Float32()) {
          MarkAsFloat32(output.node);
        } else if (output.location.GetType() == MachineType::Float64()) {
          MarkAsFloat64(output.node);
        }

        InstructionOperand destination = g.DefineAsRegister(output.node);
        Emit(kArmPeek | MiscField::encode(slots_so_far), destination);
      }
    }
  }
}
bool InstructionSelector::IsTailCallAddressImmediate() { return false; }

View File

@ -170,7 +170,7 @@
##############################################################################
# TODO(ahaas): Port multiple return values to ARM, MIPS, S390 and PPC
['arch == arm or arch == arm64 or arch == mips or arch == mips64 or arch == mipsel or arch == mips64el or arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
['arch == arm64 or arch == mips or arch == mips64 or arch == mipsel or arch == mips64el or arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
'test-multiple-return/*': [SKIP],
}],