[arm64][turbofan] Implement on-stack returns.
This is the implementation of crrev.com/c/766371 for arm64. Original description:

Add the ability to return (multiple) return values on the stack:

- Extend stack frames with a new buffer region for return slots. This region is located at the end of a caller's frame such that its slots can be indexed as caller frame slots in a callee (located beyond its parameters) and assigned return values.
- Adjust stack frame construction and deconstruction accordingly.
- Extend linkage computation to support register plus stack returns.
- Reserve return slots in the caller frame when respective calls occur.
- Introduce and generate architecture instructions ('peek') for reading back results from return slots in the caller.
- Aggressive tests.
- Some minor clean-up.

R=v8-arm-ports@googlegroups.com

Change-Id: I6e344a23f359861c9a1ff5a6511651c2176ce9a8
Reviewed-on: https://chromium-review.googlesource.com/842545
Reviewed-by: Ben Titzer <titzer@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50585}
Parent: 997d5a6a5c
Commit: c710e6584c
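A reading aid before the diff, not part of the commit: the frame layout the description implies, sketched with illustrative region sizes and slot numbers.

// Caller's frame around a call with on-stack returns (stack grows downward):
//
//   +------------------------------+
//   | caller locals / spill slots  |
//   +------------------------------+
//   | return slot 1                |  new return-slot buffer at the end
//   | return slot 0                |  of the caller's frame
//   +------------------------------+
//   | outgoing stack parameters    |
//   +------------------------------+  <- sp at the call site
//
// Seen from the callee, the return slots lie beyond its parameters, so it
// can address them as ordinary caller frame slots and store results there;
// the caller reads them back afterwards with the new 'peek' instruction.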
src/compiler/arm64/code-generator-arm64.cc

@@ -1224,6 +1224,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
+    case kArm64Peek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+          __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
+        }
+      } else {
+        __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
+      }
+      break;
+    }
     case kArm64Clz:
       __ Clz(i.OutputRegister64(), i.InputRegister64(0));
       break;
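The reverse_slot input counts backward from the end of the caller's frame, where the return slots live, so total slots minus reverse_slot yields a regular frame slot index. A standalone model of that arithmetic under assumed constants (V8's real FrameSlotToFPOffset depends on frame constants not shown in this hunk):

#include <cstdio>

// Assumed, simplified frame model: 8-byte slots and one fixed slot above
// fp, so frame slot k sits at fp + (1 - k) * 8. Only the relative ordering
// of the offsets matters for the illustration.
int FrameSlotToFPOffset(int slot) { return (1 - slot) * 8; }

int main() {
  const int total_frame_slots = 16;  // hypothetical caller frame size
  // reverse_slot 1 names the last slot of the frame, 2 the one before it.
  for (int reverse_slot = 1; reverse_slot <= 3; ++reverse_slot) {
    int offset = FrameSlotToFPOffset(total_frame_slots - reverse_slot);
    std::printf("reverse_slot %d -> [fp%+d]\n", reverse_slot, offset);
  }
  return 0;
}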
@@ -2287,6 +2304,9 @@ void CodeGenerator::AssembleConstructFrame() {
                                 descriptor->CalleeSavedRegisters());
   CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
                                    descriptor->CalleeSavedFPRegisters());
+  // The number of slots for returns has to be even to ensure the correct stack
+  // alignment.
+  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
 
   if (frame_access_state()->has_frame()) {
     // Link the frame
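The rounding is needed because AArch64 keeps sp 16-byte aligned while frame slots are 8 bytes wide, so return slots can only be claimed in pairs. A minimal sketch of the RoundUp behavior used above (the real helper is V8's generic utility):

constexpr int RoundUp(int value, int multiple) {
  return ((value + multiple - 1) / multiple) * multiple;
}

static_assert(RoundUp(0, 2) == 0, "no return slots: nothing to claim");
static_assert(RoundUp(3, 2) == 4, "odd counts pad by one slot");
static_assert(RoundUp(4, 2) == 4, "even counts are kept as they are");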
@@ -2358,6 +2378,7 @@ void CodeGenerator::AssembleConstructFrame() {
     // Skip callee-saved slots, which are pushed below.
     shrink_slots -= saves.Count();
     shrink_slots -= saves_fp.Count();
+    shrink_slots -= returns;
 
     // Build remainder of frame, including accounting for and filling-in
     // frame-specific header information, i.e. claiming the extra slot that
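The subtractions keep regions from being allocated twice: callee saves are pushed and return slots claimed separately, so only the remaining spill area goes through the generic claim. A toy version of the accounting, with made-up numbers:

#include <cassert>

int main() {
  int shrink_slots = 20;       // hypothetical: slots still to be allocated
  const int saves = 6;         // callee-saved general-purpose registers
  const int saves_fp = 2;      // callee-saved floating-point registers
  const int returns = 2;       // rounded return-slot count
  shrink_slots -= saves;       // pushed separately, not claimed here
  shrink_slots -= saves_fp;    // likewise
  shrink_slots -= returns;     // claimed separately at the frame's end
  assert(shrink_slots == 10);  // only the spill area remains to claim
  return 0;
}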
@@ -2400,11 +2421,21 @@ void CodeGenerator::AssembleConstructFrame() {
   // CPURegList::GetCalleeSaved(): x30 is missing.
   // DCHECK(saves.list() == CPURegList::GetCalleeSaved().list());
   __ PushCPURegList(saves);
+
+  if (returns != 0) {
+    __ Claim(returns);
+  }
 }
 
 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
   CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
+
+  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
+
+  if (returns != 0) {
+    __ Drop(returns);
+  }
 
   // Restore registers.
   CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
                                 descriptor->CalleeSavedRegisters());
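Prologue and epilogue stay balanced because both compute the same rounded count: AssembleConstructFrame claims the return slots after pushing the callee saves, and AssembleReturn drops them before restoring. A toy sp model of that pairing, not V8 code:

#include <cassert>

struct ToySp {
  int sp = 0;
  void Claim(int slots) { sp -= slots; }  // the stack grows downward
  void Drop(int slots) { sp += slots; }
};

int main() {
  const int return_slot_count = 3;                        // hypothetical
  const int returns = ((return_slot_count + 1) / 2) * 2;  // RoundUp(_, 2)
  ToySp s;
  s.Claim(returns);  // prologue: reserve the return-slot region
  // ... the callee writes its stack results into the reserved slots ...
  s.Drop(returns);   // epilogue: release the region again
  assert(s.sp == 0);
  return 0;
}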
src/compiler/arm64/instruction-codes-arm64.h

@@ -82,6 +82,7 @@ namespace compiler {
   V(Arm64Claim)                    \
   V(Arm64Poke)                     \
   V(Arm64PokePair)                 \
+  V(Arm64Peek)                     \
   V(Arm64Float32Cmp)               \
   V(Arm64Float32Add)               \
   V(Arm64Float32Sub)               \
src/compiler/arm64/instruction-scheduler-arm64.cc

@@ -292,6 +292,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64Ldrsw:
     case kArm64LdrW:
     case kArm64Ldr:
+    case kArm64Peek:
      return kIsLoadOperation;
 
     case kArm64Float64Mod:  // This opcode will call a C Function which can
src/compiler/arm64/instruction-selector-arm64.cc

@@ -1721,7 +1721,25 @@ void InstructionSelector::EmitPrepareArguments(
 void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
                                              const CallDescriptor* descriptor,
                                              Node* node) {
-  // TODO(ahaas): Port.
+  Arm64OperandGenerator g(this);
+
+  int reverse_slot = 0;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    reverse_slot += output.location.GetSizeInPointers();
+    // Skip any alignment holes in nodes.
+    if (output.node == nullptr) continue;
+    DCHECK(!descriptor->IsCFunctionCall());
+
+    if (output.location.GetType() == MachineType::Float32()) {
+      MarkAsFloat32(output.node);
+    } else if (output.location.GetType() == MachineType::Float64()) {
+      MarkAsFloat64(output.node);
+    }
+
+    Emit(kArm64Peek, g.DefineAsRegister(output.node),
+         g.UseImmediate(reverse_slot));
+  }
 }
 
 bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
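Note the order inside the loop: reverse_slot advances by the value's size before the peek is emitted, so a value spanning several pointer slots is read at its far end, and alignment holes (null nodes) still advance the index. A toy trace under assumed sizes, such as a two-slot value on a hypothetical 32-bit configuration:

#include <cstdio>
#include <vector>

int main() {
  // Assumed GetSizeInPointers() results for three stack returns; the
  // middle one spans two slots (say, an Int64 on a 32-bit target).
  const std::vector<int> sizes = {1, 2, 1};
  int reverse_slot = 0;
  for (int size : sizes) {
    reverse_slot += size;  // advance first, then peek at the new index
    std::printf("emit peek at reverse_slot %d\n", reverse_slot);
  }
  return 0;  // prints 1, 3, 4: the wide value pushes later reads outward
}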
src/compiler/frame.cc

@@ -22,6 +22,13 @@ Frame::Frame(int fixed_frame_size_in_slots)
 
 int Frame::AlignFrame(int alignment) {
   int alignment_slots = alignment / kPointerSize;
+  // We have to align return slots separately, because they are claimed
+  // separately on the stack.
+  int return_delta =
+      alignment_slots - (return_slot_count_ & (alignment_slots - 1));
+  if (return_delta != alignment_slots) {
+    frame_slot_count_ += return_delta;
+  }
   int delta = alignment_slots - (frame_slot_count_ & (alignment_slots - 1));
   if (delta != alignment_slots) {
     frame_slot_count_ += delta;
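The masking relies on alignment_slots being a power of two, so count & (alignment_slots - 1) is count modulo alignment_slots, and a delta equal to alignment_slots means the count is already aligned. The padding computation, spelled out as a sketch rather than V8 code:

constexpr int PaddingFor(int count, int alignment_slots) {
  int delta = alignment_slots - (count & (alignment_slots - 1));
  return delta == alignment_slots ? 0 : delta;
}

static_assert(PaddingFor(4, 2) == 0, "even count: already aligned");
static_assert(PaddingFor(5, 2) == 1, "odd count: one padding slot");
static_assert(PaddingFor(8, 4) == 0, "already 4-slot aligned");
static_assert(PaddingFor(6, 4) == 2, "pad 6 up to 8");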
test/cctest/cctest.status

@@ -169,7 +169,7 @@
 
 ##############################################################################
 # TODO(ahaas): Port multiple return values to ARM, MIPS, S390 and PPC
-['arch == arm64 or arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
+['arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
   'test-multiple-return/*': [SKIP],
 }],
 
test/cctest/compiler/test-multiple-return.cc

@@ -271,42 +271,47 @@ TEST_MULTI(Float64, MachineType::Float64())
 #undef TEST_MULTI
 
 void ReturnLastValue(MachineType type) {
-  v8::internal::AccountingAllocator allocator;
-  Zone zone(&allocator, ZONE_NAME);
-  // Let 2 returns be on the stack.
-  const int return_count = num_registers(type) + 2;
-
-  CallDescriptor* desc = CreateMonoCallDescriptor(&zone, return_count, 0, type);
-
-  HandleAndZoneScope handles;
-  RawMachineAssembler m(handles.main_isolate(),
-                        new (handles.main_zone()) Graph(handles.main_zone()),
-                        desc, MachineType::PointerRepresentation(),
-                        InstructionSelector::SupportedMachineOperatorFlags());
-
-  std::unique_ptr<Node* []> returns(new Node*[return_count]);
-
-  for (int i = 0; i < return_count; ++i) {
-    returns[i] = Constant(m, type, i);
-  }
-
-  m.Return(return_count, returns.get());
-
-  CompilationInfo info(ArrayVector("testing"), handles.main_zone(), Code::STUB);
-  Handle<Code> code = Pipeline::GenerateCodeForTesting(
-      &info, handles.main_isolate(), desc, m.graph(), m.Export());
-
-  // Generate caller.
-  int expect = return_count - 1;
-  RawMachineAssemblerTester<int32_t> mt;
-  Node* code_node = mt.HeapConstant(code);
-
-  Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node);
-
-  mt.Return(ToInt32(
-      mt, type, mt.AddNode(mt.common()->Projection(return_count - 1), call)));
-
-  CHECK_EQ(expect, mt.Call());
+  for (int unused_stack_slots = 0; unused_stack_slots <= 2;
+       ++unused_stack_slots) {
+    v8::internal::AccountingAllocator allocator;
+    Zone zone(&allocator, ZONE_NAME);
+    // Let {unused_stack_slots + 1} returns be on the stack.
+    const int return_count = num_registers(type) + unused_stack_slots + 1;
+
+    CallDescriptor* desc =
+        CreateMonoCallDescriptor(&zone, return_count, 0, type);
+
+    HandleAndZoneScope handles;
+    RawMachineAssembler m(handles.main_isolate(),
+                          new (handles.main_zone()) Graph(handles.main_zone()),
+                          desc, MachineType::PointerRepresentation(),
+                          InstructionSelector::SupportedMachineOperatorFlags());
+
+    std::unique_ptr<Node* []> returns(new Node*[return_count]);
+
+    for (int i = 0; i < return_count; ++i) {
+      returns[i] = Constant(m, type, i);
+    }
+
+    m.Return(return_count, returns.get());
+
+    CompilationInfo info(ArrayVector("testing"), handles.main_zone(),
+                         Code::STUB);
+    Handle<Code> code = Pipeline::GenerateCodeForTesting(
+        &info, handles.main_isolate(), desc, m.graph(), m.Export());
+
+    // Generate caller.
+    int expect = return_count - 1;
+    RawMachineAssemblerTester<int32_t> mt;
+    Node* code_node = mt.HeapConstant(code);
+
+    Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node);
+
+    mt.Return(ToInt32(
+        mt, type, mt.AddNode(mt.common()->Projection(return_count - 1), call)));
+
+    CHECK_EQ(expect, mt.Call());
+  }
 }
 
 TEST(ReturnLastValueInt32) { ReturnLastValue(MachineType::Int32()); }
@@ -316,6 +321,65 @@ TEST(ReturnLastValueInt64) { ReturnLastValue(MachineType::Int64()); }
 TEST(ReturnLastValueFloat32) { ReturnLastValue(MachineType::Float32()); }
 TEST(ReturnLastValueFloat64) { ReturnLastValue(MachineType::Float64()); }
 
+void ReturnSumOfReturns(MachineType type) {
+  for (int unused_stack_slots = 0; unused_stack_slots <= 2;
+       ++unused_stack_slots) {
+    v8::internal::AccountingAllocator allocator;
+    Zone zone(&allocator, ZONE_NAME);
+    // Let {unused_stack_slots + 1} returns be on the stack.
+    const int return_count = num_registers(type) + unused_stack_slots + 1;
+
+    CallDescriptor* desc =
+        CreateMonoCallDescriptor(&zone, return_count, 0, type);
+
+    HandleAndZoneScope handles;
+    RawMachineAssembler m(handles.main_isolate(),
+                          new (handles.main_zone()) Graph(handles.main_zone()),
+                          desc, MachineType::PointerRepresentation(),
+                          InstructionSelector::SupportedMachineOperatorFlags());
+
+    std::unique_ptr<Node* []> returns(new Node*[return_count]);
+
+    for (int i = 0; i < return_count; ++i) {
+      returns[i] = Constant(m, type, i);
+    }
+
+    m.Return(return_count, returns.get());
+
+    CompilationInfo info(ArrayVector("testing"), handles.main_zone(),
+                         Code::STUB);
+    Handle<Code> code = Pipeline::GenerateCodeForTesting(
+        &info, handles.main_isolate(), desc, m.graph(), m.Export());
+
+    // Generate caller.
+    RawMachineAssemblerTester<int32_t> mt;
+    Node* code_node = mt.HeapConstant(code);
+
+    Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node);
+
+    uint32_t expect = 0;
+    Node* result = mt.Int32Constant(0);
+
+    for (int i = 0; i < return_count; ++i) {
+      expect += i;
+      result = mt.Int32Add(
+          result,
+          ToInt32(mt, type, mt.AddNode(mt.common()->Projection(i), call)));
+    }
+
+    mt.Return(result);
+
+    CHECK_EQ(expect, mt.Call());
+  }
+}
+
+TEST(ReturnSumOfReturnsInt32) { ReturnSumOfReturns(MachineType::Int32()); }
+#if (!V8_TARGET_ARCH_32_BIT)
+TEST(ReturnSumOfReturnsInt64) { ReturnSumOfReturns(MachineType::Int64()); }
+#endif
+TEST(ReturnSumOfReturnsFloat32) { ReturnSumOfReturns(MachineType::Float32()); }
+TEST(ReturnSumOfReturnsFloat64) { ReturnSumOfReturns(MachineType::Float64()); }
+
 } // namespace compiler
 } // namespace internal
 } // namespace v8