From c710e6584cb110b9451742874b1ea455adf3f280 Mon Sep 17 00:00:00 2001 From: Andreas Haas Date: Mon, 15 Jan 2018 15:41:45 +0100 Subject: [PATCH] [arm64][turbofan] Implement on-stack returns. This is the implementation of crrev.com/c/766371 for arm64. Original description: Add the ability to return (multiple) return values on the stack: - Extend stack frames with a new buffer region for return slots. This region is located at the end of a caller's frame such that its slots can be indexed as caller frame slots in a callee (located beyond its parameters) and assigned return values. - Adjust stack frame construction and deconstruction accordingly. - Extend linkage computation to support register plus stack returns. - Reserve return slots in caller frame when respective calls occur. - Introduce and generate architecture instructions ('peek') for reading back results from return slots in the caller. - Aggressive tests. - Some minor clean-up. R=v8-arm-ports@googlegroups.com Change-Id: I6e344a23f359861c9a1ff5a6511651c2176ce9a8 Reviewed-on: https://chromium-review.googlesource.com/842545 Reviewed-by: Ben Titzer Commit-Queue: Andreas Haas Cr-Commit-Position: refs/heads/master@{#50585} --- src/compiler/arm64/code-generator-arm64.cc | 31 +++++ src/compiler/arm64/instruction-codes-arm64.h | 1 + .../arm64/instruction-scheduler-arm64.cc | 1 + .../arm64/instruction-selector-arm64.cc | 20 ++- src/compiler/frame.cc | 7 + test/cctest/cctest.status | 2 +- test/cctest/compiler/test-multiple-return.cc | 126 +++++++++++++----- 7 files changed, 155 insertions(+), 33 deletions(-) diff --git a/src/compiler/arm64/code-generator-arm64.cc b/src/compiler/arm64/code-generator-arm64.cc index 30cae077ea..62b0bdab98 100644 --- a/src/compiler/arm64/code-generator-arm64.cc +++ b/src/compiler/arm64/code-generator-arm64.cc @@ -1224,6 +1224,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; } + case kArm64Peek: { + int reverse_slot = i.InputInt32(0); + int offset = 
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); + if (instr->OutputAt(0)->IsFPRegister()) { + LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); + if (op->representation() == MachineRepresentation::kFloat64) { + __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset)); + } else { + DCHECK_EQ(MachineRepresentation::kFloat32, op->representation()); + __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset)); + } + } else { + __ Ldr(i.OutputRegister(), MemOperand(fp, offset)); + } + break; + } case kArm64Clz: __ Clz(i.OutputRegister64(), i.InputRegister64(0)); break; @@ -2287,6 +2304,9 @@ void CodeGenerator::AssembleConstructFrame() { descriptor->CalleeSavedRegisters()); CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits, descriptor->CalleeSavedFPRegisters()); + // The number of slots for returns has to be even to ensure the correct stack + // alignment. + const int returns = RoundUp(frame()->GetReturnSlotCount(), 2); if (frame_access_state()->has_frame()) { // Link the frame @@ -2358,6 +2378,7 @@ void CodeGenerator::AssembleConstructFrame() { // Skip callee-saved slots, which are pushed below. shrink_slots -= saves.Count(); shrink_slots -= saves_fp.Count(); + shrink_slots -= returns; // Build remainder of frame, including accounting for and filling-in // frame-specific header information, i.e. claiming the extra slot that @@ -2400,11 +2421,21 @@ void CodeGenerator::AssembleConstructFrame() { // CPURegList::GetCalleeSaved(): x30 is missing. // DCHECK(saves.list() == CPURegList::GetCalleeSaved().list()); __ PushCPURegList(saves); + + if (returns != 0) { + __ Claim(returns); + } } void CodeGenerator::AssembleReturn(InstructionOperand* pop) { CallDescriptor* descriptor = linkage()->GetIncomingDescriptor(); + const int returns = RoundUp(frame()->GetReturnSlotCount(), 2); + + if (returns != 0) { + __ Drop(returns); + } + // Restore registers. 
CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits, descriptor->CalleeSavedRegisters()); diff --git a/src/compiler/arm64/instruction-codes-arm64.h b/src/compiler/arm64/instruction-codes-arm64.h index 06741d67c2..820b55a99d 100644 --- a/src/compiler/arm64/instruction-codes-arm64.h +++ b/src/compiler/arm64/instruction-codes-arm64.h @@ -82,6 +82,7 @@ namespace compiler { V(Arm64Claim) \ V(Arm64Poke) \ V(Arm64PokePair) \ + V(Arm64Peek) \ V(Arm64Float32Cmp) \ V(Arm64Float32Add) \ V(Arm64Float32Sub) \ diff --git a/src/compiler/arm64/instruction-scheduler-arm64.cc b/src/compiler/arm64/instruction-scheduler-arm64.cc index 5be3399486..52a410cec3 100644 --- a/src/compiler/arm64/instruction-scheduler-arm64.cc +++ b/src/compiler/arm64/instruction-scheduler-arm64.cc @@ -292,6 +292,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64Ldrsw: case kArm64LdrW: case kArm64Ldr: + case kArm64Peek: return kIsLoadOperation; case kArm64Float64Mod: // This opcode will call a C Function which can diff --git a/src/compiler/arm64/instruction-selector-arm64.cc b/src/compiler/arm64/instruction-selector-arm64.cc index 9105265606..ac62a53215 100644 --- a/src/compiler/arm64/instruction-selector-arm64.cc +++ b/src/compiler/arm64/instruction-selector-arm64.cc @@ -1721,7 +1721,25 @@ void InstructionSelector::EmitPrepareArguments( void InstructionSelector::EmitPrepareResults(ZoneVector* results, const CallDescriptor* descriptor, Node* node) { - // TODO(ahaas): Port. + Arm64OperandGenerator g(this); + + int reverse_slot = 0; + for (PushParameter output : *results) { + if (!output.location.IsCallerFrameSlot()) continue; + reverse_slot += output.location.GetSizeInPointers(); + // Skip any alignment holes in nodes. 
+ if (output.node == nullptr) continue; + DCHECK(!descriptor->IsCFunctionCall()); + + if (output.location.GetType() == MachineType::Float32()) { + MarkAsFloat32(output.node); + } else if (output.location.GetType() == MachineType::Float64()) { + MarkAsFloat64(output.node); + } + + Emit(kArm64Peek, g.DefineAsRegister(output.node), + g.UseImmediate(reverse_slot)); + } } bool InstructionSelector::IsTailCallAddressImmediate() { return false; } diff --git a/src/compiler/frame.cc b/src/compiler/frame.cc index 12cc25681f..0b6d7ac193 100644 --- a/src/compiler/frame.cc +++ b/src/compiler/frame.cc @@ -22,6 +22,13 @@ Frame::Frame(int fixed_frame_size_in_slots) int Frame::AlignFrame(int alignment) { int alignment_slots = alignment / kPointerSize; + // We have to align return slots separately, because they are claimed + // separately on the stack. + int return_delta = + alignment_slots - (return_slot_count_ & (alignment_slots - 1)); + if (return_delta != alignment_slots) { + frame_slot_count_ += return_delta; + } int delta = alignment_slots - (frame_slot_count_ & (alignment_slots - 1)); if (delta != alignment_slots) { frame_slot_count_ += delta; diff --git a/test/cctest/cctest.status b/test/cctest/cctest.status index 8941c4fea3..6a2784107d 100644 --- a/test/cctest/cctest.status +++ b/test/cctest/cctest.status @@ -169,7 +169,7 @@ ############################################################################## # TODO(ahaas): Port multiple return values to ARM, MIPS, S390 and PPC -['arch == arm64 or arch == s390 or arch == s390x or arch == ppc or arch == ppc64', { +['arch == s390 or arch == s390x or arch == ppc or arch == ppc64', { 'test-multiple-return/*': [SKIP], }], diff --git a/test/cctest/compiler/test-multiple-return.cc b/test/cctest/compiler/test-multiple-return.cc index b5b7e5b650..3ce1cea0a4 100644 --- a/test/cctest/compiler/test-multiple-return.cc +++ b/test/cctest/compiler/test-multiple-return.cc @@ -271,42 +271,47 @@ TEST_MULTI(Float64, MachineType::Float64()) #undef 
TEST_MULTI void ReturnLastValue(MachineType type) { - v8::internal::AccountingAllocator allocator; - Zone zone(&allocator, ZONE_NAME); - // Let 2 returns be on the stack. - const int return_count = num_registers(type) + 2; + for (int unused_stack_slots = 0; unused_stack_slots <= 2; + ++unused_stack_slots) { + v8::internal::AccountingAllocator allocator; + Zone zone(&allocator, ZONE_NAME); + // Let {unused_stack_slots + 1} returns be on the stack. + const int return_count = num_registers(type) + unused_stack_slots + 1; - CallDescriptor* desc = CreateMonoCallDescriptor(&zone, return_count, 0, type); + CallDescriptor* desc = + CreateMonoCallDescriptor(&zone, return_count, 0, type); - HandleAndZoneScope handles; - RawMachineAssembler m(handles.main_isolate(), - new (handles.main_zone()) Graph(handles.main_zone()), - desc, MachineType::PointerRepresentation(), - InstructionSelector::SupportedMachineOperatorFlags()); + HandleAndZoneScope handles; + RawMachineAssembler m(handles.main_isolate(), + new (handles.main_zone()) Graph(handles.main_zone()), + desc, MachineType::PointerRepresentation(), + InstructionSelector::SupportedMachineOperatorFlags()); - std::unique_ptr returns(new Node*[return_count]); + std::unique_ptr returns(new Node*[return_count]); - for (int i = 0; i < return_count; ++i) { - returns[i] = Constant(m, type, i); + for (int i = 0; i < return_count; ++i) { + returns[i] = Constant(m, type, i); + } + + m.Return(return_count, returns.get()); + + CompilationInfo info(ArrayVector("testing"), handles.main_zone(), + Code::STUB); + Handle code = Pipeline::GenerateCodeForTesting( + &info, handles.main_isolate(), desc, m.graph(), m.Export()); + + // Generate caller. 
+ int expect = return_count - 1; + RawMachineAssemblerTester mt; + Node* code_node = mt.HeapConstant(code); + + Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node); + + mt.Return(ToInt32( + mt, type, mt.AddNode(mt.common()->Projection(return_count - 1), call))); + + CHECK_EQ(expect, mt.Call()); } - - m.Return(return_count, returns.get()); - - CompilationInfo info(ArrayVector("testing"), handles.main_zone(), Code::STUB); - Handle code = Pipeline::GenerateCodeForTesting( - &info, handles.main_isolate(), desc, m.graph(), m.Export()); - - // Generate caller. - int expect = return_count - 1; - RawMachineAssemblerTester mt; - Node* code_node = mt.HeapConstant(code); - - Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node); - - mt.Return(ToInt32( - mt, type, mt.AddNode(mt.common()->Projection(return_count - 1), call))); - - CHECK_EQ(expect, mt.Call()); } TEST(ReturnLastValueInt32) { ReturnLastValue(MachineType::Int32()); } @@ -316,6 +321,65 @@ TEST(ReturnLastValueInt64) { ReturnLastValue(MachineType::Int64()); } TEST(ReturnLastValueFloat32) { ReturnLastValue(MachineType::Float32()); } TEST(ReturnLastValueFloat64) { ReturnLastValue(MachineType::Float64()); } +void ReturnSumOfReturns(MachineType type) { + for (int unused_stack_slots = 0; unused_stack_slots <= 2; + ++unused_stack_slots) { + v8::internal::AccountingAllocator allocator; + Zone zone(&allocator, ZONE_NAME); + // Let {unused_stack_slots + 1} returns be on the stack. 
+ const int return_count = num_registers(type) + unused_stack_slots + 1; + + CallDescriptor* desc = + CreateMonoCallDescriptor(&zone, return_count, 0, type); + + HandleAndZoneScope handles; + RawMachineAssembler m(handles.main_isolate(), + new (handles.main_zone()) Graph(handles.main_zone()), + desc, MachineType::PointerRepresentation(), + InstructionSelector::SupportedMachineOperatorFlags()); + + std::unique_ptr returns(new Node*[return_count]); + + for (int i = 0; i < return_count; ++i) { + returns[i] = Constant(m, type, i); + } + + m.Return(return_count, returns.get()); + + CompilationInfo info(ArrayVector("testing"), handles.main_zone(), + Code::STUB); + Handle code = Pipeline::GenerateCodeForTesting( + &info, handles.main_isolate(), desc, m.graph(), m.Export()); + + // Generate caller. + RawMachineAssemblerTester mt; + Node* code_node = mt.HeapConstant(code); + + Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node); + + uint32_t expect = 0; + Node* result = mt.Int32Constant(0); + + for (int i = 0; i < return_count; ++i) { + expect += i; + result = mt.Int32Add( + result, + ToInt32(mt, type, mt.AddNode(mt.common()->Projection(i), call))); + } + + mt.Return(result); + + CHECK_EQ(expect, mt.Call()); + } +} + +TEST(ReturnSumOfReturnsInt32) { ReturnSumOfReturns(MachineType::Int32()); } +#if (!V8_TARGET_ARCH_32_BIT) +TEST(ReturnSumOfReturnsInt64) { ReturnSumOfReturns(MachineType::Int64()); } +#endif +TEST(ReturnSumOfReturnsFloat32) { ReturnSumOfReturns(MachineType::Float32()); } +TEST(ReturnSumOfReturnsFloat64) { ReturnSumOfReturns(MachineType::Float64()); } + } // namespace compiler } // namespace internal } // namespace v8