Reland "[arm] Restrict grouping pushes before a TailCall to registers only"

This is a reland of a72b2f88a8
Original change's description:
> [arm] Restrict grouping pushes before a TailCall to registers only
>
> We optimize parallel moves performed before a TailCall by grouping adjacent
> pushes. This way, we may use a single instruction to push multiple registers at
> once. However, we also have support for pushing immediates and stack slots for
> which the benefit is questionnable therefore this patch removes support for
> them.
>
> Concerning immediate pushes, it looks like a mistake since we do not have
> support for this case in `AssembleMove` so this patch removes it. Furthermore,
> if we add a test for this case, we see that a `push ip` instruction is
> generated, effectively pushing whatever was in `ip` at the time instead of
> pushing a constant.
>
> Concerning stack slot pushes, we generate a more or less equivalent sequence of
> instructions.
>
> Finally, grouping floating point pushes is not used anywhere so this patch
> removes support for this also.
>
> Bug: v8:6553
> Change-Id: I9b820d33361fc442dd813f66e1f96cda41009110
> Reviewed-on: https://chromium-review.googlesource.com/567191
> Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
> Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
> Cr-Commit-Position: refs/heads/master@{#46718}

Bug: v8:6553
Change-Id: Ib9a55dae7cc5db6185d163c56088ff23426d04bb
Reviewed-on: https://chromium-review.googlesource.com/576087
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
Cr-Commit-Position: refs/heads/master@{#46754}
This commit is contained in:
Pierre Langlois 2017-07-13 10:55:16 +01:00 committed by Commit Bot
parent 0c12d88e52
commit 79bcb45447
6 changed files with 219 additions and 39 deletions

View File

@ -593,16 +593,6 @@ void FlushPendingPushRegisters(TurboAssembler* tasm,
pending_pushes->resize(0);
}
void AddPendingPushRegister(TurboAssembler* tasm,
FrameAccessState* frame_access_state,
ZoneVector<Register>* pending_pushes,
Register reg) {
pending_pushes->push_back(reg);
if (pending_pushes->size() == 3 || reg.is(ip)) {
FlushPendingPushRegisters(tasm, frame_access_state, pending_pushes);
}
}
void AdjustStackPointerForTailCall(
TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
ZoneVector<Register>* pending_pushes = nullptr,
@ -629,9 +619,8 @@ void AdjustStackPointerForTailCall(
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
int first_unused_stack_slot) {
CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
ZoneVector<MoveOperands*> pushes(zone());
GetPushCompatibleMoves(instr, flags, &pushes);
GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
if (!pushes.empty() &&
(LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
@ -646,21 +635,15 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
tasm(), frame_access_state(),
destination_location.index() - pending_pushes.size(),
&pending_pushes);
if (source.IsStackSlot()) {
LocationOperand source_location(LocationOperand::cast(source));
__ ldr(ip, g.SlotToMemOperand(source_location.index()));
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
ip);
} else if (source.IsRegister()) {
LocationOperand source_location(LocationOperand::cast(source));
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
source_location.GetRegister());
} else if (source.IsImmediate()) {
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
ip);
} else {
// Pushes of non-scalar data types is not supported.
UNIMPLEMENTED();
// Pushes of non-register data types are not supported.
DCHECK(source.IsRegister());
LocationOperand source_location(LocationOperand::cast(source));
pending_pushes.push_back(source_location.GetRegister());
// TODO(arm): We can push more than 3 registers at once. Add support in
// the macro-assembler for pushing a list of registers.
if (pending_pushes.size() == 3) {
FlushPendingPushRegisters(tasm(), frame_access_state(),
&pending_pushes);
}
move->Eliminate();
}

View File

@ -328,16 +328,12 @@ bool CodeGenerator::IsValidPush(InstructionOperand source,
((push_type & CodeGenerator::kImmediatePush) != 0)) {
return true;
}
if ((source.IsRegister() || source.IsStackSlot()) &&
((push_type & CodeGenerator::kScalarPush) != 0)) {
if (source.IsRegister() &&
((push_type & CodeGenerator::kRegisterPush) != 0)) {
return true;
}
if ((source.IsFloatRegister() || source.IsFloatStackSlot()) &&
((push_type & CodeGenerator::kFloat32Push) != 0)) {
return true;
}
if ((source.IsDoubleRegister() || source.IsFloatStackSlot()) &&
((push_type & CodeGenerator::kFloat64Push) != 0)) {
if (source.IsStackSlot() &&
((push_type & CodeGenerator::kStackSlotPush) != 0)) {
return true;
}
return false;

View File

@ -183,10 +183,9 @@ class CodeGenerator final : public GapResolver::Assembler {
enum PushTypeFlag {
kImmediatePush = 0x1,
kScalarPush = 0x2,
kFloat32Push = 0x4,
kFloat64Push = 0x8,
kFloatPush = kFloat32Push | kFloat64Push
kRegisterPush = 0x2,
kStackSlotPush = 0x4,
kScalarPush = kRegisterPush | kStackSlotPush
};
typedef base::Flags<PushTypeFlag> PushTypeFlags;
@ -308,6 +307,7 @@ class CodeGenerator final : public GapResolver::Assembler {
};
friend class OutOfLineCode;
friend class CodeGeneratorTester;
Zone* zone_;
FrameAccessState* frame_access_state_;

View File

@ -27,6 +27,7 @@ v8_executable("cctest") {
"compiler/test-basic-block-profiler.cc",
"compiler/test-branch-combine.cc",
"compiler/test-code-assembler.cc",
"compiler/test-code-generator.cc",
"compiler/test-gap-resolver.cc",
"compiler/test-graph-visualizer.cc",
"compiler/test-instruction.cc",

View File

@ -45,6 +45,7 @@
'compiler/test-run-unwinding-info.cc',
'compiler/test-gap-resolver.cc',
'compiler/test-graph-visualizer.cc',
'compiler/test-code-generator.cc',
'compiler/test-code-assembler.cc',
'compiler/test-instruction.cc',
'compiler/test-js-context-specialization.cc',

View File

@ -0,0 +1,199 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/codegen.h"
#include "src/compilation-info.h"
#include "src/compiler/code-generator.h"
#include "src/compiler/instruction.h"
#include "src/compiler/linkage.h"
#include "src/isolate.h"
#include "test/cctest/cctest.h"
namespace v8 {
namespace internal {
namespace compiler {
class CodeGeneratorTester : public InitializedHandleScope {
public:
CodeGeneratorTester()
: zone_(main_isolate()->allocator(), ZONE_NAME),
info_(ArrayVector("test"), main_isolate(), &zone_,
Code::ComputeFlags(Code::STUB)),
descriptor_(Linkage::GetJSCallDescriptor(&zone_, false, 0,
CallDescriptor::kNoFlags)),
linkage_(descriptor_),
blocks_(&zone_),
sequence_(main_isolate(), &zone_, &blocks_),
frame_(descriptor_->CalculateFixedFrameSize()),
generator_(&zone_, &frame_, &linkage_, &sequence_, &info_,
base::Optional<OsrHelper>(), kNoSourcePosition) {
info_.set_prologue_offset(generator_.tasm()->pc_offset());
}
enum PushTypeFlag {
kRegisterPush = CodeGenerator::kRegisterPush,
kStackSlotPush = CodeGenerator::kStackSlotPush,
kScalarPush = CodeGenerator::kScalarPush
};
void CheckAssembleTailCallGaps(Instruction* instr,
int first_unused_stack_slot,
CodeGeneratorTester::PushTypeFlag push_type) {
generator_.AssembleTailCallBeforeGap(instr, first_unused_stack_slot);
#if defined(V8_TARGET_ARCH_ARM)
// Only folding register pushes is supported on ARM.
bool supported = ((push_type & CodeGenerator::kRegisterPush) == push_type);
#elif defined(V8_TARGET_ARCH_X64) || defined(V8_TARGET_ARCH_IA32) || \
defined(V8_TARGET_ARCH_X87) || defined(V8_TARGET_ARCH_S390X) || \
defined(V8_TARGET_ARCH_PPC64)
bool supported = ((push_type & CodeGenerator::kScalarPush) == push_type);
#else
bool supported = false;
#endif
if (supported) {
// Architectures supporting folding adjacent pushes should now have
// resolved all moves.
for (const auto& move :
*instr->parallel_moves()[Instruction::FIRST_GAP_POSITION]) {
CHECK(move->IsEliminated());
}
}
generator_.AssembleGaps(instr);
generator_.AssembleTailCallAfterGap(instr, first_unused_stack_slot);
}
Handle<Code> Finalize() {
generator_.FinishCode();
generator_.safepoints()->Emit(generator_.tasm(),
frame_.GetTotalFrameSlotCount());
return generator_.FinalizeCode();
}
void Disassemble() {
HandleScope scope(main_isolate());
Handle<Code> code = Finalize();
if (FLAG_print_code) {
code->Print();
}
}
Zone* zone() { return &zone_; }
private:
Zone zone_;
CompilationInfo info_;
CallDescriptor* descriptor_;
Linkage linkage_;
ZoneVector<InstructionBlock*> blocks_;
InstructionSequence sequence_;
Frame frame_;
CodeGenerator generator_;
};
TEST(AssembleTailCallGap) {
const RegisterConfiguration* conf = RegisterConfiguration::Turbofan();
// This test assumes at least 4 registers are allocatable.
CHECK(conf->num_allocatable_general_registers() >= 4);
auto r0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(0));
auto r1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(1));
auto r2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(2));
auto r3 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(3));
auto slot_minus_4 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -4);
auto slot_minus_3 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -3);
auto slot_minus_2 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -2);
auto slot_minus_1 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -1);
// Avoid slot 0 for architectures which use it store the return address.
int first_slot = V8_TARGET_ARCH_STORES_RETURN_ADDRESS_ON_STACK ? 1 : 0;
auto slot_0 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot);
auto slot_1 =
AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot + 1);
auto slot_2 =
AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot + 2);
auto slot_3 =
AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot + 3);
// These tests all generate series of moves that the code generator should
// detect as adjacent pushes. Depending on the architecture, we make sure
// these moves get eliminated.
// Also, disassembling with `--print-code` is useful when debugging.
{
// Generate a series of register pushes only.
CodeGeneratorTester c;
Instruction* instr = Instruction::New(c.zone(), kArchNop);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r3, slot_0);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r2, slot_1);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r1, slot_2);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r0, slot_3);
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
CodeGeneratorTester::kRegisterPush);
c.Disassemble();
}
{
// Generate a series of stack pushes only.
CodeGeneratorTester c;
Instruction* instr = Instruction::New(c.zone(), kArchNop);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_4, slot_0);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_3, slot_1);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_2, slot_2);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_1, slot_3);
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
CodeGeneratorTester::kStackSlotPush);
c.Disassemble();
}
{
// Generate a mix of stack and register pushes.
CodeGeneratorTester c;
Instruction* instr = Instruction::New(c.zone(), kArchNop);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_2, slot_0);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r1, slot_1);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_1, slot_2);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r0, slot_3);
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
CodeGeneratorTester::kScalarPush);
c.Disassemble();
}
}
} // namespace compiler
} // namespace internal
} // namespace v8