[arm] Restrict grouping pushes before a TailCall to registers only

We optimize parallel moves performed before a TailCall by grouping adjacent
pushes. This way, we may use a single instruction to push multiple registers at
once. However, we also have support for pushing immediates and stack slots for
which the benefit is questionnable therefore this patch removes support for
them.

Concerning immediate pushes, it looks like a mistake since we do not have
support for this case in `AssembleMove` so this patch removes it. Furthermore,
if we add a test for this case, we see that a `push ip` instruction is
generated, effectively pushing whatever was in `ip` at the time instead of
pushing a constant.

Concerning stack slot pushes, we generate a more or less equivalent sequence of
instructions.

Finally, grouping floating point pushes is not used anywhere so this patch
removes support for this also.

Bug: v8:6553
Change-Id: I9b820d33361fc442dd813f66e1f96cda41009110
Reviewed-on: https://chromium-review.googlesource.com/567191
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
Cr-Commit-Position: refs/heads/master@{#46718}
This commit is contained in:
Pierre Langlois 2017-07-13 10:55:16 +01:00 committed by Commit Bot
parent 93a1a16dbd
commit a72b2f88a8
6 changed files with 219 additions and 39 deletions

View File

@ -593,16 +593,6 @@ void FlushPendingPushRegisters(TurboAssembler* tasm,
pending_pushes->resize(0);
}
void AddPendingPushRegister(TurboAssembler* tasm,
FrameAccessState* frame_access_state,
ZoneVector<Register>* pending_pushes,
Register reg) {
pending_pushes->push_back(reg);
if (pending_pushes->size() == 3 || reg.is(ip)) {
FlushPendingPushRegisters(tasm, frame_access_state, pending_pushes);
}
}
void AdjustStackPointerForTailCall(
TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
ZoneVector<Register>* pending_pushes = nullptr,
@ -629,9 +619,8 @@ void AdjustStackPointerForTailCall(
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
int first_unused_stack_slot) {
CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
ZoneVector<MoveOperands*> pushes(zone());
GetPushCompatibleMoves(instr, flags, &pushes);
GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
if (!pushes.empty() &&
(LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
@ -646,21 +635,15 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
tasm(), frame_access_state(),
destination_location.index() - pending_pushes.size(),
&pending_pushes);
if (source.IsStackSlot()) {
LocationOperand source_location(LocationOperand::cast(source));
__ ldr(ip, g.SlotToMemOperand(source_location.index()));
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
ip);
} else if (source.IsRegister()) {
LocationOperand source_location(LocationOperand::cast(source));
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
source_location.GetRegister());
} else if (source.IsImmediate()) {
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
ip);
} else {
// Pushes of non-scalar data types is not supported.
UNIMPLEMENTED();
// Pushes of non-register data types are not supported.
DCHECK(source.IsRegister());
LocationOperand source_location(LocationOperand::cast(source));
pending_pushes.push_back(source_location.GetRegister());
// TODO(arm): We can push more than 3 registers at once. Add support in
// the macro-assembler for pushing a list of registers.
if (pending_pushes.size() == 3) {
FlushPendingPushRegisters(tasm(), frame_access_state(),
&pending_pushes);
}
move->Eliminate();
}

View File

@ -328,16 +328,12 @@ bool CodeGenerator::IsValidPush(InstructionOperand source,
((push_type & CodeGenerator::kImmediatePush) != 0)) {
return true;
}
if ((source.IsRegister() || source.IsStackSlot()) &&
((push_type & CodeGenerator::kScalarPush) != 0)) {
if (source.IsRegister() &&
((push_type & CodeGenerator::kRegisterPush) != 0)) {
return true;
}
if ((source.IsFloatRegister() || source.IsFloatStackSlot()) &&
((push_type & CodeGenerator::kFloat32Push) != 0)) {
return true;
}
if ((source.IsDoubleRegister() || source.IsFloatStackSlot()) &&
((push_type & CodeGenerator::kFloat64Push) != 0)) {
if (source.IsStackSlot() &&
((push_type & CodeGenerator::kStackSlotPush) != 0)) {
return true;
}
return false;

View File

@ -183,10 +183,9 @@ class CodeGenerator final : public GapResolver::Assembler {
enum PushTypeFlag {
kImmediatePush = 0x1,
kScalarPush = 0x2,
kFloat32Push = 0x4,
kFloat64Push = 0x8,
kFloatPush = kFloat32Push | kFloat64Push
kRegisterPush = 0x2,
kStackSlotPush = 0x4,
kScalarPush = kRegisterPush | kStackSlotPush
};
typedef base::Flags<PushTypeFlag> PushTypeFlags;
@ -308,6 +307,7 @@ class CodeGenerator final : public GapResolver::Assembler {
};
friend class OutOfLineCode;
friend class CodeGeneratorTester;
Zone* zone_;
FrameAccessState* frame_access_state_;

View File

@ -27,6 +27,7 @@ v8_executable("cctest") {
"compiler/test-basic-block-profiler.cc",
"compiler/test-branch-combine.cc",
"compiler/test-code-assembler.cc",
"compiler/test-code-generator.cc",
"compiler/test-gap-resolver.cc",
"compiler/test-graph-visualizer.cc",
"compiler/test-instruction.cc",

View File

@ -45,6 +45,7 @@
'compiler/test-run-unwinding-info.cc',
'compiler/test-gap-resolver.cc',
'compiler/test-graph-visualizer.cc',
'compiler/test-code-generator.cc',
'compiler/test-code-assembler.cc',
'compiler/test-instruction.cc',
'compiler/test-js-context-specialization.cc',

View File

@ -0,0 +1,199 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/codegen.h"
#include "src/compilation-info.h"
#include "src/compiler/code-generator.h"
#include "src/compiler/instruction.h"
#include "src/compiler/linkage.h"
#include "src/isolate.h"
#include "test/cctest/cctest.h"
namespace v8 {
namespace internal {
namespace compiler {
class CodeGeneratorTester : public InitializedHandleScope {
public:
CodeGeneratorTester()
: zone_(main_isolate()->allocator(), ZONE_NAME),
info_(ArrayVector("test"), main_isolate(), &zone_,
Code::ComputeFlags(Code::STUB)),
descriptor_(Linkage::GetJSCallDescriptor(&zone_, false, 0,
CallDescriptor::kNoFlags)),
linkage_(descriptor_),
blocks_(&zone_),
sequence_(main_isolate(), &zone_, &blocks_),
frame_(descriptor_->CalculateFixedFrameSize()),
generator_(&zone_, &frame_, &linkage_, &sequence_, &info_,
base::Optional<OsrHelper>(), kNoSourcePosition) {
info_.set_prologue_offset(generator_.tasm()->pc_offset());
}
enum PushTypeFlag {
kRegisterPush = CodeGenerator::kRegisterPush,
kStackSlotPush = CodeGenerator::kStackSlotPush,
kScalarPush = CodeGenerator::kScalarPush
};
void CheckAssembleTailCallGaps(Instruction* instr,
int first_unused_stack_slot,
CodeGeneratorTester::PushTypeFlag push_type) {
generator_.AssembleTailCallBeforeGap(instr, first_unused_stack_slot);
#if defined(V8_TARGET_ARCH_ARM)
// Only folding register pushes is supported on ARM.
bool supported = ((push_type & CodeGenerator::kRegisterPush) == push_type);
#elif defined(V8_TARGET_ARCH_X64) || defined(V8_TARGET_ARCH_IA32) || \
defined(V8_TARGET_ARCH_X87) || defined(V8_TARGET_ARCH_S390X) || \
defined(V8_TARGET_ARCH_PPC64)
bool supported = ((push_type & CodeGenerator::kScalarPush) == push_type);
#else
bool supported = false;
#endif
if (supported) {
// Architectures supporting folding adjacent pushes should now have
// resolved all moves.
for (const auto& move :
*instr->parallel_moves()[Instruction::FIRST_GAP_POSITION]) {
CHECK(move->IsEliminated());
}
}
generator_.AssembleGaps(instr);
generator_.AssembleTailCallAfterGap(instr, first_unused_stack_slot);
}
Handle<Code> Finalize() {
generator_.FinishCode();
generator_.safepoints()->Emit(generator_.tasm(),
frame_.GetTotalFrameSlotCount());
return generator_.FinalizeCode();
}
void Disassemble() {
if (FLAG_print_code) {
HandleScope scope(main_isolate());
Handle<Code> code = Finalize();
code->Print();
}
}
Zone* zone() { return &zone_; }
private:
Zone zone_;
CompilationInfo info_;
CallDescriptor* descriptor_;
Linkage linkage_;
ZoneVector<InstructionBlock*> blocks_;
InstructionSequence sequence_;
Frame frame_;
CodeGenerator generator_;
};
TEST(AssembleTailCallGap) {
const RegisterConfiguration* conf = RegisterConfiguration::Turbofan();
// This test assumes at least 4 registers are allocatable.
CHECK(conf->num_allocatable_general_registers() >= 4);
auto r0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(0));
auto r1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(1));
auto r2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(2));
auto r3 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
conf->GetAllocatableGeneralCode(3));
auto slot_minus_4 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -4);
auto slot_minus_3 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -3);
auto slot_minus_2 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -2);
auto slot_minus_1 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, -1);
// Avoid slot 0 for architectures which use it store the return address.
int first_slot = V8_TARGET_ARCH_STORES_RETURN_ADDRESS_ON_STACK ? 1 : 0;
auto slot_0 = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot);
auto slot_1 =
AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot + 1);
auto slot_2 =
AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot + 2);
auto slot_3 =
AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kTagged, first_slot + 3);
// These tests all generate series of moves that the code generator should
// detect as adjacent pushes. Depending on the architecture, we make sure
// these moves get eliminated.
// Also, disassembling with `--print-code` is useful when debugging.
{
// Generate a series of register pushes only.
CodeGeneratorTester c;
Instruction* instr = Instruction::New(c.zone(), kArchNop);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r3, slot_0);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r2, slot_1);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r1, slot_2);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r0, slot_3);
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
CodeGeneratorTester::kRegisterPush);
c.Disassemble();
}
{
// Generate a series of stack pushes only.
CodeGeneratorTester c;
Instruction* instr = Instruction::New(c.zone(), kArchNop);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_4, slot_0);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_3, slot_1);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_2, slot_2);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_1, slot_3);
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
CodeGeneratorTester::kStackSlotPush);
c.Disassemble();
}
{
// Generate a mix of stack and register pushes.
CodeGeneratorTester c;
Instruction* instr = Instruction::New(c.zone(), kArchNop);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_2, slot_0);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r1, slot_1);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(slot_minus_1, slot_2);
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
->AddMove(r0, slot_3);
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
CodeGeneratorTester::kScalarPush);
c.Disassemble();
}
}
} // namespace compiler
} // namespace internal
} // namespace v8