[arm] Restrict grouping pushes before a TailCall to registers only
We optimize parallel moves performed before a TailCall by grouping adjacent pushes. This way, we may use a single instruction to push multiple registers at once. However, we also have support for pushing immediates and stack slots for which the benefit is questionnable therefore this patch removes support for them. Concerning immediate pushes, it looks like a mistake since we do not have support for this case in `AssembleMove` so this patch removes it. Furthermore, if we add a test for this case, we see that a `push ip` instruction is generated, effectively pushing whatever was in `ip` at the time instead of pushing a constant. Concerning stack slot pushes, we generate a more or less equivalent sequence of instructions. Finally, grouping floating point pushes is not used anywhere so this patch removes support for this also. Bug: v8:6553 Change-Id: I9b820d33361fc442dd813f66e1f96cda41009110 Reviewed-on: https://chromium-review.googlesource.com/567191 Reviewed-by: Benedikt Meurer <bmeurer@chromium.org> Commit-Queue: Pierre Langlois <pierre.langlois@arm.com> Cr-Commit-Position: refs/heads/master@{#46718}
This commit is contained in:
parent
93a1a16dbd
commit
a72b2f88a8
@ -593,16 +593,6 @@ void FlushPendingPushRegisters(TurboAssembler* tasm,
|
||||
pending_pushes->resize(0);
|
||||
}
|
||||
|
||||
void AddPendingPushRegister(TurboAssembler* tasm,
|
||||
FrameAccessState* frame_access_state,
|
||||
ZoneVector<Register>* pending_pushes,
|
||||
Register reg) {
|
||||
pending_pushes->push_back(reg);
|
||||
if (pending_pushes->size() == 3 || reg.is(ip)) {
|
||||
FlushPendingPushRegisters(tasm, frame_access_state, pending_pushes);
|
||||
}
|
||||
}
|
||||
|
||||
void AdjustStackPointerForTailCall(
|
||||
TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
|
||||
ZoneVector<Register>* pending_pushes = nullptr,
|
||||
@ -629,9 +619,8 @@ void AdjustStackPointerForTailCall(
|
||||
|
||||
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
|
||||
int first_unused_stack_slot) {
|
||||
CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
|
||||
ZoneVector<MoveOperands*> pushes(zone());
|
||||
GetPushCompatibleMoves(instr, flags, &pushes);
|
||||
GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
|
||||
|
||||
if (!pushes.empty() &&
|
||||
(LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
|
||||
@ -646,21 +635,15 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
|
||||
tasm(), frame_access_state(),
|
||||
destination_location.index() - pending_pushes.size(),
|
||||
&pending_pushes);
|
||||
if (source.IsStackSlot()) {
|
||||
LocationOperand source_location(LocationOperand::cast(source));
|
||||
__ ldr(ip, g.SlotToMemOperand(source_location.index()));
|
||||
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
|
||||
ip);
|
||||
} else if (source.IsRegister()) {
|
||||
LocationOperand source_location(LocationOperand::cast(source));
|
||||
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
|
||||
source_location.GetRegister());
|
||||
} else if (source.IsImmediate()) {
|
||||
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
|
||||
ip);
|
||||
} else {
|
||||
// Pushes of non-scalar data types is not supported.
|
||||
UNIMPLEMENTED();
|
||||
// Pushes of non-register data types are not supported.
|
||||
DCHECK(source.IsRegister());
|
||||
LocationOperand source_location(LocationOperand::cast(source));
|
||||
pending_pushes.push_back(source_location.GetRegister());
|
||||
// TODO(arm): We can push more than 3 registers at once. Add support in
|
||||
// the macro-assembler for pushing a list of registers.
|
||||
if (pending_pushes.size() == 3) {
|
||||
FlushPendingPushRegisters(tasm(), frame_access_state(),
|
||||
&pending_pushes);
|
||||
}
|
||||
move->Eliminate();
|
||||
}
|
||||
|
@ -328,16 +328,12 @@ bool CodeGenerator::IsValidPush(InstructionOperand source,
|
||||
((push_type & CodeGenerator::kImmediatePush) != 0)) {
|
||||
return true;
|
||||
}
|
||||
if ((source.IsRegister() || source.IsStackSlot()) &&
|
||||
((push_type & CodeGenerator::kScalarPush) != 0)) {
|
||||
if (source.IsRegister() &&
|
||||
((push_type & CodeGenerator::kRegisterPush) != 0)) {
|
||||
return true;
|
||||
}
|
||||
if ((source.IsFloatRegister() || source.IsFloatStackSlot()) &&
|
||||
((push_type & CodeGenerator::kFloat32Push) != 0)) {
|
||||
return true;
|
||||
}
|
||||
if ((source.IsDoubleRegister() || source.IsFloatStackSlot()) &&
|
||||
((push_type & CodeGenerator::kFloat64Push) != 0)) {
|
||||
if (source.IsStackSlot() &&
|
||||
((push_type & CodeGenerator::kStackSlotPush) != 0)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -183,10 +183,9 @@ class CodeGenerator final : public GapResolver::Assembler {
|
||||
|
||||
enum PushTypeFlag {
|
||||
kImmediatePush = 0x1,
|
||||
kScalarPush = 0x2,
|
||||
kFloat32Push = 0x4,
|
||||
kFloat64Push = 0x8,
|
||||
kFloatPush = kFloat32Push | kFloat64Push
|
||||
kRegisterPush = 0x2,
|
||||
kStackSlotPush = 0x4,
|
||||
kScalarPush = kRegisterPush | kStackSlotPush
|
||||
};
|
||||
|
||||
typedef base::Flags<PushTypeFlag> PushTypeFlags;
|
||||
@ -308,6 +307,7 @@ class CodeGenerator final : public GapResolver::Assembler {
|
||||
};
|
||||
|
||||
friend class OutOfLineCode;
|
||||
friend class CodeGeneratorTester;
|
||||
|
||||
Zone* zone_;
|
||||
FrameAccessState* frame_access_state_;
|
||||
|
@ -27,6 +27,7 @@ v8_executable("cctest") {
|
||||
"compiler/test-basic-block-profiler.cc",
|
||||
"compiler/test-branch-combine.cc",
|
||||
"compiler/test-code-assembler.cc",
|
||||
"compiler/test-code-generator.cc",
|
||||
"compiler/test-gap-resolver.cc",
|
||||
"compiler/test-graph-visualizer.cc",
|
||||
"compiler/test-instruction.cc",
|
||||
|
@ -45,6 +45,7 @@
|
||||
'compiler/test-run-unwinding-info.cc',
|
||||
'compiler/test-gap-resolver.cc',
|
||||
'compiler/test-graph-visualizer.cc',
|
||||
'compiler/test-code-generator.cc',
|
||||
'compiler/test-code-assembler.cc',
|
||||
'compiler/test-instruction.cc',
|
||||
'compiler/test-js-context-specialization.cc',
|
||||
|
199
test/cctest/compiler/test-code-generator.cc
Normal file
199
test/cctest/compiler/test-code-generator.cc
Normal file
@ -0,0 +1,199 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/codegen.h"
|
||||
#include "src/compilation-info.h"
|
||||
#include "src/compiler/code-generator.h"
|
||||
#include "src/compiler/instruction.h"
|
||||
#include "src/compiler/linkage.h"
|
||||
#include "src/isolate.h"
|
||||
|
||||
#include "test/cctest/cctest.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
namespace compiler {
|
||||
|
||||
class CodeGeneratorTester : public InitializedHandleScope {
|
||||
public:
|
||||
CodeGeneratorTester()
|
||||
: zone_(main_isolate()->allocator(), ZONE_NAME),
|
||||
info_(ArrayVector("test"), main_isolate(), &zone_,
|
||||
Code::ComputeFlags(Code::STUB)),
|
||||
descriptor_(Linkage::GetJSCallDescriptor(&zone_, false, 0,
|
||||
CallDescriptor::kNoFlags)),
|
||||
linkage_(descriptor_),
|
||||
blocks_(&zone_),
|
||||
sequence_(main_isolate(), &zone_, &blocks_),
|
||||
frame_(descriptor_->CalculateFixedFrameSize()),
|
||||
generator_(&zone_, &frame_, &linkage_, &sequence_, &info_,
|
||||
base::Optional<OsrHelper>(), kNoSourcePosition) {
|
||||
info_.set_prologue_offset(generator_.tasm()->pc_offset());
|
||||
}
|
||||
|
||||
enum PushTypeFlag {
|
||||
kRegisterPush = CodeGenerator::kRegisterPush,
|
||||
kStackSlotPush = CodeGenerator::kStackSlotPush,
|
||||
kScalarPush = CodeGenerator::kScalarPush
|
||||
};
|
||||
|
||||
void CheckAssembleTailCallGaps(Instruction* instr,
|
||||
int first_unused_stack_slot,
|
||||
CodeGeneratorTester::PushTypeFlag push_type) {
|
||||
generator_.AssembleTailCallBeforeGap(instr, first_unused_stack_slot);
|
||||
#if defined(V8_TARGET_ARCH_ARM)
|
||||
// Only folding register pushes is supported on ARM.
|
||||
bool supported = ((push_type & CodeGenerator::kRegisterPush) == push_type);
|
||||
#elif defined(V8_TARGET_ARCH_X64) || defined(V8_TARGET_ARCH_IA32) || \
|
||||
defined(V8_TARGET_ARCH_X87) || defined(V8_TARGET_ARCH_S390X) || \
|
||||
defined(V8_TARGET_ARCH_PPC64)
|
||||
bool supported = ((push_type & CodeGenerator::kScalarPush) == push_type);
|
||||
#else
|
||||
bool supported = false;
|
||||
#endif
|
||||
if (supported) {
|
||||
// Architectures supporting folding adjacent pushes should now have
|
||||
// resolved all moves.
|
||||
for (const auto& move :
|
||||
*instr->parallel_moves()[Instruction::FIRST_GAP_POSITION]) {
|
||||
CHECK(move->IsEliminated());
|
||||
}
|
||||
}
|
||||
generator_.AssembleGaps(instr);
|
||||
generator_.AssembleTailCallAfterGap(instr, first_unused_stack_slot);
|
||||
}
|
||||
|
||||
Handle<Code> Finalize() {
|
||||
generator_.FinishCode();
|
||||
generator_.safepoints()->Emit(generator_.tasm(),
|
||||
frame_.GetTotalFrameSlotCount());
|
||||
return generator_.FinalizeCode();
|
||||
}
|
||||
|
||||
void Disassemble() {
|
||||
if (FLAG_print_code) {
|
||||
HandleScope scope(main_isolate());
|
||||
Handle<Code> code = Finalize();
|
||||
code->Print();
|
||||
}
|
||||
}
|
||||
|
||||
Zone* zone() { return &zone_; }
|
||||
|
||||
private:
|
||||
Zone zone_;
|
||||
CompilationInfo info_;
|
||||
CallDescriptor* descriptor_;
|
||||
Linkage linkage_;
|
||||
ZoneVector<InstructionBlock*> blocks_;
|
||||
InstructionSequence sequence_;
|
||||
Frame frame_;
|
||||
CodeGenerator generator_;
|
||||
};
|
||||
|
||||
TEST(AssembleTailCallGap) {
|
||||
const RegisterConfiguration* conf = RegisterConfiguration::Turbofan();
|
||||
|
||||
// This test assumes at least 4 registers are allocatable.
|
||||
CHECK(conf->num_allocatable_general_registers() >= 4);
|
||||
|
||||
auto r0 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(0));
|
||||
auto r1 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(1));
|
||||
auto r2 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(2));
|
||||
auto r3 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(3));
|
||||
|
||||
auto slot_minus_4 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -4);
|
||||
auto slot_minus_3 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -3);
|
||||
auto slot_minus_2 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -2);
|
||||
auto slot_minus_1 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -1);
|
||||
|
||||
// Avoid slot 0 for architectures which use it store the return address.
|
||||
int first_slot = V8_TARGET_ARCH_STORES_RETURN_ADDRESS_ON_STACK ? 1 : 0;
|
||||
auto slot_0 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot);
|
||||
auto slot_1 =
|
||||
AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot + 1);
|
||||
auto slot_2 =
|
||||
AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot + 2);
|
||||
auto slot_3 =
|
||||
AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot + 3);
|
||||
|
||||
// These tests all generate series of moves that the code generator should
|
||||
// detect as adjacent pushes. Depending on the architecture, we make sure
|
||||
// these moves get eliminated.
|
||||
// Also, disassembling with `--print-code` is useful when debugging.
|
||||
|
||||
{
|
||||
// Generate a series of register pushes only.
|
||||
CodeGeneratorTester c;
|
||||
Instruction* instr = Instruction::New(c.zone(), kArchNop);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r3, slot_0);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r2, slot_1);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r1, slot_2);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r0, slot_3);
|
||||
|
||||
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
|
||||
CodeGeneratorTester::kRegisterPush);
|
||||
c.Disassemble();
|
||||
}
|
||||
|
||||
{
|
||||
// Generate a series of stack pushes only.
|
||||
CodeGeneratorTester c;
|
||||
Instruction* instr = Instruction::New(c.zone(), kArchNop);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_4, slot_0);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_3, slot_1);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_2, slot_2);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_1, slot_3);
|
||||
|
||||
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
|
||||
CodeGeneratorTester::kStackSlotPush);
|
||||
c.Disassemble();
|
||||
}
|
||||
|
||||
{
|
||||
// Generate a mix of stack and register pushes.
|
||||
CodeGeneratorTester c;
|
||||
Instruction* instr = Instruction::New(c.zone(), kArchNop);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_2, slot_0);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r1, slot_1);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_1, slot_2);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r0, slot_3);
|
||||
|
||||
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
|
||||
CodeGeneratorTester::kScalarPush);
|
||||
c.Disassemble();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
Loading…
Reference in New Issue
Block a user