Reland "[arm] Restrict grouping pushes before a TailCall to registers only"
This is a reland of a72b2f88a8
Original change's description:
> [arm] Restrict grouping pushes before a TailCall to registers only
>
> We optimize parallel moves performed before a TailCall by grouping adjacent
> pushes. This way, we may use a single instruction to push multiple registers at
> once. However, we also have support for pushing immediates and stack slots for
> which the benefit is questionnable therefore this patch removes support for
> them.
>
> Concerning immediate pushes, it looks like a mistake since we do not have
> support for this case in `AssembleMove` so this patch removes it. Furthermore,
> if we add a test for this case, we see that a `push ip` instruction is
> generated, effectively pushing whatever was in `ip` at the time instead of
> pushing a constant.
>
> Concerning stack slot pushes, we generate a more or less equivalent sequence of
> instructions.
>
> Finally, grouping floating point pushes is not used anywhere so this patch
> removes support for this also.
>
> Bug: v8:6553
> Change-Id: I9b820d33361fc442dd813f66e1f96cda41009110
> Reviewed-on: https://chromium-review.googlesource.com/567191
> Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
> Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
> Cr-Commit-Position: refs/heads/master@{#46718}
Bug: v8:6553
Change-Id: Ib9a55dae7cc5db6185d163c56088ff23426d04bb
Reviewed-on: https://chromium-review.googlesource.com/576087
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
Cr-Commit-Position: refs/heads/master@{#46754}
This commit is contained in:
parent
0c12d88e52
commit
79bcb45447
@ -593,16 +593,6 @@ void FlushPendingPushRegisters(TurboAssembler* tasm,
|
||||
pending_pushes->resize(0);
|
||||
}
|
||||
|
||||
void AddPendingPushRegister(TurboAssembler* tasm,
|
||||
FrameAccessState* frame_access_state,
|
||||
ZoneVector<Register>* pending_pushes,
|
||||
Register reg) {
|
||||
pending_pushes->push_back(reg);
|
||||
if (pending_pushes->size() == 3 || reg.is(ip)) {
|
||||
FlushPendingPushRegisters(tasm, frame_access_state, pending_pushes);
|
||||
}
|
||||
}
|
||||
|
||||
void AdjustStackPointerForTailCall(
|
||||
TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
|
||||
ZoneVector<Register>* pending_pushes = nullptr,
|
||||
@ -629,9 +619,8 @@ void AdjustStackPointerForTailCall(
|
||||
|
||||
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
|
||||
int first_unused_stack_slot) {
|
||||
CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
|
||||
ZoneVector<MoveOperands*> pushes(zone());
|
||||
GetPushCompatibleMoves(instr, flags, &pushes);
|
||||
GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
|
||||
|
||||
if (!pushes.empty() &&
|
||||
(LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
|
||||
@ -646,21 +635,15 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
|
||||
tasm(), frame_access_state(),
|
||||
destination_location.index() - pending_pushes.size(),
|
||||
&pending_pushes);
|
||||
if (source.IsStackSlot()) {
|
||||
LocationOperand source_location(LocationOperand::cast(source));
|
||||
__ ldr(ip, g.SlotToMemOperand(source_location.index()));
|
||||
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
|
||||
ip);
|
||||
} else if (source.IsRegister()) {
|
||||
LocationOperand source_location(LocationOperand::cast(source));
|
||||
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
|
||||
source_location.GetRegister());
|
||||
} else if (source.IsImmediate()) {
|
||||
AddPendingPushRegister(tasm(), frame_access_state(), &pending_pushes,
|
||||
ip);
|
||||
} else {
|
||||
// Pushes of non-scalar data types is not supported.
|
||||
UNIMPLEMENTED();
|
||||
// Pushes of non-register data types are not supported.
|
||||
DCHECK(source.IsRegister());
|
||||
LocationOperand source_location(LocationOperand::cast(source));
|
||||
pending_pushes.push_back(source_location.GetRegister());
|
||||
// TODO(arm): We can push more than 3 registers at once. Add support in
|
||||
// the macro-assembler for pushing a list of registers.
|
||||
if (pending_pushes.size() == 3) {
|
||||
FlushPendingPushRegisters(tasm(), frame_access_state(),
|
||||
&pending_pushes);
|
||||
}
|
||||
move->Eliminate();
|
||||
}
|
||||
|
@ -328,16 +328,12 @@ bool CodeGenerator::IsValidPush(InstructionOperand source,
|
||||
((push_type & CodeGenerator::kImmediatePush) != 0)) {
|
||||
return true;
|
||||
}
|
||||
if ((source.IsRegister() || source.IsStackSlot()) &&
|
||||
((push_type & CodeGenerator::kScalarPush) != 0)) {
|
||||
if (source.IsRegister() &&
|
||||
((push_type & CodeGenerator::kRegisterPush) != 0)) {
|
||||
return true;
|
||||
}
|
||||
if ((source.IsFloatRegister() || source.IsFloatStackSlot()) &&
|
||||
((push_type & CodeGenerator::kFloat32Push) != 0)) {
|
||||
return true;
|
||||
}
|
||||
if ((source.IsDoubleRegister() || source.IsFloatStackSlot()) &&
|
||||
((push_type & CodeGenerator::kFloat64Push) != 0)) {
|
||||
if (source.IsStackSlot() &&
|
||||
((push_type & CodeGenerator::kStackSlotPush) != 0)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -183,10 +183,9 @@ class CodeGenerator final : public GapResolver::Assembler {
|
||||
|
||||
enum PushTypeFlag {
|
||||
kImmediatePush = 0x1,
|
||||
kScalarPush = 0x2,
|
||||
kFloat32Push = 0x4,
|
||||
kFloat64Push = 0x8,
|
||||
kFloatPush = kFloat32Push | kFloat64Push
|
||||
kRegisterPush = 0x2,
|
||||
kStackSlotPush = 0x4,
|
||||
kScalarPush = kRegisterPush | kStackSlotPush
|
||||
};
|
||||
|
||||
typedef base::Flags<PushTypeFlag> PushTypeFlags;
|
||||
@ -308,6 +307,7 @@ class CodeGenerator final : public GapResolver::Assembler {
|
||||
};
|
||||
|
||||
friend class OutOfLineCode;
|
||||
friend class CodeGeneratorTester;
|
||||
|
||||
Zone* zone_;
|
||||
FrameAccessState* frame_access_state_;
|
||||
|
@ -27,6 +27,7 @@ v8_executable("cctest") {
|
||||
"compiler/test-basic-block-profiler.cc",
|
||||
"compiler/test-branch-combine.cc",
|
||||
"compiler/test-code-assembler.cc",
|
||||
"compiler/test-code-generator.cc",
|
||||
"compiler/test-gap-resolver.cc",
|
||||
"compiler/test-graph-visualizer.cc",
|
||||
"compiler/test-instruction.cc",
|
||||
|
@ -45,6 +45,7 @@
|
||||
'compiler/test-run-unwinding-info.cc',
|
||||
'compiler/test-gap-resolver.cc',
|
||||
'compiler/test-graph-visualizer.cc',
|
||||
'compiler/test-code-generator.cc',
|
||||
'compiler/test-code-assembler.cc',
|
||||
'compiler/test-instruction.cc',
|
||||
'compiler/test-js-context-specialization.cc',
|
||||
|
199
test/cctest/compiler/test-code-generator.cc
Normal file
199
test/cctest/compiler/test-code-generator.cc
Normal file
@ -0,0 +1,199 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/codegen.h"
|
||||
#include "src/compilation-info.h"
|
||||
#include "src/compiler/code-generator.h"
|
||||
#include "src/compiler/instruction.h"
|
||||
#include "src/compiler/linkage.h"
|
||||
#include "src/isolate.h"
|
||||
|
||||
#include "test/cctest/cctest.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
namespace compiler {
|
||||
|
||||
class CodeGeneratorTester : public InitializedHandleScope {
|
||||
public:
|
||||
CodeGeneratorTester()
|
||||
: zone_(main_isolate()->allocator(), ZONE_NAME),
|
||||
info_(ArrayVector("test"), main_isolate(), &zone_,
|
||||
Code::ComputeFlags(Code::STUB)),
|
||||
descriptor_(Linkage::GetJSCallDescriptor(&zone_, false, 0,
|
||||
CallDescriptor::kNoFlags)),
|
||||
linkage_(descriptor_),
|
||||
blocks_(&zone_),
|
||||
sequence_(main_isolate(), &zone_, &blocks_),
|
||||
frame_(descriptor_->CalculateFixedFrameSize()),
|
||||
generator_(&zone_, &frame_, &linkage_, &sequence_, &info_,
|
||||
base::Optional<OsrHelper>(), kNoSourcePosition) {
|
||||
info_.set_prologue_offset(generator_.tasm()->pc_offset());
|
||||
}
|
||||
|
||||
enum PushTypeFlag {
|
||||
kRegisterPush = CodeGenerator::kRegisterPush,
|
||||
kStackSlotPush = CodeGenerator::kStackSlotPush,
|
||||
kScalarPush = CodeGenerator::kScalarPush
|
||||
};
|
||||
|
||||
void CheckAssembleTailCallGaps(Instruction* instr,
|
||||
int first_unused_stack_slot,
|
||||
CodeGeneratorTester::PushTypeFlag push_type) {
|
||||
generator_.AssembleTailCallBeforeGap(instr, first_unused_stack_slot);
|
||||
#if defined(V8_TARGET_ARCH_ARM)
|
||||
// Only folding register pushes is supported on ARM.
|
||||
bool supported = ((push_type & CodeGenerator::kRegisterPush) == push_type);
|
||||
#elif defined(V8_TARGET_ARCH_X64) || defined(V8_TARGET_ARCH_IA32) || \
|
||||
defined(V8_TARGET_ARCH_X87) || defined(V8_TARGET_ARCH_S390X) || \
|
||||
defined(V8_TARGET_ARCH_PPC64)
|
||||
bool supported = ((push_type & CodeGenerator::kScalarPush) == push_type);
|
||||
#else
|
||||
bool supported = false;
|
||||
#endif
|
||||
if (supported) {
|
||||
// Architectures supporting folding adjacent pushes should now have
|
||||
// resolved all moves.
|
||||
for (const auto& move :
|
||||
*instr->parallel_moves()[Instruction::FIRST_GAP_POSITION]) {
|
||||
CHECK(move->IsEliminated());
|
||||
}
|
||||
}
|
||||
generator_.AssembleGaps(instr);
|
||||
generator_.AssembleTailCallAfterGap(instr, first_unused_stack_slot);
|
||||
}
|
||||
|
||||
Handle<Code> Finalize() {
|
||||
generator_.FinishCode();
|
||||
generator_.safepoints()->Emit(generator_.tasm(),
|
||||
frame_.GetTotalFrameSlotCount());
|
||||
return generator_.FinalizeCode();
|
||||
}
|
||||
|
||||
void Disassemble() {
|
||||
HandleScope scope(main_isolate());
|
||||
Handle<Code> code = Finalize();
|
||||
if (FLAG_print_code) {
|
||||
code->Print();
|
||||
}
|
||||
}
|
||||
|
||||
Zone* zone() { return &zone_; }
|
||||
|
||||
private:
|
||||
Zone zone_;
|
||||
CompilationInfo info_;
|
||||
CallDescriptor* descriptor_;
|
||||
Linkage linkage_;
|
||||
ZoneVector<InstructionBlock*> blocks_;
|
||||
InstructionSequence sequence_;
|
||||
Frame frame_;
|
||||
CodeGenerator generator_;
|
||||
};
|
||||
|
||||
TEST(AssembleTailCallGap) {
|
||||
const RegisterConfiguration* conf = RegisterConfiguration::Turbofan();
|
||||
|
||||
// This test assumes at least 4 registers are allocatable.
|
||||
CHECK(conf->num_allocatable_general_registers() >= 4);
|
||||
|
||||
auto r0 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(0));
|
||||
auto r1 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(1));
|
||||
auto r2 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(2));
|
||||
auto r3 = AllocatedOperand(LocationOperand::REGISTER,
|
||||
MachineRepresentation::kTagged,
|
||||
conf->GetAllocatableGeneralCode(3));
|
||||
|
||||
auto slot_minus_4 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -4);
|
||||
auto slot_minus_3 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -3);
|
||||
auto slot_minus_2 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -2);
|
||||
auto slot_minus_1 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, -1);
|
||||
|
||||
// Avoid slot 0 for architectures which use it store the return address.
|
||||
int first_slot = V8_TARGET_ARCH_STORES_RETURN_ADDRESS_ON_STACK ? 1 : 0;
|
||||
auto slot_0 = AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot);
|
||||
auto slot_1 =
|
||||
AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot + 1);
|
||||
auto slot_2 =
|
||||
AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot + 2);
|
||||
auto slot_3 =
|
||||
AllocatedOperand(LocationOperand::STACK_SLOT,
|
||||
MachineRepresentation::kTagged, first_slot + 3);
|
||||
|
||||
// These tests all generate series of moves that the code generator should
|
||||
// detect as adjacent pushes. Depending on the architecture, we make sure
|
||||
// these moves get eliminated.
|
||||
// Also, disassembling with `--print-code` is useful when debugging.
|
||||
|
||||
{
|
||||
// Generate a series of register pushes only.
|
||||
CodeGeneratorTester c;
|
||||
Instruction* instr = Instruction::New(c.zone(), kArchNop);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r3, slot_0);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r2, slot_1);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r1, slot_2);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r0, slot_3);
|
||||
|
||||
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
|
||||
CodeGeneratorTester::kRegisterPush);
|
||||
c.Disassemble();
|
||||
}
|
||||
|
||||
{
|
||||
// Generate a series of stack pushes only.
|
||||
CodeGeneratorTester c;
|
||||
Instruction* instr = Instruction::New(c.zone(), kArchNop);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_4, slot_0);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_3, slot_1);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_2, slot_2);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_1, slot_3);
|
||||
|
||||
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
|
||||
CodeGeneratorTester::kStackSlotPush);
|
||||
c.Disassemble();
|
||||
}
|
||||
|
||||
{
|
||||
// Generate a mix of stack and register pushes.
|
||||
CodeGeneratorTester c;
|
||||
Instruction* instr = Instruction::New(c.zone(), kArchNop);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_2, slot_0);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r1, slot_1);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(slot_minus_1, slot_2);
|
||||
instr->GetOrCreateParallelMove(Instruction::FIRST_GAP_POSITION, c.zone())
|
||||
->AddMove(r0, slot_3);
|
||||
|
||||
c.CheckAssembleTailCallGaps(instr, first_slot + 4,
|
||||
CodeGeneratorTester::kScalarPush);
|
||||
c.Disassemble();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
Loading…
Reference in New Issue
Block a user