2014-11-27 09:19:31 +00:00
|
|
|
// Copyright 2014 the V8 project authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
|
|
|
|
|
|
|
#include "src/compiler/move-optimizer.h"
|
[turbofan] fine grained in-block move optimization
So far, we've been moving down gaps wholesale. This change moves
individual move operations instead. This improves some benchmarks,
and should overall reduce code size, because it improves the chance of
reducing the number of moves.
For example, there are improvements on x64 in Emscripten (Bullet, in
particular) , JetStream geomean, Embenchen (zlib).
In the process of making this change, I noticed we can separate the
tasks performed by the move optimizer, as follows:
- group gaps into 1
- push gaps down, jumping instructions (these 2 were together before)
- merge blocks (and then push gaps down)
- finalize
We can do without a finalization list. This avoids duplicating storage -
we already have the list of instructions; it also simplifies the logic, since,
with this change, we may process an instruction's gap twice.
Compile time doesn't regress much (see pathological cases), but we
may want to avoid the allocations of the few sets used in the new code.
I'll do that in a subsequent change.
BUG=
Review URL: https://codereview.chromium.org/1634093002
Cr-Commit-Position: refs/heads/master@{#33715}
2016-02-04 06:29:54 +00:00
|
|
|
#include "src/compiler/pipeline.h"
|
2017-02-23 11:46:29 +00:00
|
|
|
#include "src/ostreams.h"
|
2014-11-27 09:19:31 +00:00
|
|
|
#include "test/unittests/compiler/instruction-sequence-unittest.h"
|
|
|
|
|
|
|
|
namespace v8 {
|
|
|
|
namespace internal {
|
|
|
|
namespace compiler {
|
|
|
|
|
|
|
|
class MoveOptimizerTest : public InstructionSequenceTest {
|
|
|
|
public:
|
2016-10-26 16:04:11 +00:00
|
|
|
// FP register indices which don't interfere under simple or complex aliasing.
|
|
|
|
static const int kF64_1 = 0;
|
|
|
|
static const int kF64_2 = 1;
|
|
|
|
static const int kF32_1 = 4;
|
|
|
|
static const int kF32_2 = 5;
|
|
|
|
static const int kS128_1 = 2;
|
|
|
|
static const int kS128_2 = 3;
|
|
|
|
|
2015-03-31 13:06:37 +00:00
|
|
|
Instruction* LastInstruction() { return sequence()->instructions().back(); }
|
2014-11-27 09:19:31 +00:00
|
|
|
|
2015-03-31 13:06:37 +00:00
|
|
|
void AddMove(Instruction* instr, TestOperand from, TestOperand to,
|
|
|
|
Instruction::GapPosition pos = Instruction::START) {
|
|
|
|
auto parallel_move = instr->GetOrCreateParallelMove(pos, zone());
|
2015-04-15 12:36:36 +00:00
|
|
|
parallel_move->AddMove(ConvertMoveArg(from), ConvertMoveArg(to));
|
2014-11-27 09:19:31 +00:00
|
|
|
}
|
|
|
|
|
2015-04-15 12:36:36 +00:00
|
|
|
int NonRedundantSize(ParallelMove* moves) {
|
2014-11-27 09:19:31 +00:00
|
|
|
int i = 0;
|
2015-04-15 12:36:36 +00:00
|
|
|
for (auto move : *moves) {
|
|
|
|
if (move->IsRedundant()) continue;
|
2014-11-27 09:19:31 +00:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
2015-04-15 12:36:36 +00:00
|
|
|
bool Contains(ParallelMove* moves, TestOperand from_op, TestOperand to_op) {
|
2014-11-27 09:19:31 +00:00
|
|
|
auto from = ConvertMoveArg(from_op);
|
|
|
|
auto to = ConvertMoveArg(to_op);
|
2015-04-15 12:36:36 +00:00
|
|
|
for (auto move : *moves) {
|
|
|
|
if (move->IsRedundant()) continue;
|
2015-04-29 19:36:16 +00:00
|
|
|
if (move->source().Equals(from) && move->destination().Equals(to)) {
|
2014-11-27 09:19:31 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(dcarney): add a verifier.
|
|
|
|
void Optimize() {
|
|
|
|
WireBlocks();
|
|
|
|
if (FLAG_trace_turbo) {
|
|
|
|
OFStream os(stdout);
|
|
|
|
PrintableInstructionSequence printable = {config(), sequence()};
|
|
|
|
os << "----- Instruction sequence before move optimization -----\n"
|
|
|
|
<< printable;
|
|
|
|
}
|
|
|
|
MoveOptimizer move_optimizer(zone(), sequence());
|
|
|
|
move_optimizer.Run();
|
|
|
|
if (FLAG_trace_turbo) {
|
|
|
|
OFStream os(stdout);
|
|
|
|
PrintableInstructionSequence printable = {config(), sequence()};
|
|
|
|
os << "----- Instruction sequence after move optimization -----\n"
|
|
|
|
<< printable;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2016-10-10 11:06:42 +00:00
|
|
|
bool DoesRegisterAllocation() const override { return false; }
|
|
|
|
|
2015-04-15 12:36:36 +00:00
|
|
|
InstructionOperand ConvertMoveArg(TestOperand op) {
|
2014-11-27 09:19:31 +00:00
|
|
|
CHECK_EQ(kNoValue, op.vreg_.value_);
|
|
|
|
CHECK_NE(kNoValue, op.value_);
|
|
|
|
switch (op.type_) {
|
|
|
|
case kConstant:
|
2015-04-15 12:36:36 +00:00
|
|
|
return ConstantOperand(op.value_);
|
2014-11-27 09:19:31 +00:00
|
|
|
case kFixedSlot:
|
2015-12-10 09:03:30 +00:00
|
|
|
return AllocatedOperand(LocationOperand::STACK_SLOT,
|
|
|
|
MachineRepresentation::kWord32, op.value_);
|
2016-10-10 11:06:42 +00:00
|
|
|
case kFixedRegister: {
|
|
|
|
MachineRepresentation rep = GetCanonicalRep(op);
|
|
|
|
CHECK(0 <= op.value_ && op.value_ < GetNumRegs(rep));
|
|
|
|
return AllocatedOperand(LocationOperand::REGISTER, rep, op.value_);
|
|
|
|
}
|
|
|
|
case kExplicit: {
|
|
|
|
MachineRepresentation rep = GetCanonicalRep(op);
|
|
|
|
CHECK(0 <= op.value_ && op.value_ < GetNumRegs(rep));
|
|
|
|
return ExplicitOperand(LocationOperand::REGISTER, rep, op.value_);
|
|
|
|
}
|
2014-11-27 09:19:31 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2017-12-19 11:39:50 +00:00
|
|
|
UNREACHABLE();
|
2014-11-27 09:19:31 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// The first gap moves A -> B and the second gap moves B -> A, for a general
// register pair and for simd128, float64 and float32 pairs. After
// optimization the first gap must hold no non-redundant move and the second
// gap must hold exactly the four A -> B moves.
TEST_F(MoveOptimizerTest, RemovesRedundant) {
  StartBlock();
  Instruction* first = EmitNop();
  Instruction* second = EmitNop();

  // General purpose registers.
  AddMove(first, Reg(0), Reg(1));
  AddMove(second, Reg(1), Reg(0));

  // Floating point / SIMD registers, one pair per representation.
  AddMove(first, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
  AddMove(second, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
  AddMove(first, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
  AddMove(second, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
  AddMove(first, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
  AddMove(second, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));

  EndBlock(Last());

  Optimize();

  CHECK_EQ(0, NonRedundantSize(first->parallel_moves()[0]));
  ParallelMove* remaining = second->parallel_moves()[0];
  CHECK_EQ(4, NonRedundantSize(remaining));
  CHECK(Contains(remaining, Reg(0), Reg(1)));
  CHECK(
      Contains(remaining, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
  CHECK(Contains(remaining, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
  CHECK(Contains(remaining, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
|
|
|
|
|
[turbofan] Create ExplicitOperands to specify operands without virtual registers
Up until now, if one wanted to specify an explicit stack location or register as an operand for an instruction, it had to also be
explicitly associated with a virtual register as a so-called
FixedRegister or FixedStackSlot.
For the implementation of tail calls, the plan is to use the gap
resolver to shuffle stack locations from the caller to the
tail-called callee. In order to do this, it must be possible to
explicitly address operand locations on the stack that are not
associated with virtual registers.
This CL introduces ExplicitOperands, which can specify a specific
register or stack location that is not associated with a virtual
register. This will allow tail calls to specify the target
locations for the necessary stack moves in the gap for the tail
call without the core register allocation having to know about
the target of the stack moves at all.
In the process this CL:
* creates a new Operand kind, ExplicitOperand, with which
instructions can specify register and stack slots without an
associated virtual register.
* creates a LocationOperand class from which AllocatedOperand and
ExplicitOperand are derived and provides a common interface to
get Register, DoubleRegister and spill slot information.
* removes RegisterOperand, DoubleRegisterOperand,
StackSlotOperand and DoubleStackSlotOperand, they are subsumed
by LocationOperand.
* addresses a cleanup TODO in AllocatedOperand to reduce the
redundancy of AllocatedOperand::Kind by using machine_type() to
determine if an operand corresponds to a general purpose or
double register.
BUG=v8:4076
LOG=n
Review URL: https://codereview.chromium.org/1389373002
Cr-Commit-Position: refs/heads/master@{#31603}
2015-10-27 13:26:35 +00:00
|
|
|
// Same shape as RemovesRedundant, but the destinations in the first gap are
// explicit register operands and the register codes come from
// GetAllocatableCode. After optimization the first gap must hold no
// non-redundant move and the second gap must hold the four moves into the
// explicit registers.
TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
  const int gp_a = GetAllocatableCode(0);
  const int gp_b = GetAllocatableCode(1);
  const int simd_a = GetAllocatableCode(kS128_1, kSimd128);
  const int simd_b = GetAllocatableCode(kS128_2, kSimd128);
  const int double_a = GetAllocatableCode(kF64_1, kFloat64);
  const int double_b = GetAllocatableCode(kF64_2, kFloat64);
  const int float_a = GetAllocatableCode(kF32_1, kFloat32);
  const int float_b = GetAllocatableCode(kF32_2, kFloat32);

  StartBlock();
  Instruction* first = EmitNop();
  Instruction* second = EmitNop();

  // General purpose registers.
  AddMove(first, Reg(gp_a), ExplicitReg(gp_b));
  AddMove(second, Reg(gp_b), Reg(gp_a));

  // Floating point / SIMD registers, one pair per representation.
  AddMove(first, FPReg(simd_a, kSimd128), ExplicitFPReg(simd_b, kSimd128));
  AddMove(second, FPReg(simd_b, kSimd128), FPReg(simd_a, kSimd128));
  AddMove(first, FPReg(double_a, kFloat64), ExplicitFPReg(double_b, kFloat64));
  AddMove(second, FPReg(double_b, kFloat64), FPReg(double_a, kFloat64));
  AddMove(first, FPReg(float_a, kFloat32), ExplicitFPReg(float_b, kFloat32));
  AddMove(second, FPReg(float_b, kFloat32), FPReg(float_a, kFloat32));

  EndBlock(Last());

  Optimize();

  CHECK_EQ(0, NonRedundantSize(first->parallel_moves()[0]));
  ParallelMove* remaining = second->parallel_moves()[0];
  CHECK_EQ(4, NonRedundantSize(remaining));
  CHECK(Contains(remaining, Reg(gp_a), ExplicitReg(gp_b)));
  CHECK(Contains(remaining, FPReg(simd_a, kSimd128),
                 ExplicitFPReg(simd_b, kSimd128)));
  CHECK(Contains(remaining, FPReg(double_a, kFloat64),
                 ExplicitFPReg(double_b, kFloat64)));
  CHECK(Contains(remaining, FPReg(float_a, kFloat32),
                 ExplicitFPReg(float_b, kFloat32)));
}
|
|
|
|
|
2014-11-27 09:19:31 +00:00
|
|
|
// One constant is moved into three stack slots and one register within a
// single gap. After optimization the first parallel move must only load the
// constant into the register, and the second parallel move must fill the
// three slots from that register.
TEST_F(MoveOptimizerTest, SplitsConstants) {
  StartBlock();
  EndBlock(Last());

  Instruction* block_end = LastInstruction();
  AddMove(block_end, Const(1), Slot(0));
  AddMove(block_end, Const(1), Slot(1));
  AddMove(block_end, Const(1), Reg(0));
  AddMove(block_end, Const(1), Slot(2));

  Optimize();

  // Gap position 0: the single constant materialization.
  ParallelMove* constant_load = block_end->parallel_moves()[0];
  CHECK_EQ(1, NonRedundantSize(constant_load));
  CHECK(Contains(constant_load, Const(1), Reg(0)));

  // Gap position 1: the register is fanned out to the slots.
  ParallelMove* slot_stores = block_end->parallel_moves()[1];
  CHECK_EQ(3, NonRedundantSize(slot_stores));
  CHECK(Contains(slot_stores, Reg(0), Slot(0)));
  CHECK(Contains(slot_stores, Reg(0), Slot(1)));
  CHECK(Contains(slot_stores, Reg(0), Slot(2)));
}
|
|
|
|
|
2015-02-24 12:49:28 +00:00
|
|
|
// A branch leads to two blocks that end with the same four moves and jump to
// a common final block. After optimization the first parallel move of the
// final block's last instruction must contain those four moves.
TEST_F(MoveOptimizerTest, SimpleMerge) {
  // Entry block: branches to the two arms.
  StartBlock();
  EndBlock(Branch(Imm(), 1, 2));

  // First arm.
  StartBlock();
  EndBlock(Jump(2));
  AddMove(LastInstruction(), Reg(0), Reg(1));
  AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
          FPReg(kS128_2, kSimd128));
  AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
  AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));

  // Second arm: identical moves.
  StartBlock();
  EndBlock(Jump(1));
  AddMove(LastInstruction(), Reg(0), Reg(1));
  AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
          FPReg(kS128_2, kSimd128));
  AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
  AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));

  // Join block.
  StartBlock();
  EndBlock(Last());

  Instruction* join_end = LastInstruction();

  Optimize();

  ParallelMove* merged = join_end->parallel_moves()[0];
  CHECK_EQ(4, NonRedundantSize(merged));
  CHECK(Contains(merged, Reg(0), Reg(1)));
  CHECK(Contains(merged, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
  CHECK(Contains(merged, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
  CHECK(Contains(merged, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
|
|
|
|
|
|
|
|
// Like SimpleMerge, but each arm swaps register pairs (A -> B together with
// B -> A). After optimization both arm gaps must be fully redundant and the
// first parallel move of the join block's last instruction must contain all
// eight moves.
TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
  // Entry block: branches to the two arms.
  StartBlock();
  EndBlock(Branch(Imm(), 1, 2));

  // First arm.
  StartBlock();
  EndBlock(Jump(2));
  Instruction* arm_a_end = LastInstruction();
  AddMove(arm_a_end, Reg(0), Reg(1));
  AddMove(LastInstruction(), Reg(1), Reg(0));

  AddMove(arm_a_end, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
  AddMove(LastInstruction(), FPReg(kS128_2, kSimd128),
          FPReg(kS128_1, kSimd128));
  AddMove(arm_a_end, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
  AddMove(LastInstruction(), FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
  AddMove(arm_a_end, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
  AddMove(LastInstruction(), FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));

  // Second arm: the same swaps.
  StartBlock();
  EndBlock(Jump(1));
  Instruction* arm_b_end = LastInstruction();
  AddMove(arm_b_end, Reg(0), Reg(1));
  AddMove(arm_b_end, Reg(1), Reg(0));
  AddMove(arm_b_end, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
  AddMove(arm_b_end, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
  AddMove(arm_b_end, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
  AddMove(arm_b_end, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
  AddMove(arm_b_end, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
  AddMove(arm_b_end, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));

  // Join block.
  StartBlock();
  EndBlock(Last());

  Instruction* join_end = LastInstruction();

  Optimize();

  CHECK(arm_a_end->AreMovesRedundant());
  CHECK(arm_b_end->AreMovesRedundant());
  ParallelMove* merged = join_end->parallel_moves()[0];
  CHECK_EQ(8, NonRedundantSize(merged));
  CHECK(Contains(merged, Reg(0), Reg(1)));
  CHECK(Contains(merged, Reg(1), Reg(0)));
  CHECK(Contains(merged, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
  CHECK(Contains(merged, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128)));
  CHECK(Contains(merged, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
  CHECK(Contains(merged, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)));
  CHECK(Contains(merged, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
  CHECK(Contains(merged, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)));
}
|
|
|
|
|
2015-10-29 16:12:15 +00:00
|
|
|
// A constant definition carries a register move in its gap and the following
// nop loads the constant and moves a register. After optimization the gaps
// of the constant definition must be absent or fully redundant, and the
// START gap of the nop must hold two moves: one redundant and one
// constant-to-register assignment.
TEST_F(MoveOptimizerTest, GapsCanMoveOverInstruction) {
  StartBlock();
  int const_index = 1;
  DefineConstant(const_index);
  Instruction* constant_def = LastInstruction();
  AddMove(constant_def, Reg(1), Reg(0));

  Instruction* nop = EmitNop();
  AddMove(nop, Const(const_index), Reg(0));
  AddMove(nop, Reg(0), Reg(1));
  EndBlock(Last());
  Optimize();

  ParallelMove* def_start =
      constant_def->GetParallelMove(Instruction::GapPosition::START);
  ParallelMove* def_end =
      constant_def->GetParallelMove(Instruction::GapPosition::END);
  ParallelMove* nop_start =
      nop->GetParallelMove(Instruction::GapPosition::START);

  CHECK(def_start == nullptr || NonRedundantSize(def_start) == 0);
  CHECK(def_end == nullptr || NonRedundantSize(def_end) == 0);
  CHECK_EQ(2, nop_start->size());

  // Classify the moves left in the nop's START gap.
  int redundant_count = 0;
  int assignment_count = 0;
  for (MoveOperands* candidate : *nop_start) {
    if (candidate->IsRedundant()) {
      ++redundant_count;
      continue;
    }
    ++assignment_count;
    CHECK(candidate->destination().IsRegister());
    CHECK(candidate->source().IsConstant());
  }
  CHECK_EQ(1, redundant_count);
  CHECK_EQ(1, assignment_count);
}
|
|
|
|
|
2016-01-25 06:32:19 +00:00
|
|
|
// Two arms share one move (r0 -> r1) and each has one move of its own. After
// optimization the shared move must be in the join block and each arm must
// keep only its own move.
TEST_F(MoveOptimizerTest, SubsetMovesMerge) {
  // Entry block: branches to the two arms.
  StartBlock();
  EndBlock(Branch(Imm(), 1, 2));

  // First arm: shared move plus r2 -> r3.
  StartBlock();
  EndBlock(Jump(2));
  Instruction* arm_a_end = LastInstruction();
  AddMove(arm_a_end, Reg(0), Reg(1));
  AddMove(arm_a_end, Reg(2), Reg(3));

  // Second arm: shared move plus r4 -> r5.
  StartBlock();
  EndBlock(Jump(1));
  Instruction* arm_b_end = LastInstruction();
  AddMove(arm_b_end, Reg(0), Reg(1));
  AddMove(arm_b_end, Reg(4), Reg(5));

  // Join block.
  StartBlock();
  EndBlock(Last());

  Instruction* join_end = LastInstruction();

  Optimize();

  ParallelMove* merged = join_end->parallel_moves()[0];
  CHECK_EQ(1, NonRedundantSize(merged));
  CHECK(Contains(merged, Reg(0), Reg(1)));

  ParallelMove* arm_a_moves = arm_a_end->parallel_moves()[0];
  CHECK_EQ(1, NonRedundantSize(arm_a_moves));
  CHECK(Contains(arm_a_moves, Reg(2), Reg(3)));

  ParallelMove* arm_b_moves = arm_b_end->parallel_moves()[0];
  CHECK_EQ(1, NonRedundantSize(arm_b_moves));
  CHECK(Contains(arm_b_moves, Reg(4), Reg(5)));
}
|
|
|
|
|
|
|
|
// Two arms share the moves r0 -> r1 and r4 -> r5, but the first arm also has
// r2 -> r0 in the same gap, which writes the source of r0 -> r1. The test
// expects that only r4 -> r5 ends up in the join block, that the first arm
// keeps r0 -> r1 and r2 -> r0, and that the second arm keeps r0 -> r1.
TEST_F(MoveOptimizerTest, GapConflictSubsetMovesDoNotMerge) {
  // Entry block: branches to the two arms.
  StartBlock();
  EndBlock(Branch(Imm(), 1, 2));

  // First arm: the shared moves plus the conflicting r2 -> r0.
  StartBlock();
  EndBlock(Jump(2));
  Instruction* last_move_b1 = LastInstruction();
  AddMove(last_move_b1, Reg(0), Reg(1));
  AddMove(last_move_b1, Reg(2), Reg(0));
  AddMove(last_move_b1, Reg(4), Reg(5));

  // Second arm: only the shared moves.
  StartBlock();
  EndBlock(Jump(1));
  Instruction* last_move_b2 = LastInstruction();
  AddMove(last_move_b2, Reg(0), Reg(1));
  AddMove(last_move_b2, Reg(4), Reg(5));

  // Join block.
  StartBlock();
  EndBlock(Last());

  Instruction* last = LastInstruction();

  Optimize();

  ParallelMove* last_move = last->parallel_moves()[0];
  CHECK_EQ(1, NonRedundantSize(last_move));
  CHECK(Contains(last_move, Reg(4), Reg(5)));

  ParallelMove* b1_move = last_move_b1->parallel_moves()[0];
  CHECK_EQ(2, NonRedundantSize(b1_move));
  CHECK(Contains(b1_move, Reg(0), Reg(1)));
  CHECK(Contains(b1_move, Reg(2), Reg(0)));

  ParallelMove* b2_move = last_move_b2->parallel_moves()[0];
  CHECK_EQ(1, NonRedundantSize(b2_move));
  // Inspect the second arm here. The previous code re-checked b1_move, which
  // duplicated an assertion above and left b2_move's contents unverified.
  CHECK(Contains(b2_move, Reg(0), Reg(1)));
}
|
|
|
|
|
[turbofan] fine grained in-block move optimization
So far, we've been moving down gaps wholesale. This change moves
individual move operations instead. This improves some benchmarks,
and should overall reduce code size, because it improves the chance of
reducing the number of moves.
For example, there are improvements on x64 in Emscripten (Bullet, in
particular) , JetStream geomean, Embenchen (zlib).
In the process of making this change, I noticed we can separate the
tasks performed by the move optimizer, as follows:
- group gaps into 1
- push gaps down, jumping instructions (these 2 were together before)
- merge blocks (and then push gaps down)
- finalize
We can do without a finalization list. This avoids duplicating storage -
we already have the list of instructions; it also simplifies the logic, since,
with this change, we may process an instruction's gap twice.
Compile time doesn't regress much (see pathological cases), but we
may want to avoid the allocations of the few sets used in the new code.
I'll do that in a subsequent change.
BUG=
Review URL: https://codereview.chromium.org/1634093002
Cr-Commit-Position: refs/heads/master@{#33715}
2016-02-04 06:29:54 +00:00
|
|
|
// A gap move writes register 1 and the next instruction outputs to register
// 1 as well. After optimization neither instruction's first parallel move
// may hold a non-redundant move.
TEST_F(MoveOptimizerTest, ClobberedDestinationsAreEliminated) {
  StartBlock();
  EmitNop();
  Instruction* nop = LastInstruction();
  AddMove(nop, Reg(0), Reg(1));
  // This instruction's output is the destination of the move above.
  EmitOI(Reg(1), 0, nullptr);
  Instruction* clobberer = LastInstruction();
  EndBlock();
  Optimize();

  ParallelMove* nop_moves = nop->parallel_moves()[0];
  CHECK_EQ(0, NonRedundantSize(nop_moves));

  ParallelMove* clobberer_moves = clobberer->parallel_moves()[0];
  CHECK_EQ(0, NonRedundantSize(clobberer_moves));
}
|
|
|
|
|
|
|
|
// FP variant of ClobberedDestinationsAreEliminated: gap moves write FP
// registers and the next instruction outputs to simd128 register 0. After
// optimization neither instruction's first parallel move may hold a
// non-redundant move.
TEST_F(MoveOptimizerTest, ClobberedFPDestinationsAreEliminated) {
  StartBlock();
  EmitNop();
  Instruction* nop = LastInstruction();
  AddMove(nop, FPReg(4, kFloat64), FPReg(1, kFloat64));
  if (!kSimpleFPAliasing) {
    // The instruction below clobbers q0, which is aliased by d0, d1, s0, s1,
    // s2, and s3. With non-simple aliasing, also add moves whose destinations
    // are float32 registers 0 and 1.
    AddMove(nop, FPReg(10, kFloat32), FPReg(0, kFloat32));
    AddMove(nop, FPReg(11, kFloat32), FPReg(1, kFloat32));
  }
  // Outputs to simd128 register 0, clobbering it.
  EmitOI(FPReg(0, kSimd128), 0, nullptr);
  Instruction* clobberer = LastInstruction();
  EndBlock();
  Optimize();

  ParallelMove* nop_moves = nop->parallel_moves()[0];
  CHECK_EQ(0, NonRedundantSize(nop_moves));

  ParallelMove* clobberer_moves = clobberer->parallel_moves()[0];
  CHECK_EQ(0, NonRedundantSize(clobberer_moves));
}
|
2016-01-25 06:32:19 +00:00
|
|
|
|
2014-11-27 09:19:31 +00:00
|
|
|
} // namespace compiler
|
|
|
|
} // namespace internal
|
|
|
|
} // namespace v8
|