[turbofan] fine-grained in-block move optimization

So far, we've been moving gaps down wholesale. This change moves
individual move operations instead. This improves some benchmarks,
and should reduce overall code size, because it increases the chance
of eliminating moves.

For example, there are improvements on x64 in Emscripten (Bullet in
particular), the JetStream geomean, and Embenchen (zlib).

In the process of making this change, I noticed we can separate the
tasks performed by the move optimizer as follows (the sketch after the
list illustrates how these phases are sequenced):

- group each instruction's gaps into one
- push gaps down, jumping over instructions (these two steps were
  previously combined)
- merge blocks (and then push gaps down again)
- finalize
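
A minimal sketch of that sequencing, for illustration only: the phase
names mirror the optimizer in the diff below, but the Instruction/Block
stand-ins, the stubbed bodies, and the driver in main() are simplified
placeholders, not the actual V8 types or code.

// Illustrative phase ordering only; build with: g++ -std=c++14 phases.cc
#include <cstdio>

struct Instruction {};             // stand-in for the real Instruction
struct Block { int start, end; };  // stand-in for InstructionBlock

// 1) Fold both gap slots of an instruction into the first one.
void CompressGaps(Instruction&) {}
// 2) Walk a block, pushing eligible gap moves down past instructions
//    (MigrateMoves) and dropping clobbered ones (RemoveClobberedDestinations).
void CompressBlock(Block&) {}
// 3) Hoist moves common to all predecessors into the block, then push down again.
void OptimizeMerge(Block&) {}
// 4) Split the remaining gap moves into loads and other moves.
void FinalizeMoves(Instruction&) {}

int main() {
  Instruction instrs[4];
  Block blocks[2] = {{0, 2}, {2, 4}};
  for (Instruction& i : instrs) CompressGaps(i);
  for (Block& b : blocks) CompressBlock(b);
  for (Block& b : blocks) OptimizeMerge(b);  // only blocks with >1 predecessor
  // No to_finalize_ list: finalization just walks the full instruction list,
  // so a gap may be visited more than once.
  for (Instruction& i : instrs) FinalizeMoves(i);
  std::puts("phases ran in order");
}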

We can do without a finalization list. This avoids duplicating storage -
we already have the list of instructions - and it also simplifies the
logic, since, with this change, we may process an instruction's gap twice.

Compile time doesn't regress much (see pathological cases), but we
may want to avoid the allocations of the few sets used in the new code.
I'll do that in a subsequent change.

BUG=

Review URL: https://codereview.chromium.org/1634093002

Cr-Commit-Position: refs/heads/master@{#33715}
mtrofin, 2016-02-03 22:29:54 -08:00, committed by Commit bot
parent 152eb89129
commit 1ecf58f409
5 changed files with 248 additions and 103 deletions


@ -633,8 +633,14 @@ class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
MoveOperands* AddMove(const InstructionOperand& from,
const InstructionOperand& to) {
auto zone = get_allocator().zone();
auto move = new (zone) MoveOperands(from, to);
Zone* zone = get_allocator().zone();
return AddMove(from, to, zone);
}
MoveOperands* AddMove(const InstructionOperand& from,
const InstructionOperand& to,
Zone* operand_allocation_zone) {
MoveOperands* move = new (operand_allocation_zone) MoveOperands(from, to);
push_back(move);
return move;
}


@ -35,39 +35,6 @@ typedef ZoneMap<MoveKey, unsigned, MoveKeyCompare> MoveMap;
typedef ZoneSet<InstructionOperand, CompareOperandModuloType> OperandSet;
bool GapsCanMoveOver(Instruction* instr, Zone* zone) {
if (instr->IsNop()) return true;
if (instr->ClobbersTemps() || instr->ClobbersRegisters() ||
instr->ClobbersDoubleRegisters()) {
return false;
}
if (instr->arch_opcode() != ArchOpcode::kArchNop) return false;
ZoneSet<InstructionOperand, OperandCompare> operands(zone);
for (size_t i = 0; i < instr->InputCount(); ++i) {
operands.insert(*instr->InputAt(i));
}
for (size_t i = 0; i < instr->OutputCount(); ++i) {
operands.insert(*instr->OutputAt(i));
}
for (size_t i = 0; i < instr->TempCount(); ++i) {
operands.insert(*instr->TempAt(i));
}
for (int i = Instruction::GapPosition::FIRST_GAP_POSITION;
i <= Instruction::GapPosition::LAST_GAP_POSITION; ++i) {
ParallelMove* moves = instr->parallel_moves()[i];
if (moves == nullptr) continue;
for (MoveOperands* move : *moves) {
if (operands.count(move->source()) > 0 ||
operands.count(move->destination()) > 0) {
return false;
}
}
}
return true;
}
int FindFirstNonEmptySlot(const Instruction* instr) {
int i = Instruction::FIRST_GAP_POSITION;
for (; i <= Instruction::LAST_GAP_POSITION; i++) {
@ -88,11 +55,13 @@ int FindFirstNonEmptySlot(const Instruction* instr) {
MoveOptimizer::MoveOptimizer(Zone* local_zone, InstructionSequence* code)
: local_zone_(local_zone),
code_(code),
to_finalize_(local_zone),
local_vector_(local_zone) {}
void MoveOptimizer::Run() {
for (Instruction* instruction : code()->instructions()) {
CompressGaps(instruction);
}
for (InstructionBlock* block : code()->instruction_blocks()) {
CompressBlock(block);
}
@ -114,13 +83,140 @@ void MoveOptimizer::Run() {
}
OptimizeMerge(block);
}
for (Instruction* gap : to_finalize_) {
for (Instruction* gap : code()->instructions()) {
FinalizeMoves(gap);
}
}
void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
if (instruction->IsCall()) return;
ParallelMove* moves = instruction->parallel_moves()[0];
if (moves == nullptr) return;
void MoveOptimizer::CompressMoves(ParallelMove* left, ParallelMove* right) {
DCHECK(instruction->parallel_moves()[1] == nullptr ||
instruction->parallel_moves()[1]->empty());
OperandSet outputs(local_zone());
OperandSet inputs(local_zone());
// Outputs and temps are treated together as potentially clobbering a
// destination operand.
for (size_t i = 0; i < instruction->OutputCount(); ++i) {
outputs.insert(*instruction->OutputAt(i));
}
for (size_t i = 0; i < instruction->TempCount(); ++i) {
outputs.insert(*instruction->TempAt(i));
}
// Input operands block elisions.
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs.insert(*instruction->InputAt(i));
}
// Elide moves made redundant by the instruction.
for (MoveOperands* move : *moves) {
if (outputs.find(move->destination()) != outputs.end() &&
inputs.find(move->destination()) == inputs.end()) {
move->Eliminate();
}
}
// The ret instruction makes any assignment before it unnecessary, except for
// the one for its input.
if (instruction->opcode() == ArchOpcode::kArchRet) {
for (MoveOperands* move : *moves) {
if (inputs.find(move->destination()) == inputs.end()) {
move->Eliminate();
}
}
}
}
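
As a concrete illustration of the elision rule above, here is a small
standalone model (plain strings and std::set instead of V8's
InstructionOperand sets; the register names and the data in main() are
invented): a gap move is eliminated when its destination is written by
the instruction's outputs or temps but is not read as one of its inputs.

// Toy model of the clobbered-destination rule; g++ -std=c++14 clobbered.cc
#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Move { std::string src, dst; bool eliminated; };

// Eliminate gap moves whose destination is overwritten by the instruction
// (an output or temp) and not consumed by it (an input): the value the move
// writes would be clobbered before anyone could read it.
void RemoveClobberedDestinations(std::vector<Move>& gap,
                                 const std::set<std::string>& outputs_and_temps,
                                 const std::set<std::string>& inputs) {
  for (Move& m : gap) {
    if (outputs_and_temps.count(m.dst) > 0 && inputs.count(m.dst) == 0) {
      m.eliminated = true;
    }
  }
}

int main() {
  // Gap in front of an instruction that reads r0 and writes r1.
  std::vector<Move> gap = {{"r0", "r1", false}, {"r2", "r0", false}};
  RemoveClobberedDestinations(gap, /*outputs_and_temps=*/{"r1"},
                              /*inputs=*/{"r0"});
  for (const Move& m : gap) {
    std::printf("%s = %s%s\n", m.dst.c_str(), m.src.c_str(),
                m.eliminated ? "   <- eliminated" : "");
  }
}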
void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
if (from->IsCall()) return;
ParallelMove* from_moves = from->parallel_moves()[0];
if (from_moves == nullptr || from_moves->empty()) return;
ZoneSet<InstructionOperand, OperandCompare> dst_cant_be(local_zone());
ZoneSet<InstructionOperand, OperandCompare> src_cant_be(local_zone());
// If an operand is an input to the instruction, we cannot move assignments
// where it appears on the LHS.
for (size_t i = 0; i < from->InputCount(); ++i) {
dst_cant_be.insert(*from->InputAt(i));
}
// If an operand is output to the instruction, we cannot move assignments
// where it appears on the RHS, because we would lose its value before the
// instruction.
// Same for temp operands.
// The output can't appear on the LHS because we performed
// RemoveClobberedDestinations for the "from" instruction.
for (size_t i = 0; i < from->OutputCount(); ++i) {
src_cant_be.insert(*from->OutputAt(i));
}
for (size_t i = 0; i < from->TempCount(); ++i) {
src_cant_be.insert(*from->TempAt(i));
}
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
// Assume dest has a value "V". If we have a "dest = y" move, then we can't
// move "z = dest", because z would become y rather than "V".
// We assume CompressMoves has happened before this, which means we don't
// have more than one assignment to dest.
src_cant_be.insert(move->destination());
}
ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone());
// To start with, all the moves that don't have conflicting source or
// destination operands are eligible for being moved down.
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
if (dst_cant_be.find(move->destination()) == dst_cant_be.end()) {
MoveKey key = {move->source(), move->destination()};
move_candidates.insert(key);
}
}
if (move_candidates.empty()) return;
// Stabilize the candidate set.
bool changed = false;
do {
changed = false;
for (auto iter = move_candidates.begin(); iter != move_candidates.end();) {
auto current = iter;
++iter;
InstructionOperand src = current->source;
if (src_cant_be.find(src) != src_cant_be.end()) {
src_cant_be.insert(current->destination);
move_candidates.erase(current);
changed = true;
}
}
} while (changed);
ParallelMove to_move(local_zone());
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
MoveKey key = {move->source(), move->destination()};
if (move_candidates.find(key) != move_candidates.end()) {
to_move.AddMove(move->source(), move->destination(), code_zone());
move->Eliminate();
}
}
if (to_move.empty()) return;
ParallelMove* dest =
to->GetOrCreateParallelMove(Instruction::GapPosition::START, code_zone());
CompressMoves(&to_move, dest);
DCHECK(dest->empty());
for (MoveOperands* m : to_move) {
dest->push_back(m);
}
}
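
To see the eligibility analysis end to end, here is a standalone sketch
of the same computation (pairs of strings instead of InstructionOperands;
the gap and instruction operands in main() are invented for illustration):
destinations read by the crossed instruction block a move outright, while
forbidden sources, including every gap destination, are propagated to a
fixed point as in the stabilization loop above.

// Toy model of the MigrateMoves eligibility analysis; g++ -std=c++14 migrate.cc
#include <cstdio>
#include <set>
#include <string>
#include <utility>
#include <vector>

using Move = std::pair<std::string, std::string>;  // {source, destination}

// Which moves in `gap` may be pushed down past an instruction whose inputs
// are `dst_cant_be` and whose outputs/temps seed `src_cant_be`?
std::set<Move> MigratableMoves(const std::vector<Move>& gap,
                               const std::set<std::string>& dst_cant_be,
                               std::set<std::string> src_cant_be) {
  // Every gap destination is also a forbidden source: another move reading it
  // must still see the value it held before this parallel move.
  for (const Move& m : gap) src_cant_be.insert(m.second);
  std::set<Move> candidates;
  for (const Move& m : gap) {
    if (dst_cant_be.count(m.second) == 0) candidates.insert(m);
  }
  // Stabilize: a candidate whose source is forbidden stays behind, and its
  // destination is (re)marked as a forbidden source for the others.
  bool changed = true;
  while (changed) {
    changed = false;
    for (auto it = candidates.begin(); it != candidates.end();) {
      auto current = it++;
      if (src_cant_be.count(current->first) > 0) {
        src_cant_be.insert(current->second);
        candidates.erase(current);
        changed = true;
      }
    }
  }
  return candidates;
}

int main() {
  // Gap of the earlier instruction; the crossed instruction reads r0, writes r1.
  std::vector<Move> gap = {{"r3", "r2"}, {"r1", "r4"}, {"r4", "r5"}};
  for (const Move& m :
       MigratableMoves(gap, /*dst_cant_be=*/{"r0"}, /*src_cant_be=*/{"r1"})) {
    std::printf("can migrate down: %s = %s\n", m.second.c_str(), m.first.c_str());
  }
}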
void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
if (right == nullptr) return;
MoveOpVector& eliminated = local_vector();
@ -150,54 +246,49 @@ void MoveOptimizer::CompressMoves(ParallelMove* left, ParallelMove* right) {
DCHECK(eliminated.empty());
}
void MoveOptimizer::CompressGaps(Instruction* instruction) {
int i = FindFirstNonEmptySlot(instruction);
bool has_moves = i <= Instruction::LAST_GAP_POSITION;
USE(has_moves);
// Smash all consecutive moves into the left most move slot and accumulate them
// as much as possible across instructions.
void MoveOptimizer::CompressBlock(InstructionBlock* block) {
Instruction* prev_instr = nullptr;
for (int index = block->code_start(); index < block->code_end(); ++index) {
Instruction* instr = code()->instructions()[index];
int i = FindFirstNonEmptySlot(instr);
bool has_moves = i <= Instruction::LAST_GAP_POSITION;
if (i == Instruction::LAST_GAP_POSITION) {
std::swap(instr->parallel_moves()[Instruction::FIRST_GAP_POSITION],
instr->parallel_moves()[Instruction::LAST_GAP_POSITION]);
} else if (i == Instruction::FIRST_GAP_POSITION) {
CompressMoves(instr->parallel_moves()[Instruction::FIRST_GAP_POSITION],
instr->parallel_moves()[Instruction::LAST_GAP_POSITION]);
}
// We either have no moves, or, after swapping or compressing, we have
// all the moves in the first gap position, and none in the second/end gap
// position.
ParallelMove* first =
instr->parallel_moves()[Instruction::FIRST_GAP_POSITION];
ParallelMove* last =
instr->parallel_moves()[Instruction::LAST_GAP_POSITION];
USE(last);
DCHECK(!has_moves ||
(first != nullptr && (last == nullptr || last->empty())));
if (prev_instr != nullptr) {
if (has_moves) {
// Smash first into prev_instr, killing left.
ParallelMove* pred_moves = prev_instr->parallel_moves()[0];
CompressMoves(pred_moves, first);
}
// Slide prev_instr down so we always know where to look for it.
std::swap(prev_instr->parallel_moves()[0], instr->parallel_moves()[0]);
}
prev_instr = instr->parallel_moves()[0] == nullptr ? nullptr : instr;
if (GapsCanMoveOver(instr, local_zone())) continue;
if (prev_instr != nullptr) {
to_finalize_.push_back(prev_instr);
prev_instr = nullptr;
}
if (i == Instruction::LAST_GAP_POSITION) {
std::swap(instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
} else if (i == Instruction::FIRST_GAP_POSITION) {
CompressMoves(
instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
}
if (prev_instr != nullptr) {
to_finalize_.push_back(prev_instr);
// We either have no moves, or, after swapping or compressing, we have
// all the moves in the first gap position, and none in the second/end gap
// position.
ParallelMove* first =
instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION];
ParallelMove* last =
instruction->parallel_moves()[Instruction::LAST_GAP_POSITION];
USE(first);
USE(last);
DCHECK(!has_moves ||
(first != nullptr && (last == nullptr || last->empty())));
}
void MoveOptimizer::CompressBlock(InstructionBlock* block) {
int first_instr_index = block->first_instruction_index();
int last_instr_index = block->last_instruction_index();
// Start by removing gap assignments where the output of the subsequent
// instruction appears on LHS, as long as they are not needed by its input.
Instruction* prev_instr = code()->instructions()[first_instr_index];
RemoveClobberedDestinations(prev_instr);
for (int index = first_instr_index + 1; index <= last_instr_index; ++index) {
Instruction* instr = code()->instructions()[index];
// Migrate eligible moves from prev_instr's gap down into instr's gap.
MigrateMoves(instr, prev_instr);
// Remove gap assignments clobbered by instr's output.
RemoveClobberedDestinations(instr);
prev_instr = instr;
}
}
@ -258,15 +349,7 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
if (move_map.empty() || correct_counts == 0) return;
// Find insertion point.
Instruction* instr = nullptr;
for (int i = block->first_instruction_index();
i <= block->last_instruction_index(); ++i) {
instr = code()->instructions()[i];
if (correct_counts != move_map.size() ||
!GapsCanMoveOver(instr, local_zone()) || !instr->AreMovesRedundant())
break;
}
DCHECK_NOT_NULL(instr);
Instruction* instr = code()->instructions()[block->first_instruction_index()];
if (correct_counts != move_map.size()) {
// Moves that are unique to each predecessor won't be pushed to the common
@ -309,10 +392,8 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
DCHECK_NOT_NULL(instr);
bool gap_initialized = true;
if (instr->parallel_moves()[0] == nullptr ||
instr->parallel_moves()[0]->empty()) {
to_finalize_.push_back(instr);
} else {
if (instr->parallel_moves()[0] != nullptr &&
!instr->parallel_moves()[0]->empty()) {
// Will compress after insertion.
gap_initialized = false;
std::swap(instr->parallel_moves()[0], instr->parallel_moves()[1]);
@ -340,6 +421,7 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
if (!gap_initialized) {
CompressMoves(instr->parallel_moves()[0], instr->parallel_moves()[1]);
}
CompressBlock(block);
}
@ -368,8 +450,10 @@ void MoveOptimizer::FinalizeMoves(Instruction* instr) {
MoveOpVector& loads = local_vector();
DCHECK(loads.empty());
ParallelMove* parallel_moves = instr->parallel_moves()[0];
if (parallel_moves == nullptr) return;
// Find all the loads.
for (MoveOperands* move : *instr->parallel_moves()[0]) {
for (MoveOperands* move : *parallel_moves) {
if (move->IsRedundant()) continue;
if (move->source().IsConstant() || IsSlot(move->source())) {
loads.push_back(move);


@ -26,15 +26,30 @@ class MoveOptimizer final {
Zone* code_zone() const { return code()->zone(); }
MoveOpVector& local_vector() { return local_vector_; }
void CompressBlock(InstructionBlock* blocke);
void CompressMoves(ParallelMove* left, ParallelMove* right);
// Consolidate moves into the first gap.
void CompressGaps(Instruction* instr);
// Attempt to push down to the last instruction those moves that can.
void CompressBlock(InstructionBlock* block);
// Consolidate moves into the first gap.
void CompressMoves(ParallelMove* left, MoveOpVector* right);
// Push down those moves in the gap of from that do not change the
// semantics of the from instruction, nor the semantics of the moves
// that remain behind.
void MigrateMoves(Instruction* to, Instruction* from);
void RemoveClobberedDestinations(Instruction* instruction);
const Instruction* LastInstruction(const InstructionBlock* block) const;
// Consolidate common moves appearing across all predecessors of a block.
void OptimizeMerge(InstructionBlock* block);
void FinalizeMoves(Instruction* instr);
Zone* const local_zone_;
InstructionSequence* const code_;
Instructions to_finalize_;
MoveOpVector local_vector_;
DISALLOW_COPY_AND_ASSIGN(MoveOptimizer);


@ -3339,8 +3339,31 @@ void LiveRangeConnector::ResolveControlFlow(Zone* local_zone) {
if (pred_op.Equals(cur_op)) continue;
if (!pred_op.IsAnyRegister() && cur_op.IsAnyRegister()) {
// We're doing a reload.
// We don't need to, if:
// 1) there's no register use in this block, and
// 2) the range ends before the block does, and
// 3) we don't have a successor, or the successor is spilled.
LifetimePosition block_start =
LifetimePosition::GapFromInstructionIndex(block->code_start());
LifetimePosition block_end =
LifetimePosition::GapFromInstructionIndex(block->code_end());
const LiveRange* current = result.cur_cover_;
const LiveRange* successor = current->next();
if (current->End() < block_end &&
(successor == nullptr || successor->spilled())) {
// verify point 1: no register use. We can go to the end of the
// range, since it's all within the block.
bool uses_reg = false;
for (const UsePosition* use = current->NextUsePosition(block_start);
use != nullptr; use = use->next()) {
if (use->operand()->IsAnyRegister()) {
uses_reg = true;
break;
}
}
if (!uses_reg) continue;
}
if (current->TopLevel()->IsSpilledOnlyInDeferredBlocks() &&
pred_block->IsDeferred()) {
// The spill location should be defined in pred_block, so add
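
For the register-allocator change in this hunk, here is a hedged toy model
of the three-condition check described in the comment above (the Range/Use
structs, their field names, and the data in main() are invented; V8's
LiveRange/UsePosition interfaces are richer than this): the reload at block
entry is only emitted if the range outlives the block, its continuation
needs a register, or some use inside the block does.

// Toy model of the reload-elision check; g++ -std=c++14 reload.cc
#include <cstdio>
#include <vector>

// Invented stand-ins for LiveRange / UsePosition; positions are plain
// instruction indices.
struct Use { int position; bool needs_register; };
struct Range {
  int end;                     // position where this part of the range dies
  bool continues_in_register;  // false: no successor part, or it is spilled
  std::vector<Use> uses;       // sorted by position
};

// The reload at block entry can be skipped when the range dies inside the
// block, its continuation (if any) lives in the spill slot, and no use
// within the block needs a register.
bool ReloadNeeded(const Range& r, int block_start, int block_end) {
  if (r.end >= block_end || r.continues_in_register) return true;
  for (const Use& u : r.uses) {
    if (u.position >= block_start && u.needs_register) return true;
  }
  return false;
}

int main() {
  Range r = {/*end=*/12, /*continues_in_register=*/false,
             {{10, /*needs_register=*/false}, {12, false}}};
  std::printf("reload needed: %s\n",
              ReloadNeeded(r, /*block_start=*/8, /*block_end=*/16) ? "yes" : "no");
}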


@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "src/compiler/move-optimizer.h"
#include "src/compiler/pipeline.h"
#include "test/unittests/compiler/instruction-sequence-unittest.h"
namespace v8 {
@ -227,8 +228,8 @@ TEST_F(MoveOptimizerTest, GapsCanMoveOverInstruction) {
ctant_def->GetParallelMove(Instruction::GapPosition::END);
ParallelMove* last_start =
last->GetParallelMove(Instruction::GapPosition::START);
CHECK(inst1_start == nullptr || inst1_start->size() == 0);
CHECK(inst1_end == nullptr || inst1_end->size() == 0);
CHECK(inst1_start == nullptr || NonRedundantSize(inst1_start) == 0);
CHECK(inst1_end == nullptr || NonRedundantSize(inst1_end) == 0);
CHECK(last_start->size() == 2);
int redundants = 0;
int assignment = 0;
@ -321,6 +322,22 @@ TEST_F(MoveOptimizerTest, GapConflictSubsetMovesDoNotMerge) {
CHECK(Contains(b1_move, Reg(0), Reg(1)));
}
TEST_F(MoveOptimizerTest, ClobberedDestinationsAreEliminated) {
StartBlock();
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, Reg(0), Reg(1));
EmitOI(Reg(1), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();
ParallelMove* first_move = first_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(first_move));
ParallelMove* last_move = last_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(last_move));
}
} // namespace compiler
} // namespace internal