mirror of
https://github.com/KhronosGroup/SPIRV-Tools
synced 2024-10-19 03:20:14 +00:00
dd8400e150
This patch adds initial support for loop unrolling in the form of a series of utility classes which perform the unrolling. The pass can be run with the command spirv-opt --loop-unroll. This will unroll loops within the module which have the unroll hint set. The unroller imposes a number of requirements on the loops it can unroll. These are documented in the comments for the LoopUtils::CanPerformUnroll method in loop_utils.h. Some of the restrictions will be lifted in future patches.
940 lines
34 KiB
C++
940 lines
34 KiB
C++
// Copyright (c) 2018 Google LLC.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "opt/loop_unroller.h"
|
|
#include <map>
|
|
#include <memory>
|
|
#include <utility>
|
|
#include "opt/ir_builder.h"
|
|
#include "opt/loop_utils.h"
|
|
|
|
// Implements loop util unrolling functionality for fully and partially
|
|
// unrolling loops. Given a factor it will duplicate the loop that many times,
|
|
// appending each one to the end of the old loop and removing backedges, to
|
|
// create a new unrolled loop.
|
|
//
|
|
// 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
|
|
// loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
|
|
// validate that a given loop can be unrolled. That method (along with the
|
|
// constructor of loop) checks that the IR is in the expected canonicalised
|
|
// format.
|
|
//
|
|
// 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
|
|
// perform the unrolling. This implements helper methods to copy the loop basic
|
|
// blocks and remap the ids of instructions used inside them.
|
|
//
|
|
// 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
|
|
// actually performs the loop duplication. It does this by creating a
|
|
// LoopUnrollState object and then copying the loop as given by the factor
|
|
// parameter. The LoopUnrollState object retains the state of the unroller
|
|
// between the loop body copies as each iteration needs information on the last
|
|
// to adjust the phi induction variable, adjust the OpLoopMerge instruction in
|
|
// the main loop header, and change the previous continue block to point to the
|
|
// new header and the new continue block to the main loop header.
|
|
//
|
|
// 4 - If the loop is to be fully unrolled then it is simply closed after step
|
|
// 3, with the OpLoopMerge being deleted, the backedge removed, and the
|
|
// condition blocks folded.
|
|
//
|
|
// 5 - If it is being partially unrolled: if the unrolling factor leaves the
|
|
// loop with an even number of bodies with respect to the number of loop
|
|
// iterations then step 3 is all that is needed. If it is uneven then we need to
|
|
// duplicate the loop completely and unroll the duplicated loop to cover the
|
|
// residual part and adjust the first loop to cover only the "even" part. For
|
|
// instance if you request an unroll factor of 3 on a loop with 10 iterations
|
|
// then copying the body three times would leave you with three bodies in the
|
|
// loop
|
|
// where the loop still iterates over each 4 times. So we make two loops one
|
|
// iterating once then a second loop of three iterating 3 times.
|
|
|
|
namespace spvtools {
|
|
namespace opt {
|
|
namespace {
|
|
|
|
// This utility class encapsulates some of the state we need to maintain between
|
|
// loop unrolls. Specifically it maintains key blocks and the induction variable
|
|
// in the current loop duplication step and the blocks from the previous one.
|
|
// This is because each step of the unroll needs to use data from both the
|
|
// preceding step and the original loop.
|
|
struct LoopUnrollState {
|
|
LoopUnrollState()
|
|
: previous_phi_(nullptr),
|
|
previous_continue_block_(nullptr),
|
|
previous_condition_block_(nullptr),
|
|
new_phi(nullptr),
|
|
new_continue_block(nullptr),
|
|
new_condition_block(nullptr),
|
|
new_header_block(nullptr) {}
|
|
|
|
// Initialize from the loop descriptor class.
|
|
LoopUnrollState(ir::Instruction* induction, ir::BasicBlock* continue_block,
|
|
ir::BasicBlock* condition)
|
|
: previous_phi_(induction),
|
|
previous_continue_block_(continue_block),
|
|
previous_condition_block_(condition),
|
|
new_phi(nullptr),
|
|
new_continue_block(nullptr),
|
|
new_condition_block(nullptr),
|
|
new_header_block(nullptr) {}
|
|
|
|
// Swap the state so that the new nodes are now the previous nodes.
|
|
void NextIterationState() {
|
|
previous_phi_ = new_phi;
|
|
previous_continue_block_ = new_continue_block;
|
|
previous_condition_block_ = new_condition_block;
|
|
|
|
// Clear new nodes.
|
|
new_phi = nullptr;
|
|
new_continue_block = nullptr;
|
|
new_condition_block = nullptr;
|
|
new_header_block = nullptr;
|
|
|
|
// Clear new block/instruction maps.
|
|
new_blocks.clear();
|
|
new_inst.clear();
|
|
}
|
|
|
|
// The induction variable from the immediately preceding loop body.
|
|
ir::Instruction* previous_phi_;
|
|
|
|
// The previous continue block. The backedge will be removed from this and
|
|
// added to the new continue block.
|
|
ir::BasicBlock* previous_continue_block_;
|
|
|
|
// The previous condition block. This may be folded to flatten the loop.
|
|
ir::BasicBlock* previous_condition_block_;
|
|
|
|
// The new induction variable.
|
|
ir::Instruction* new_phi;
|
|
|
|
// The new continue block.
|
|
ir::BasicBlock* new_continue_block;
|
|
|
|
// The new condition block.
|
|
ir::BasicBlock* new_condition_block;
|
|
|
|
// The new header block.
|
|
ir::BasicBlock* new_header_block;
|
|
|
|
// A mapping of new block ids to the original blocks which they were copied
|
|
// from.
|
|
std::unordered_map<uint32_t, ir::BasicBlock*> new_blocks;
|
|
|
|
// A mapping of new instruction ids to the instruction ids from which they
|
|
// were copied.
|
|
std::unordered_map<uint32_t, uint32_t> new_inst;
|
|
};
|
|
|
|
// This class implements the actual unrolling. It uses a LoopUnrollState to
|
|
// maintain the state of the unrolling inbetween steps.
|
|
class LoopUnrollerUtilsImpl {
|
|
public:
|
|
using BasicBlockListTy = std::vector<std::unique_ptr<ir::BasicBlock>>;
|
|
|
|
LoopUnrollerUtilsImpl(ir::IRContext* c, ir::Function* function)
|
|
: context_(c),
|
|
function_(*function),
|
|
loop_condition_block_(nullptr),
|
|
loop_induction_variable_(nullptr),
|
|
number_of_loop_iterations_(0),
|
|
loop_step_value_(0),
|
|
loop_init_value_(0) {}
|
|
|
|
// Unroll the |loop| by given |factor| by copying the whole body |factor|
|
|
// times. The resulting basicblock structure will remain a loop.
|
|
void PartiallyUnroll(ir::Loop*, size_t factor);
|
|
|
|
// If partially unrolling the |loop| would leave the loop with too many bodies
|
|
// for its number of iterations then this method should be used. This method
|
|
// will duplicate the |loop| completely, making the duplicated loop the
|
|
// successor of the original's merge block. The original loop will have its
|
|
// condition changed to loop over the residual part and the duplicate will be
|
|
// partially unrolled. The resulting structure will be two loops.
|
|
void PartiallyUnrollResidualFactor(ir::Loop* loop, size_t factor);
|
|
|
|
// Fully unroll the |loop| by copying the full body by the total number of
|
|
// loop iterations, folding all conditions, and removing the backedge from the
|
|
// continue block to the header.
|
|
void FullyUnroll(ir::Loop* loop);
|
|
|
|
// Get the ID of the variable in the |phi| paired with |label|.
|
|
uint32_t GetPhiDefID(const ir::Instruction* phi, uint32_t label) const;
|
|
|
|
// Close the loop by removing the OpLoopMerge from the |loop| header block and
|
|
// making the backedge point to the merge block.
|
|
void CloseUnrolledLoop(ir::Loop* loop);
|
|
|
|
// Remove the OpConditionalBranch instruction inside |conditional_block| used
|
|
// to branch to either exit or continue the loop and replace it with an
|
|
// unconditional OpBranch to block |new_target|.
|
|
void FoldConditionBlock(ir::BasicBlock* condtion_block, uint32_t new_target);
|
|
|
|
// Add all blocks_to_add_ to function_ at the |insert_point|.
|
|
void AddBlocksToFunction(const ir::BasicBlock* insert_point);
|
|
|
|
// Duplicates the |old_loop|, cloning each body and remaping the ids without
|
|
// removing instructions or changing relative structure. Result will be stored
|
|
// in |new_loop|.
|
|
void DuplicateLoop(ir::Loop* old_loop, ir::Loop* new_loop);
|
|
|
|
inline size_t GetLoopIterationCount() const {
|
|
return number_of_loop_iterations_;
|
|
}
|
|
|
|
// Extracts the initial state information from the |loop|.
|
|
void Init(ir::Loop* loop);
|
|
|
|
private:
|
|
// Remap all the in |basic_block| to new IDs and keep the mapping of new ids
|
|
// to old
|
|
// ids. |loop| is used to identify special loop blocks (header, continue,
|
|
// ect).
|
|
void AssignNewResultIds(ir::BasicBlock* basic_block);
|
|
|
|
// Using the map built by AssignNewResultIds, for each instruction in
|
|
// |basic_block| use
|
|
// that map to substitute the IDs used by instructions (in the operands) with
|
|
// the new ids.
|
|
void RemapOperands(ir::BasicBlock* basic_block);
|
|
|
|
// Copy the whole body of the loop, all blocks dominated by the |loop| header
|
|
// and not dominated by the |loop| merge. The copied body will be linked to by
|
|
// the old |loop| continue block and the new body will link to the |loop|
|
|
// header via the new continue block. |eliminate_conditions| is used to decide
|
|
// whether or not to fold all the condition blocks other than the last one.
|
|
void CopyBody(ir::Loop* loop, bool eliminate_conditions);
|
|
|
|
// Copy a given |block_to_copy| in the |loop| and record the mapping of the
|
|
// old/new ids. |preserve_instructions| determines whether or not the method
|
|
// will modify (other than result_id) instructions which are copied.
|
|
void CopyBasicBlock(ir::Loop* loop, const ir::BasicBlock* block_to_copy,
|
|
bool preserve_instructions);
|
|
|
|
// The actual implementation of the unroll step. Unrolls |loop| by given
|
|
// |factor| by copying the body by |factor| times. Also propagates the
|
|
// induction variable value throughout the copies.
|
|
void Unroll(ir::Loop* loop, size_t factor);
|
|
|
|
// Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
|
|
// as computed by the method ComputeLoopOrderedBlocks.
|
|
void ComputeLoopOrderedBlocks(ir::Loop* loop);
|
|
|
|
// Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
|
|
// the parent exists.
|
|
void AddBlocksToLoop(ir::Loop* loop) const;
|
|
|
|
// A pointer to the IRContext. Used to add/remove instructions and for usedef
|
|
// chains.
|
|
ir::IRContext* context_;
|
|
|
|
// A reference the function the loop is within.
|
|
ir::Function& function_;
|
|
|
|
// A list of basic blocks to be added to the loop at the end of an unroll
|
|
// step.
|
|
BasicBlockListTy blocks_to_add_;
|
|
|
|
// List of instructions which are now dead and can be removed.
|
|
std::vector<ir::Instruction*> dead_instructions_;
|
|
|
|
// Maintains the current state of the transform between calls to unroll.
|
|
LoopUnrollState state_;
|
|
|
|
// An ordered list containing the loop basic blocks.
|
|
std::vector<ir::BasicBlock*> loop_blocks_inorder_;
|
|
|
|
// The block containing the condition check which contains a conditional
|
|
// branch to the merge and continue block.
|
|
ir::BasicBlock* loop_condition_block_;
|
|
|
|
// The induction variable of the loop.
|
|
ir::Instruction* loop_induction_variable_;
|
|
|
|
// The number of loop iterations that the loop would preform pre-unroll.
|
|
size_t number_of_loop_iterations_;
|
|
|
|
// The amount that the loop steps each iteration.
|
|
int64_t loop_step_value_;
|
|
|
|
// The value the loop starts stepping from.
|
|
int64_t loop_init_value_;
|
|
};
|
|
|
|
/*
|
|
* Static helper functions.
|
|
*/
|
|
|
|
// Retrieve the index of the OpPhi instruction |phi| which corresponds to the
|
|
// incoming |block| id.
|
|
static uint32_t GetPhiIndexFromLabel(const ir::BasicBlock* block,
|
|
const ir::Instruction* phi) {
|
|
for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
|
|
if (block->id() == phi->GetSingleWordInOperand(i)) {
|
|
return i;
|
|
}
|
|
}
|
|
assert(false && "Could not find operand in instruction.");
|
|
return 0;
|
|
}
|
|
|
|
void LoopUnrollerUtilsImpl::Init(ir::Loop* loop) {
|
|
loop_condition_block_ = loop->FindConditionBlock();
|
|
|
|
// When we reinit the second loop during PartiallyUnrollResidualFactor we need
|
|
// to use the cached value from the duplicate step as the dominator tree
|
|
// basded solution, loop->FindConditionBlock, requires all the nodes to be
|
|
// connected up with the correct branches. They won't be at this point.
|
|
if (!loop_condition_block_) {
|
|
loop_condition_block_ = state_.new_condition_block;
|
|
}
|
|
assert(loop_condition_block_);
|
|
|
|
loop_induction_variable_ = loop->FindInductionVariable(loop_condition_block_);
|
|
assert(loop_induction_variable_);
|
|
|
|
bool found = loop->FindNumberOfIterations(
|
|
loop_induction_variable_, &*loop_condition_block_->ctail(),
|
|
&number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
|
|
(void)found; // To silence unused variable warning on release builds.
|
|
assert(found);
|
|
ComputeLoopOrderedBlocks(loop);
|
|
}
|
|
|
|
// This function is used to partially unroll the loop when the factor provided
|
|
// would normally lead to an illegal optimization. Instead of just unrolling the
|
|
// loop it creates two loops and unrolls one and adjusts the condition on the
|
|
// other. The end result being that the new loop pair iterates over the correct
|
|
// number of bodies.
|
|
void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop,
|
|
size_t factor) {
|
|
// Create a new merge block for the first loop.
|
|
std::unique_ptr<ir::Instruction> new_label{new ir::Instruction(
|
|
context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
|
|
std::unique_ptr<ir::BasicBlock> new_exit_bb{
|
|
new ir::BasicBlock(std::move(new_label))};
|
|
|
|
// Save the id of the block before we move it.
|
|
uint32_t new_merge_id = new_exit_bb->id();
|
|
|
|
// Add the block the list of blocks to add, we want this merge block to be
|
|
// right at the start of the new blocks.
|
|
blocks_to_add_.push_back(std::move(new_exit_bb));
|
|
ir::BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
|
|
ir::Instruction& original_conditional_branch = *loop_condition_block_->tail();
|
|
|
|
// Duplicate the loop, providing access to the blocks of both loops.
|
|
// This is a naked new due to the VS2013 requirement of not having unique
|
|
// pointers in vectors, as it will be inserted into a vector with
|
|
// loop_descriptor.AddLoop.
|
|
ir::Loop* new_loop = new ir::Loop(*loop);
|
|
|
|
// Clear the basic blocks of the new loop.
|
|
new_loop->ClearBlocks();
|
|
|
|
DuplicateLoop(loop, new_loop);
|
|
|
|
// Add the blocks to the function.
|
|
AddBlocksToFunction(loop->GetMergeBlock());
|
|
blocks_to_add_.clear();
|
|
|
|
InstructionBuilder builder{context_, new_exit_bb_raw};
|
|
// Make the first loop branch to the second.
|
|
builder.AddBranch(new_loop->GetHeaderBlock()->id());
|
|
|
|
loop_condition_block_ = state_.new_condition_block;
|
|
loop_induction_variable_ = state_.new_phi;
|
|
|
|
// Unroll the new loop by the factor with the usual -1 to account for the
|
|
// existing block iteration.
|
|
Unroll(new_loop, factor);
|
|
|
|
// We need to account for the initial body when calculating the remainder.
|
|
int64_t remainder = loop_init_value_ +
|
|
(number_of_loop_iterations_ % factor) * loop_step_value_;
|
|
|
|
assert(remainder > std::numeric_limits<int32_t>::min() &&
|
|
remainder < std::numeric_limits<int32_t>::max());
|
|
|
|
ir::Instruction* new_constant = nullptr;
|
|
|
|
// If the remainder is negative then we add a signed constant, otherwise just
|
|
// add an unsigned constant.
|
|
if (remainder < 0) {
|
|
new_constant =
|
|
builder.Add32BitSignedIntegerConstant(static_cast<int32_t>(remainder));
|
|
} else {
|
|
new_constant = builder.Add32BitUnsignedIntegerConstant(
|
|
static_cast<int32_t>(remainder));
|
|
}
|
|
|
|
uint32_t constant_id = new_constant->result_id();
|
|
|
|
// Add the merge block to the back of the binary.
|
|
blocks_to_add_.push_back(
|
|
std::unique_ptr<ir::BasicBlock>(new_loop->GetMergeBlock()));
|
|
|
|
AddBlocksToLoop(new_loop);
|
|
// Add the blocks to the function.
|
|
AddBlocksToFunction(loop->GetMergeBlock());
|
|
|
|
// Reset the usedef analysis.
|
|
context_->InvalidateAnalysesExceptFor(
|
|
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
|
|
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
|
|
|
|
// Update the condition check.
|
|
ir::Instruction* condition_check = def_use_manager->GetDef(
|
|
original_conditional_branch.GetSingleWordOperand(0));
|
|
|
|
// This should have been checked by the LoopUtils::CanPerformUnroll function
|
|
// before entering this.
|
|
assert(condition_check->opcode() == SpvOpSLessThan);
|
|
condition_check->SetInOperand(1, {constant_id});
|
|
|
|
// Update the next phi node. The phi will have a constant value coming in from
|
|
// the preheader block. For the duplicated loop we need to update the constant
|
|
// to be the amount of iterations covered by the first loop and the incoming
|
|
// block to be the first loops new merge block.
|
|
uint32_t phi_incoming_index =
|
|
GetPhiIndexFromLabel(loop->GetPreHeaderBlock(), loop_induction_variable_);
|
|
loop_induction_variable_->SetInOperand(phi_incoming_index - 1, {constant_id});
|
|
loop_induction_variable_->SetInOperand(phi_incoming_index, {new_merge_id});
|
|
|
|
context_->InvalidateAnalysesExceptFor(
|
|
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
|
|
|
|
context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
|
|
|
|
ir::LoopDescriptor& loop_descriptor =
|
|
*context_->GetLoopDescriptor(&function_);
|
|
|
|
loop_descriptor.AddLoop(new_loop, loop->GetParent());
|
|
}
|
|
|
|
// Duplicate the |loop| body |factor| number of times while keeping the loop
|
|
// backedge intact.
|
|
void LoopUnrollerUtilsImpl::PartiallyUnroll(ir::Loop* loop, size_t factor) {
|
|
Unroll(loop, factor);
|
|
AddBlocksToLoop(loop);
|
|
AddBlocksToFunction(loop->GetMergeBlock());
|
|
}
|
|
|
|
// Duplicate the |loop| body |factor| number of times while keeping the loop
|
|
// backedge intact.
|
|
void LoopUnrollerUtilsImpl::Unroll(ir::Loop* loop, size_t factor) {
|
|
state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
|
|
loop_condition_block_};
|
|
for (size_t i = 0; i < factor - 1; ++i) {
|
|
CopyBody(loop, true);
|
|
}
|
|
|
|
uint32_t phi_index = GetPhiIndexFromLabel(state_.previous_continue_block_,
|
|
state_.previous_phi_);
|
|
uint32_t phi_variable =
|
|
state_.previous_phi_->GetSingleWordInOperand(phi_index - 1);
|
|
uint32_t phi_label = state_.previous_phi_->GetSingleWordInOperand(phi_index);
|
|
|
|
ir::Instruction* original_phi = loop_induction_variable_;
|
|
|
|
// SetInOperands are offset by two.
|
|
original_phi->SetInOperand(phi_index - 1, {phi_variable});
|
|
original_phi->SetInOperand(phi_index, {phi_label});
|
|
}
|
|
|
|
// Fully unroll the loop by partially unrolling it by the number of loop
|
|
// iterations minus one for the body already accounted for.
|
|
void LoopUnrollerUtilsImpl::FullyUnroll(ir::Loop* loop) {
|
|
// We unroll the loop by number of iterations in the loop.
|
|
Unroll(loop, number_of_loop_iterations_);
|
|
|
|
// The first condition block is preserved until now so it can be copied.
|
|
FoldConditionBlock(loop_condition_block_, 1);
|
|
|
|
// Delete the OpLoopMerge and remove the backedge to the header.
|
|
CloseUnrolledLoop(loop);
|
|
|
|
// Mark the loop for later deletion. This allows us to preserve the loop
|
|
// iterators but still disregard dead loops.
|
|
loop->MarkLoopForRemoval();
|
|
|
|
// If the loop has a parent add the new blocks to the parent.
|
|
if (loop->GetParent()) {
|
|
AddBlocksToLoop(loop->GetParent());
|
|
}
|
|
|
|
// Add the blocks to the function.
|
|
AddBlocksToFunction(loop->GetMergeBlock());
|
|
|
|
// Invalidate all analyses.
|
|
context_->InvalidateAnalysesExceptFor(
|
|
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
|
|
}
|
|
|
|
// Copy a given basic block, give it a new result_id, and store the new block
|
|
// and the id mapping in the state. |preserve_instructions| is used to determine
|
|
// whether or not this function should edit instructions other than the
|
|
// |result_id|.
|
|
void LoopUnrollerUtilsImpl::CopyBasicBlock(ir::Loop* loop,
|
|
const ir::BasicBlock* itr,
|
|
bool preserve_instructions) {
|
|
// Clone the block exactly, including the IDs.
|
|
ir::BasicBlock* basic_block = itr->Clone(context_);
|
|
|
|
basic_block->SetParent(itr->GetParent());
|
|
|
|
// Assign each result a new unique ID and keep a mapping of the old ids to
|
|
// the new ones.
|
|
AssignNewResultIds(basic_block);
|
|
|
|
// If this is the continue block we are copying.
|
|
if (itr == loop->GetLatchBlock()) {
|
|
// Make the OpLoopMerge point to this block for the continue.
|
|
if (!preserve_instructions) {
|
|
ir::Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
|
|
merge_inst->SetInOperand(1, {basic_block->id()});
|
|
}
|
|
|
|
state_.new_continue_block = basic_block;
|
|
}
|
|
|
|
// If this is the header block we are copying.
|
|
if (itr == loop->GetHeaderBlock()) {
|
|
state_.new_header_block = basic_block;
|
|
|
|
if (!preserve_instructions) {
|
|
// Remove the loop merge instruction if it exists.
|
|
ir::Instruction* merge_inst = basic_block->GetLoopMergeInst();
|
|
if (merge_inst) context_->KillInst(merge_inst);
|
|
}
|
|
}
|
|
|
|
// If this is the condition block we are copying.
|
|
if (itr == loop_condition_block_) {
|
|
state_.new_condition_block = basic_block;
|
|
}
|
|
|
|
// Add this block to the list of blocks to add to the function at the end of
|
|
// the unrolling process.
|
|
blocks_to_add_.push_back(std::unique_ptr<ir::BasicBlock>(basic_block));
|
|
|
|
// Keep tracking the old block via a map.
|
|
state_.new_blocks[itr->id()] = basic_block;
|
|
}
|
|
|
|
void LoopUnrollerUtilsImpl::CopyBody(ir::Loop* loop,
|
|
bool eliminate_conditions) {
|
|
// Copy each basic block in the loop, give them new ids, and save state
|
|
// information.
|
|
for (const ir::BasicBlock* itr : loop_blocks_inorder_) {
|
|
CopyBasicBlock(loop, itr, false);
|
|
}
|
|
|
|
// Set the previous continue block to point to the new header.
|
|
ir::Instruction& continue_branch = *state_.previous_continue_block_->tail();
|
|
continue_branch.SetInOperand(0, {state_.new_header_block->id()});
|
|
|
|
// As the algorithm copies the original loop blocks exactly, the tail of the
|
|
// latch block on iterations after the first one will be a branch to the new
|
|
// header and not the actual loop header. The last continue block in the loop
|
|
// should always be a backedge to the global header.
|
|
ir::Instruction& new_continue_branch = *state_.new_continue_block->tail();
|
|
new_continue_branch.SetInOperand(0, {loop->GetHeaderBlock()->id()});
|
|
|
|
// Update references to the old phi node with the actual variable.
|
|
const ir::Instruction* induction = loop_induction_variable_;
|
|
state_.new_inst[induction->result_id()] =
|
|
GetPhiDefID(state_.previous_phi_, state_.previous_continue_block_->id());
|
|
|
|
if (eliminate_conditions &&
|
|
state_.new_condition_block != loop_condition_block_) {
|
|
FoldConditionBlock(state_.new_condition_block, 1);
|
|
}
|
|
|
|
// Only reference to the header block is the backedge in the latch block,
|
|
// don't change this.
|
|
state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
|
|
|
|
for (auto& pair : state_.new_blocks) {
|
|
RemapOperands(pair.second);
|
|
}
|
|
|
|
dead_instructions_.push_back(state_.new_phi);
|
|
|
|
// Swap the state so the new is now the previous.
|
|
state_.NextIterationState();
|
|
}
|
|
|
|
uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const ir::Instruction* phi,
|
|
uint32_t label) const {
|
|
for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
|
|
if (phi->GetSingleWordOperand(operand) == label) {
|
|
return phi->GetSingleWordOperand(operand - 1);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void LoopUnrollerUtilsImpl::FoldConditionBlock(ir::BasicBlock* condition_block,
|
|
uint32_t operand_label) {
|
|
// Remove the old conditional branch to the merge and continue blocks.
|
|
ir::Instruction& old_branch = *condition_block->tail();
|
|
uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
|
|
context_->KillInst(&old_branch);
|
|
|
|
// Add the new unconditional branch to the merge block.
|
|
InstructionBuilder builder{context_, condition_block};
|
|
builder.AddBranch(new_target);
|
|
}
|
|
|
|
void LoopUnrollerUtilsImpl::CloseUnrolledLoop(ir::Loop* loop) {
|
|
// Remove the OpLoopMerge instruction from the function.
|
|
ir::Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
|
|
context_->KillInst(merge_inst);
|
|
|
|
// Remove the final backedge to the header and make it point instead to the
|
|
// merge block.
|
|
state_.previous_continue_block_->tail()->SetInOperand(
|
|
0, {loop->GetMergeBlock()->id()});
|
|
|
|
// Remove the induction variable as the phi will now be invalid. Replace all
|
|
// uses with the constant initializer value (all uses of the phi will be in
|
|
// the first iteration with the subsequent phis already having been removed.
|
|
uint32_t initalizer_id =
|
|
GetPhiDefID(loop_induction_variable_, loop->GetPreHeaderBlock()->id());
|
|
context_->ReplaceAllUsesWith(loop_induction_variable_->result_id(),
|
|
initalizer_id);
|
|
|
|
// Remove the now unused phi.
|
|
context_->KillInst(loop_induction_variable_);
|
|
}
|
|
|
|
// Uses the first loop to create a copy of the loop with new IDs.
|
|
void LoopUnrollerUtilsImpl::DuplicateLoop(ir::Loop* old_loop,
|
|
ir::Loop* new_loop) {
|
|
std::vector<ir::BasicBlock*> new_block_order;
|
|
|
|
// Copy every block in the old loop.
|
|
for (const ir::BasicBlock* itr : loop_blocks_inorder_) {
|
|
CopyBasicBlock(old_loop, itr, true);
|
|
new_block_order.push_back(blocks_to_add_.back().get());
|
|
}
|
|
|
|
ir::BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
|
|
new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
|
|
AssignNewResultIds(new_merge);
|
|
state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
|
|
for (auto& pair : state_.new_blocks) {
|
|
RemapOperands(pair.second);
|
|
}
|
|
|
|
loop_blocks_inorder_ = std::move(new_block_order);
|
|
|
|
AddBlocksToLoop(new_loop);
|
|
|
|
new_loop->SetHeaderBlock(state_.new_header_block);
|
|
new_loop->SetLatchBlock(state_.new_continue_block);
|
|
new_loop->SetMergeBlock(new_merge);
|
|
}
|
|
|
|
void LoopUnrollerUtilsImpl::AddBlocksToFunction(
|
|
const ir::BasicBlock* insert_point) {
|
|
for (ir::Instruction* inst : dead_instructions_) {
|
|
context_->KillInst(inst);
|
|
}
|
|
|
|
for (auto basic_block_iterator = function_.begin();
|
|
basic_block_iterator != function_.end(); ++basic_block_iterator) {
|
|
if (basic_block_iterator->id() == insert_point->id()) {
|
|
basic_block_iterator.InsertBefore(&blocks_to_add_);
|
|
return;
|
|
}
|
|
}
|
|
|
|
assert(
|
|
false &&
|
|
"Could not add basic blocks to function as insert point was not found.");
|
|
}
|
|
|
|
// Assign all result_ids in |basic_block| instructions to new IDs and preserve
|
|
// the mapping of new ids to old ones.
|
|
void LoopUnrollerUtilsImpl::AssignNewResultIds(ir::BasicBlock* basic_block) {
|
|
// Label instructions aren't covered by normal traversal of the
|
|
// instructions.
|
|
uint32_t new_label_id = context_->TakeNextId();
|
|
|
|
// Assign a new id to the label.
|
|
state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
|
|
basic_block->GetLabelInst()->SetResultId(new_label_id);
|
|
|
|
for (ir::Instruction& inst : *basic_block) {
|
|
uint32_t old_id = inst.result_id();
|
|
|
|
// Ignore stores etc.
|
|
if (old_id == 0) {
|
|
continue;
|
|
}
|
|
|
|
// Give the instruction a new id.
|
|
inst.SetResultId(context_->TakeNextId());
|
|
|
|
// Save the mapping of old_id -> new_id.
|
|
state_.new_inst[old_id] = inst.result_id();
|
|
|
|
// Check if this instruction is the induction variable.
|
|
if (loop_induction_variable_->result_id() == old_id) {
|
|
// Save a pointer to the new copy of it.
|
|
state_.new_phi = &inst;
|
|
}
|
|
}
|
|
}
|
|
|
|
// For all instructions in |basic_block| check if the operands used are from a
|
|
// copied instruction and if so swap out the operand for the copy of it.
|
|
void LoopUnrollerUtilsImpl::RemapOperands(ir::BasicBlock* basic_block) {
|
|
for (ir::Instruction& inst : *basic_block) {
|
|
auto remap_operands_to_new_ids = [this](uint32_t* id) {
|
|
auto itr = state_.new_inst.find(*id);
|
|
if (itr != state_.new_inst.end()) {
|
|
*id = itr->second;
|
|
}
|
|
};
|
|
|
|
inst.ForEachInId(remap_operands_to_new_ids);
|
|
}
|
|
}
|
|
|
|
// Generate the ordered list of basic blocks in the |loop| and cache it for
|
|
// later use.
|
|
void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(ir::Loop* loop) {
|
|
loop_blocks_inorder_.clear();
|
|
|
|
opt::DominatorAnalysis* analysis =
|
|
context_->GetDominatorAnalysis(&function_, *context_->cfg());
|
|
opt::DominatorTree& tree = analysis->GetDomTree();
|
|
|
|
// Starting at the loop header BasicBlock, traverse the dominator tree until
|
|
// we reach the merge block and add every node we traverse to the set of
|
|
// blocks
|
|
// which we consider to be the loop.
|
|
auto begin_itr = tree.GetTreeNode(loop->GetHeaderBlock())->df_begin();
|
|
const ir::BasicBlock* merge = loop->GetMergeBlock();
|
|
auto func = [merge, &tree, this](DominatorTreeNode* node) {
|
|
if (!tree.Dominates(merge->id(), node->id())) {
|
|
this->loop_blocks_inorder_.push_back(node->bb_);
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
tree.VisitChildrenIf(func, begin_itr);
|
|
}
|
|
|
|
// Adds the blocks_to_add_ to both the loop and to the parent.
|
|
void LoopUnrollerUtilsImpl::AddBlocksToLoop(ir::Loop* loop) const {
|
|
// Add the blocks to this loop.
|
|
for (auto& block_itr : blocks_to_add_) {
|
|
loop->AddBasicBlock(block_itr.get());
|
|
}
|
|
|
|
// Add the blocks to the parent as well.
|
|
if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
|
|
}
|
|
|
|
/*
|
|
* End LoopUtilsImpl.
|
|
*/
|
|
|
|
} // namespace
|
|
|
|
/*
|
|
*
|
|
* Begin Utils.
|
|
*
|
|
* */
|
|
|
|
bool LoopUtils::CanPerformUnroll() {
|
|
// The loop is expected to be in structured order.
|
|
if (!loop_->GetHeaderBlock()->GetMergeInst()) {
|
|
return false;
|
|
}
|
|
|
|
// Find check the loop has a condition we can find and evaluate.
|
|
const ir::BasicBlock* condition = loop_->FindConditionBlock();
|
|
if (!condition) return false;
|
|
|
|
// Check that we can find and process the induction variable.
|
|
const ir::Instruction* induction = loop_->FindInductionVariable(condition);
|
|
if (!induction || induction->opcode() != SpvOpPhi) return false;
|
|
|
|
// Check that we can find the number of loop iterations.
|
|
if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
|
|
return false;
|
|
|
|
// Make sure the continue block is a unconditional branch to the header
|
|
// block.
|
|
const ir::Instruction& branch = *loop_->GetLatchBlock()->ctail();
|
|
bool branching_assumption =
|
|
branch.opcode() == SpvOpBranch &&
|
|
branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
|
|
if (!branching_assumption) {
|
|
return false;
|
|
}
|
|
|
|
// Make sure the induction is the only phi instruction we have in the loop
|
|
// header. Other optimizations have been seen to leave dead phi nodes in the
|
|
// header so we also check that the phi is used.
|
|
for (const ir::Instruction& inst : *loop_->GetHeaderBlock()) {
|
|
if (inst.opcode() == SpvOpPhi &&
|
|
inst.result_id() != induction->result_id()) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Ban breaks within the loop.
|
|
const std::vector<uint32_t>& merge_block_preds =
|
|
context_->cfg()->preds(loop_->GetMergeBlock()->id());
|
|
if (merge_block_preds.size() != 1) {
|
|
return false;
|
|
}
|
|
|
|
// Ban continues within the loop.
|
|
const std::vector<uint32_t>& continue_block_preds =
|
|
context_->cfg()->preds(loop_->GetLatchBlock()->id());
|
|
if (continue_block_preds.size() != 1) {
|
|
return false;
|
|
}
|
|
|
|
// Ban returns in the loop.
|
|
// Iterate over all the blocks within the loop and check that none of them
|
|
// exit the loop.
|
|
for (uint32_t label_id : loop_->GetBlocks()) {
|
|
const ir::BasicBlock* block = context_->cfg()->block(label_id);
|
|
if (block->ctail()->opcode() == SpvOp::SpvOpKill ||
|
|
block->ctail()->opcode() == SpvOp::SpvOpReturn ||
|
|
block->ctail()->opcode() == SpvOp::SpvOpReturnValue) {
|
|
return false;
|
|
}
|
|
}
|
|
// Can only unroll inner loops.
|
|
if (!loop_->AreAllChildrenMarkedForRemoval()) {
|
|
return false;
|
|
}
|
|
|
|
for (uint32_t block_id : loop_->GetBlocks()) {
|
|
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
|
|
|
|
ir::BasicBlock& bb = *context_->cfg()->block(block_id);
|
|
// For every instruction in the block.
|
|
for (ir::Instruction& inst : bb) {
|
|
if (inst.result_id() == 0) continue;
|
|
|
|
auto is_used_outside_loop = [this,
|
|
def_use_manager](ir::Instruction* user) {
|
|
|
|
if (!loop_->IsInsideLoop(user)) {
|
|
// Some optimization passes have been seen to leave dead phis in the
|
|
// IR so we check that if a phi is used outside of the loop that the
|
|
// user is not dead.
|
|
if (!(user->opcode() == SpvOpPhi &&
|
|
def_use_manager->NumUsers(user) == 0))
|
|
return false;
|
|
}
|
|
return true;
|
|
};
|
|
|
|
if (!def_use_manager->WhileEachUser(&inst, is_used_outside_loop)) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool LoopUtils::PartiallyUnroll(size_t factor) {
|
|
if (factor == 1 || !CanPerformUnroll()) return false;
|
|
|
|
// Create the unroller utility.
|
|
LoopUnrollerUtilsImpl unroller{context_,
|
|
loop_->GetHeaderBlock()->GetParent()};
|
|
unroller.Init(loop_);
|
|
|
|
// If the unrolling factor is larger than or the same size as the loop just
|
|
// fully unroll the loop.
|
|
if (factor >= unroller.GetLoopIterationCount()) {
|
|
unroller.FullyUnroll(loop_);
|
|
return true;
|
|
}
|
|
|
|
// If the loop unrolling factor is an residual number of iterations we need to
|
|
// let run the loop for the residual part then let it branch into the unrolled
|
|
// remaining part. We add one when calucating the remainder to take into
|
|
// account the one iteration already in the loop.
|
|
if (unroller.GetLoopIterationCount() % factor != 0) {
|
|
unroller.PartiallyUnrollResidualFactor(loop_, factor);
|
|
} else {
|
|
unroller.PartiallyUnroll(loop_, factor);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool LoopUtils::FullyUnroll() {
|
|
if (!CanPerformUnroll()) return false;
|
|
|
|
LoopUnrollerUtilsImpl unroller{context_,
|
|
loop_->GetHeaderBlock()->GetParent()};
|
|
|
|
unroller.Init(loop_);
|
|
unroller.FullyUnroll(loop_);
|
|
|
|
return true;
|
|
}
|
|
|
|
void LoopUtils::Finalize() {
|
|
// Clean up the loop descriptor to preserve the analysis.
|
|
|
|
ir::LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
|
|
LD->PostModificationCleanup();
|
|
}
|
|
|
|
/*
|
|
*
|
|
* Begin Pass.
|
|
*
|
|
*/
|
|
|
|
Pass::Status LoopUnroller::Process(ir::IRContext* c) {
|
|
context_ = c;
|
|
bool changed = false;
|
|
for (ir::Function& f : *c->module()) {
|
|
ir::LoopDescriptor* LD = context_->GetLoopDescriptor(&f);
|
|
for (ir::Loop& loop : *LD) {
|
|
LoopUtils loop_utils{c, &loop};
|
|
if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
|
|
continue;
|
|
}
|
|
|
|
loop_utils.FullyUnroll();
|
|
changed = true;
|
|
}
|
|
LD->PostModificationCleanup();
|
|
}
|
|
|
|
return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
|
|
}
|
|
|
|
} // namespace opt
|
|
} // namespace spvtools
|