Initial support for loop unrolling.

This patch adds initial support for loop unrolling in the form of a
series of utility classes which perform the unrolling. The pass can
be run with the command spirv-opt --loop-unroll. This will unroll
loops within the module which have the unroll hint set. The unroller
imposes a number of requirements on the loops it can unroll. These are
documented in the comments for the LoopUtils::CanPerformUnroll method in
loop_utils.h. Some of the restrictions will be lifted in future patches.
This commit is contained in:
Stephen McGroarty 2018-02-14 17:03:12 +00:00 committed by Steven Perron
parent 229ebc0665
commit dd8400e150
16 changed files with 4388 additions and 25 deletions

View File

@ -100,6 +100,7 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/local_single_store_elim_pass.cpp \
source/opt/local_ssa_elim_pass.cpp \
source/opt/loop_descriptor.cpp \
source/opt/loop_unroller.cpp \
source/opt/mem_pass.cpp \
source/opt/merge_return_pass.cpp \
source/opt/module.cpp \

View File

@ -512,6 +512,14 @@ Optimizer::PassToken CreateReplaceInvalidOpcodePass();
// Creates a pass that simplifies instructions using the instruction folder.
Optimizer::PassToken CreateSimplificationPass();
// Create loop unroller pass.
// Creates a pass to fully unroll loops which have the "Unroll" loop control
// mask set. The loops must meet a specific criteria in order to be unrolled
// safely this criteria is checked before doing the unroll by the
// LoopUtils::CanPerformUnroll method. Any loop that does not meet the criteria
// won't be unrolled. See CanPerformUnroll LoopUtils.h for more information.
Optimizer::PassToken CreateLoopFullyUnrollPass();
} // namespace spvtools
#endif // SPIRV_TOOLS_OPTIMIZER_HPP_

View File

@ -58,6 +58,7 @@ add_library(SPIRV-Tools-opt
local_ssa_elim_pass.h
log.h
loop_descriptor.h
loop_unroller.h
loop_utils.h
make_unique.h
mem_pass.h
@ -130,6 +131,7 @@ add_library(SPIRV-Tools-opt
local_ssa_elim_pass.cpp
loop_descriptor.cpp
loop_utils.cpp
loop_unroller.cpp
mem_pass.cpp
merge_return_pass.cpp
module.cpp

View File

@ -224,6 +224,19 @@ class DominatorTree {
return true;
}
// Applies the std::function |func| to all nodes in the dominator tree from
// |node| downwards. The boolean return from |func| is used to determine
// whether or not the children should also be traversed. Tree nodes are
// visited in a depth first pre-order.
void VisitChildrenIf(std::function<bool(DominatorTreeNode*)> func,
iterator node) {
if (func(&*node)) {
for (auto n : *node) {
VisitChildrenIf(func, n->df_begin());
}
}
}
// Returns the DominatorTreeNode associated with the basic block |bb|.
// If the |bb| is unknown to the dominator tree, it returns null.
inline DominatorTreeNode* GetTreeNode(ir::BasicBlock* bb) {

View File

@ -16,9 +16,9 @@
#define LIBSPIRV_OPT_IR_BUILDER_H_
#include "opt/basic_block.h"
#include "opt/constants.h"
#include "opt/instruction.h"
#include "opt/ir_context.h"
namespace spvtools {
namespace opt {
@ -136,6 +136,12 @@ class InstructionBuilder {
return AddInstruction(std::move(select));
}
// Adds a signed int32 constant to the binary.
// The |value| parameter is the constant value to be added.
ir::Instruction* Add32BitSignedIntegerConstant(int32_t value) {
return Add32BitConstantInteger<int32_t>(value, true);
}
// Create a composite construct.
// |type| should be a composite type and the number of elements it has should
// match the size od |ids|.
@ -151,6 +157,38 @@ class InstructionBuilder {
GetContext()->TakeNextId(), ops));
return AddInstruction(std::move(construct));
}
// Adds an unsigned int32 constant to the binary.
// The |value| parameter is the constant value to be added.
ir::Instruction* Add32BitUnsignedIntegerConstant(uint32_t value) {
return Add32BitConstantInteger<uint32_t>(value, false);
}
// Adds either a signed or unsigned 32 bit integer constant to the binary
// depedning on the |sign|. If |sign| is true then the value is added as a
// signed constant otherwise as an unsigned constant. If |sign| is false the
// value must not be a negative number.
template <typename T>
ir::Instruction* Add32BitConstantInteger(T value, bool sign) {
// Assert that we are not trying to store a negative number in an unsigned
// type.
if (!sign)
assert(value > 0 &&
"Trying to add a signed integer with an unsigned type!");
// Get or create the integer type.
analysis::Integer int_type(32, sign);
// Even if the value is negative we need to pass the bit pattern as a
// uint32_t to GetConstant.
uint32_t word = value;
// Create the constant value.
const opt::analysis::Constant* constant =
GetContext()->get_constant_mgr()->GetConstant(&int_type, {word});
// Create the OpConstant instruction using the type and the value.
return GetContext()->get_constant_mgr()->GetDefiningInstruction(constant);
}
ir::Instruction* AddCompositeExtract(
uint32_t type, uint32_t id_of_composite,

View File

@ -18,6 +18,7 @@
#include <utility>
#include <vector>
#include "constants.h"
#include "opt/cfg.h"
#include "opt/dominator_tree.h"
#include "opt/ir_builder.h"
@ -29,6 +30,107 @@
namespace spvtools {
namespace ir {
// Takes in a phi instruction |induction| and the loop |header| and returns the
// step operation of the loop.
ir::Instruction* Loop::GetInductionStepOperation(
const ir::Loop* loop, const ir::Instruction* induction) const {
// Induction must be a phi instruction.
assert(induction->opcode() == SpvOpPhi);
ir::Instruction* step = nullptr;
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
// Traverse the incoming operands of the phi instruction.
for (uint32_t operand_id = 1; operand_id < induction->NumInOperands();
operand_id += 2) {
// Incoming edge.
ir::BasicBlock* incoming_block =
context_->cfg()->block(induction->GetSingleWordInOperand(operand_id));
// Check if the block is dominated by header, and thus coming from within
// the loop.
if (loop->IsInsideLoop(incoming_block)) {
step = def_use_manager->GetDef(
induction->GetSingleWordInOperand(operand_id - 1));
break;
}
}
if (!step || !IsSupportedStepOp(step->opcode())) {
return nullptr;
}
return step;
}
// Returns true if the |step| operation is an induction variable step operation
// which is currently handled.
bool Loop::IsSupportedStepOp(SpvOp step) const {
switch (step) {
case SpvOp::SpvOpISub:
case SpvOp::SpvOpIAdd:
return true;
default:
return false;
}
}
bool Loop::IsSupportedCondition(SpvOp condition) const {
switch (condition) {
// <
case SpvOp::SpvOpULessThan:
case SpvOp::SpvOpSLessThan:
// >
case SpvOp::SpvOpUGreaterThan:
case SpvOp::SpvOpSGreaterThan:
return true;
default:
return false;
}
}
// Extract the initial value from the |induction| OpPhi instruction and store it
// in |value|. If the function couldn't find the initial value of |induction|
// return false.
bool Loop::GetInductionInitValue(const ir::Loop* loop,
const ir::Instruction* induction,
int64_t* value) const {
ir::Instruction* constant_instruction = nullptr;
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
for (uint32_t operand_id = 0; operand_id < induction->NumInOperands();
operand_id += 2) {
ir::BasicBlock* bb = context_->cfg()->block(
induction->GetSingleWordInOperand(operand_id + 1));
if (!loop->IsInsideLoop(bb)) {
constant_instruction = def_use_manager->GetDef(
induction->GetSingleWordInOperand(operand_id));
}
}
if (!constant_instruction) return false;
const opt::analysis::Constant* constant =
context_->get_constant_mgr()->FindDeclaredConstant(
constant_instruction->result_id());
if (!constant) return false;
if (value) {
const opt::analysis::Integer* type =
constant->AsIntConstant()->type()->AsInteger();
if (type->IsSigned()) {
*value = constant->AsIntConstant()->GetS32BitValue();
} else {
*value = constant->AsIntConstant()->GetU32BitValue();
}
}
return true;
}
Loop::Loop(IRContext* context, opt::DominatorAnalysis* dom_analysis,
BasicBlock* header, BasicBlock* continue_target,
BasicBlock* merge_target)
@ -37,12 +139,11 @@ Loop::Loop(IRContext* context, opt::DominatorAnalysis* dom_analysis,
loop_continue_(continue_target),
loop_merge_(merge_target),
loop_preheader_(nullptr),
parent_(nullptr) {
parent_(nullptr),
loop_is_marked_for_removal_(false) {
assert(context);
assert(dom_analysis);
loop_preheader_ = FindLoopPreheader(dom_analysis);
AddBasicBlockToLoop(header);
AddBasicBlockToLoop(continue_target);
}
BasicBlock* Loop::FindLoopPreheader(opt::DominatorAnalysis* dom_analysis) {
@ -92,7 +193,6 @@ bool Loop::IsInsideLoop(Instruction* inst) const {
bool Loop::IsBasicBlockInLoopSlow(const BasicBlock* bb) {
assert(bb->GetParent() && "The basic block does not belong to a function");
opt::DominatorAnalysis* dom_analysis =
context_->GetDominatorAnalysis(bb->GetParent(), *context_->cfg());
if (!dom_analysis->Dominates(GetHeaderBlock(), bb)) return false;
@ -219,14 +319,8 @@ void Loop::SetLatchBlock(BasicBlock* latch) {
void Loop::SetMergeBlock(BasicBlock* merge) {
#ifndef NDEBUG
assert(merge->GetParent() && "The basic block does not belong to a function");
CFG& cfg = *merge->GetParent()->GetParent()->context()->cfg();
for (uint32_t pred : cfg.preds(merge->id())) {
assert(IsInsideLoop(pred) &&
"A predecessor of the merge block does not belong to the loop");
}
assert(!IsInsideLoop(merge) && "The merge block is in the loop");
#endif // NDEBUG
assert(!IsInsideLoop(merge) && "The merge block is in the loop");
SetMergeBlockImpl(merge);
if (GetHeaderBlock()->GetLoopMergeInst()) {
@ -327,6 +421,7 @@ LoopDescriptor::~LoopDescriptor() { ClearLoops(); }
void LoopDescriptor::PopulateList(const Function* f) {
IRContext* context = f->GetParent()->context();
opt::DominatorAnalysis* dom_analysis =
context->GetDominatorAnalysis(f, *context->cfg());
@ -384,7 +479,7 @@ void LoopDescriptor::PopulateList(const Function* f) {
make_range(node.df_begin(), node.df_end())) {
// Check if we are in the loop.
if (dom_tree.Dominates(dom_merge_node, &loop_node)) continue;
current_loop->AddBasicBlockToLoop(loop_node.bb_);
current_loop->AddBasicBlock(loop_node.bb_);
basic_block_to_loop_.insert(
std::make_pair(loop_node.bb_->id(), current_loop));
}
@ -395,12 +490,262 @@ void LoopDescriptor::PopulateList(const Function* f) {
}
}
ir::BasicBlock* Loop::FindConditionBlock() const {
const ir::Function& function = *loop_merge_->GetParent();
ir::BasicBlock* condition_block = nullptr;
const opt::DominatorAnalysis* dom_analysis =
context_->GetDominatorAnalysis(&function, *context_->cfg());
ir::BasicBlock* bb = dom_analysis->ImmediateDominator(loop_merge_);
if (!bb) return nullptr;
const ir::Instruction& branch = *bb->ctail();
// Make sure the branch is a conditional branch.
if (branch.opcode() != SpvOpBranchConditional) return nullptr;
// Make sure one of the two possible branches is to the merge block.
if (branch.GetSingleWordInOperand(1) == loop_merge_->id() ||
branch.GetSingleWordInOperand(2) == loop_merge_->id()) {
condition_block = bb;
}
return condition_block;
}
bool Loop::FindNumberOfIterations(const ir::Instruction* induction,
const ir::Instruction* branch_inst,
size_t* iterations_out,
int64_t* step_value_out,
int64_t* init_value_out) const {
// From the branch instruction find the branch condition.
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
// Condition instruction from the OpConditionalBranch.
ir::Instruction* condition =
def_use_manager->GetDef(branch_inst->GetSingleWordOperand(0));
assert(IsSupportedCondition(condition->opcode()));
// Get the constant manager from the ir context.
opt::analysis::ConstantManager* const_manager = context_->get_constant_mgr();
// Find the constant value used by the condition variable. Exit out if it
// isn't a constant int.
const opt::analysis::Constant* upper_bound =
const_manager->FindDeclaredConstant(condition->GetSingleWordOperand(3));
if (!upper_bound) return false;
// Must be integer because of the opcode on the condition.
int64_t condition_value = 0;
const opt::analysis::Integer* type =
upper_bound->AsIntConstant()->type()->AsInteger();
if (type->IsSigned()) {
condition_value = upper_bound->AsIntConstant()->GetS32BitValue();
} else {
condition_value = upper_bound->AsIntConstant()->GetU32BitValue();
}
// Find the instruction which is stepping through the loop.
ir::Instruction* step_inst = GetInductionStepOperation(this, induction);
if (!step_inst) return false;
// Find the constant value used by the condition variable.
const opt::analysis::Constant* step_constant =
const_manager->FindDeclaredConstant(step_inst->GetSingleWordOperand(3));
if (!step_constant) return false;
// Must be integer because of the opcode on the condition.
int64_t step_value = 0;
const opt::analysis::Integer* step_type =
step_constant->AsIntConstant()->type()->AsInteger();
if (step_type->IsSigned()) {
step_value = step_constant->AsIntConstant()->GetS32BitValue();
} else {
step_value = step_constant->AsIntConstant()->GetU32BitValue();
}
// If this is a subtraction step we should negate the step value.
if (step_inst->opcode() == SpvOp::SpvOpISub) {
step_value = -step_value;
}
// Find the inital value of the loop and make sure it is a constant integer.
int64_t init_value = 0;
if (!GetInductionInitValue(this, induction, &init_value)) return false;
// If iterations is non null then store the value in that.
if (iterations_out) {
int64_t num_itrs = GetIterations(condition->opcode(), condition_value,
init_value, step_value);
// If the loop body will not be reached return false.
if (num_itrs <= 0) {
return false;
}
assert(static_cast<size_t>(num_itrs) <= std::numeric_limits<size_t>::max());
*iterations_out = static_cast<size_t>(num_itrs);
}
if (step_value_out) {
*step_value_out = step_value;
}
if (init_value_out) {
*init_value_out = init_value;
}
return true;
}
// We retrieve the number of iterations using the following formula, diff /
// |step_value| where diff is calculated differently according to the
// |condition| and uses the |condition_value| and |init_value|. If diff /
// |step_value| is NOT cleanly divisable then we add one to the sum.
int64_t Loop::GetIterations(SpvOp condition, int64_t condition_value,
int64_t init_value, int64_t step_value) const {
int64_t diff = 0;
// Take the abs of - step values.
step_value = llabs(step_value);
switch (condition) {
case SpvOp::SpvOpSLessThan:
case SpvOp::SpvOpULessThan: {
diff = condition_value - init_value;
break;
}
case SpvOp::SpvOpSGreaterThan:
case SpvOp::SpvOpUGreaterThan: {
diff = init_value - condition_value;
break;
}
default:
assert(false &&
"Could not retrieve number of iterations from the loop condition. "
"Condition is not supported.");
}
int64_t result = diff / step_value;
if (diff % step_value != 0) {
result += 1;
}
return result;
}
ir::Instruction* Loop::FindInductionVariable(
const ir::BasicBlock* condition_block) const {
// Find the branch instruction.
const ir::Instruction& branch_inst = *condition_block->ctail();
ir::Instruction* induction = nullptr;
// Verify that the branch instruction is a conditional branch.
if (branch_inst.opcode() == SpvOp::SpvOpBranchConditional) {
// From the branch instruction find the branch condition.
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
// Find the instruction representing the condition used in the conditional
// branch.
ir::Instruction* condition =
def_use_manager->GetDef(branch_inst.GetSingleWordOperand(0));
// Ensure that the condition is a less than operation.
if (condition && IsSupportedCondition(condition->opcode())) {
// The left hand side operand of the operation.
ir::Instruction* variable_inst =
def_use_manager->GetDef(condition->GetSingleWordOperand(2));
// Make sure the variable instruction used is a phi.
if (!variable_inst || variable_inst->opcode() != SpvOpPhi) return nullptr;
// Make sure the phi instruction only has two incoming blocks. Each
// incoming block will be represented by two in operands in the phi
// instruction, the value and the block which that value came from. We
// assume the cannocalised phi will have two incoming values, one from the
// preheader and one from the continue block.
size_t max_supported_operands = 4;
if (variable_inst->NumInOperands() == max_supported_operands) {
// The operand index of the first incoming block label.
uint32_t operand_label_1 = 1;
// The operand index of the second incoming block label.
uint32_t operand_label_2 = 3;
// Make sure one of them is the preheader.
if (variable_inst->GetSingleWordInOperand(operand_label_1) !=
loop_preheader_->id() &&
variable_inst->GetSingleWordInOperand(operand_label_2) !=
loop_preheader_->id()) {
return nullptr;
}
// And make sure that the other is the latch block.
if (variable_inst->GetSingleWordInOperand(operand_label_1) !=
loop_continue_->id() &&
variable_inst->GetSingleWordInOperand(operand_label_2) !=
loop_continue_->id()) {
return nullptr;
}
} else {
return nullptr;
}
if (!FindNumberOfIterations(variable_inst, &branch_inst, nullptr))
return nullptr;
induction = variable_inst;
}
}
return induction;
}
// Add and remove loops which have been marked for addition and removal to
// maintain the state of the loop descriptor class.
void LoopDescriptor::PostModificationCleanup() {
LoopContainerType loops_to_remove_;
for (ir::Loop* loop : loops_) {
if (loop->IsMarkedForRemoval()) {
loops_to_remove_.push_back(loop);
if (loop->HasParent()) {
loop->GetParent()->RemoveChildLoop(loop);
}
}
}
for (ir::Loop* loop : loops_to_remove_) {
loops_.erase(std::find(loops_.begin(), loops_.end(), loop));
}
for (auto& pair : loops_to_add_) {
ir::Loop* parent = pair.first;
ir::Loop* loop = pair.second;
if (parent) {
loop->SetParent(nullptr);
parent->AddNestedLoop(loop);
for (uint32_t block_id : loop->GetBlocks()) {
parent->AddBasicBlock(block_id);
}
}
loops_.emplace_back(loop);
}
loops_to_add_.clear();
}
void LoopDescriptor::ClearLoops() {
for (Loop* loop : loops_) {
delete loop;
}
loops_.clear();
}
} // namespace ir
} // namespace spvtools

View File

@ -24,6 +24,7 @@
#include <vector>
#include "opt/basic_block.h"
#include "opt/module.h"
#include "opt/tree_iterator.h"
namespace spvtools {
@ -52,7 +53,8 @@ class Loop {
loop_continue_(nullptr),
loop_merge_(nullptr),
loop_preheader_(nullptr),
parent_(nullptr) {}
parent_(nullptr),
loop_is_marked_for_removal_(false) {}
Loop(IRContext* context, opt::DominatorAnalysis* analysis, BasicBlock* header,
BasicBlock* continue_target, BasicBlock* merge_target);
@ -144,6 +146,8 @@ class Loop {
return lvl;
}
inline size_t NumImmediateChildren() const { return nested_loops_.size(); }
// Adds |nested| as a nested loop of this loop. Automatically register |this|
// as the parent of |nested|.
inline void AddNestedLoop(Loop* nested) {
@ -180,6 +184,21 @@ class Loop {
// Returns true if the instruction |inst| is inside this loop.
bool IsInsideLoop(Instruction* inst) const;
// Adds the Basic Block |bb| to this loop and its parents.
void AddBasicBlock(const BasicBlock* bb) { AddBasicBlock(bb->id()); }
// Adds the Basic Block with |id| to this loop and its parents.
void AddBasicBlock(uint32_t id) {
for (Loop* loop = this; loop != nullptr; loop = loop->parent_) {
loop_basic_blocks_.insert(id);
}
}
// Removes all the basic blocks from the set of basic blocks within the loop.
// This does not affect any of the stored pointers to the header, preheader,
// merge, or continue blocks.
void ClearBlocks() { loop_basic_blocks_.clear(); }
// Adds the Basic Block |bb| this loop and its parents.
void AddBasicBlockToLoop(const BasicBlock* bb) {
assert(IsBasicBlockInLoopSlow(bb) &&
@ -188,11 +207,58 @@ class Loop {
AddBasicBlock(bb);
}
// Adds the Basic Block |bb| this loop and its parents.
void AddBasicBlock(const BasicBlock* bb) {
for (Loop* loop = this; loop != nullptr; loop = loop->parent_) {
loop_basic_blocks_.insert(bb->id());
// This function uses the |condition| to find the induction variable within
// the loop. This only works if the loop is bound by a single condition and a
// single induction variable.
ir::Instruction* FindInductionVariable(const ir::BasicBlock* condition) const;
// Returns the number of iterations within a loop when given the |induction|
// variable and the loop |condition| check. It stores the found number of
// iterations in the output parameter |iterations| and optionally, the step
// value in |step_value| and the initial value of the induction variable in
// |init_value|.
bool FindNumberOfIterations(const ir::Instruction* induction,
const ir::Instruction* condition,
size_t* iterations,
int64_t* step_amount = nullptr,
int64_t* init_value = nullptr) const;
// Returns the value of the OpLoopMerge control operand as a bool. Loop
// control can be None(0), Unroll(1), or DontUnroll(2). This function returns
// true if it is set to Unroll.
inline bool HasUnrollLoopControl() const {
assert(loop_header_);
if (!loop_header_->GetLoopMergeInst()) return false;
return loop_header_->GetLoopMergeInst()->GetSingleWordOperand(2) == 1;
}
// Finds the conditional block with a branch to the merge and continue blocks
// within the loop body.
ir::BasicBlock* FindConditionBlock() const;
// Remove the child loop form this loop.
inline void RemoveChildLoop(Loop* loop) {
nested_loops_.erase(
std::find(nested_loops_.begin(), nested_loops_.end(), loop));
loop->SetParent(nullptr);
}
// Mark this loop to be removed later by a call to
// LoopDescriptor::PostModificationCleanup.
inline void MarkLoopForRemoval() { loop_is_marked_for_removal_ = true; }
// Returns whether or not this loop has been marked for removal.
inline bool IsMarkedForRemoval() const { return loop_is_marked_for_removal_; }
// Returns true if all nested loops have been marked for removal.
inline bool AreAllChildrenMarkedForRemoval() const {
for (const Loop* child : nested_loops_) {
if (!child->IsMarkedForRemoval()) {
return false;
}
}
return true;
}
// Sets the parent loop of this loop, that is, a loop which contains this loop
@ -206,6 +272,28 @@ class Loop {
// loop
bool AreAllOperandsOutsideLoop(IRContext* context, Instruction* inst);
// Extract the initial value from the |induction| variable and store it in
// |value|. If the function couldn't find the initial value of |induction|
// return false.
bool GetInductionInitValue(const ir::Loop* loop,
const ir::Instruction* induction,
int64_t* value) const;
// Takes in a phi instruction |induction| and the loop |header| and returns
// the step operation of the loop.
ir::Instruction* GetInductionStepOperation(
const ir::Loop* loop, const ir::Instruction* induction) const;
// Returns true if we can deduce the number of loop iterations in the step
// operation |step|. IsSupportedCondition must also be true for the condition
// instruction.
bool IsSupportedStepOp(SpvOp step) const;
// Returns true if we can deduce the number of loop iterations in the
// condition operation |condition|. IsSupportedStepOp must also be true for
// the step instruction.
bool IsSupportedCondition(SpvOp condition) const;
private:
IRContext* context_;
// The block which marks the start of the loop.
@ -244,6 +332,17 @@ class Loop {
// Sets |merge| as the loop merge block. No checks are performed here.
inline void SetMergeBlockImpl(BasicBlock* merge) { loop_merge_ = merge; }
// Each differnt loop |condition| affects how we calculate the number of
// iterations using the |condition_value|, |init_value|, and |step_values| of
// the induction variable. This method will return the number of iterations in
// a loop with those values for a given |condition|.
int64_t GetIterations(SpvOp condition, int64_t condition_value,
int64_t init_value, int64_t step_value) const;
// This is to allow for loops to be removed mid iteration without invalidating
// the iterators.
bool loop_is_marked_for_removal_;
// This is only to allow LoopDescriptor::dummy_top_loop_ to add top level
// loops as child.
friend class LoopDescriptor;
@ -317,10 +416,21 @@ class LoopDescriptor {
basic_block_to_loop_[bb_id] = loop;
}
// Mark the loop |loop_to_add| as needing to be added when the user calls
// PostModificationCleanup. |parent| may be null.
inline void AddLoop(ir::Loop* loop_to_add, ir::Loop* parent) {
loops_to_add_.emplace_back(std::make_pair(parent, loop_to_add));
}
// Should be called to preserve the LoopAnalysis after loops have been marked
// for addition with AddLoop or MarkLoopForRemoval.
void PostModificationCleanup();
private:
// TODO(dneto): This should be a vector of unique_ptr. But VisualStudio 2013
// is unable to compile it.
using LoopContainerType = std::vector<Loop*>;
using LoopsToAddContainerType = std::vector<std::pair<Loop*, Loop*>>;
// Creates loop descriptors for the function |f|.
void PopulateList(const Function* f);
@ -338,9 +448,15 @@ class LoopDescriptor {
// A list of all the loops in the function. This variable owns the Loop
// objects.
LoopContainerType loops_;
// Dummy root: this "loop" is only there to help iterators creation.
Loop dummy_top_loop_;
std::unordered_map<uint32_t, Loop*> basic_block_to_loop_;
// List of the loops marked for addition when PostModificationCleanup is
// called.
LoopsToAddContainerType loops_to_add_;
};
} // namespace ir

View File

@ -0,0 +1,939 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "opt/loop_unroller.h"
#include <map>
#include <memory>
#include <utility>
#include "opt/ir_builder.h"
#include "opt/loop_utils.h"
// Implements loop util unrolling functionality for fully and partially
// unrolling loops. Given a factor it will duplicate the loop that many times,
// appending each one to the end of the old loop and removing backedges, to
// create a new unrolled loop.
//
// 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
// loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
// validate that a given loop can be unrolled. That method (along with the
// constructor of loop) checks that the IR is in the expected canonicalised
// format.
//
// 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
// perform the unrolling. This implements helper methods to copy the loop basic
// blocks and remap the ids of instructions used inside them.
//
// 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
// actually performs the loop duplication. It does this by creating a
// LoopUnrollState object and then copying the loop as given by the factor
// parameter. The LoopUnrollState object retains the state of the unroller
// between the loop body copies as each iteration needs information on the last
// to adjust the phi induction variable, adjust the OpLoopMerge instruction in
// the main loop header, and change the previous continue block to point to the
// new header and the new continue block to the main loop header.
//
// 4 - If the loop is to be fully unrolled then it is simply closed after step
// 3, with the OpLoopMerge being deleted, the backedge removed, and the
// condition blocks folded.
//
// 5 - If it is being partially unrolled: if the unrolling factor leaves the
// loop with an even number of bodies with respect to the number of loop
// iterations then step 3 is all that is needed. If it is uneven then we need to
// duplicate the loop completely and unroll the duplicated loop to cover the
// residual part and adjust the first loop to cover only the "even" part. For
// instance if you request an unroll factor of 3 on a loop with 10 iterations
// then copying the body three times would leave you with three bodies in the
// loop
// where the loop still iterates over each 4 times. So we make two loops one
// iterating once then a second loop of three iterating 3 times.
namespace spvtools {
namespace opt {
namespace {
// This utility class encapsulates some of the state we need to maintain between
// loop unrolls. Specifically it maintains key blocks and the induction variable
// in the current loop duplication step and the blocks from the previous one.
// This is because each step of the unroll needs to use data from both the
// preceding step and the original loop.
struct LoopUnrollState {
LoopUnrollState()
: previous_phi_(nullptr),
previous_continue_block_(nullptr),
previous_condition_block_(nullptr),
new_phi(nullptr),
new_continue_block(nullptr),
new_condition_block(nullptr),
new_header_block(nullptr) {}
// Initialize from the loop descriptor class.
LoopUnrollState(ir::Instruction* induction, ir::BasicBlock* continue_block,
ir::BasicBlock* condition)
: previous_phi_(induction),
previous_continue_block_(continue_block),
previous_condition_block_(condition),
new_phi(nullptr),
new_continue_block(nullptr),
new_condition_block(nullptr),
new_header_block(nullptr) {}
// Swap the state so that the new nodes are now the previous nodes.
void NextIterationState() {
previous_phi_ = new_phi;
previous_continue_block_ = new_continue_block;
previous_condition_block_ = new_condition_block;
// Clear new nodes.
new_phi = nullptr;
new_continue_block = nullptr;
new_condition_block = nullptr;
new_header_block = nullptr;
// Clear new block/instruction maps.
new_blocks.clear();
new_inst.clear();
}
// The induction variable from the immediately preceding loop body.
ir::Instruction* previous_phi_;
// The previous continue block. The backedge will be removed from this and
// added to the new continue block.
ir::BasicBlock* previous_continue_block_;
// The previous condition block. This may be folded to flatten the loop.
ir::BasicBlock* previous_condition_block_;
// The new induction variable.
ir::Instruction* new_phi;
// The new continue block.
ir::BasicBlock* new_continue_block;
// The new condition block.
ir::BasicBlock* new_condition_block;
// The new header block.
ir::BasicBlock* new_header_block;
// A mapping of new block ids to the original blocks which they were copied
// from.
std::unordered_map<uint32_t, ir::BasicBlock*> new_blocks;
// A mapping of new instruction ids to the instruction ids from which they
// were copied.
std::unordered_map<uint32_t, uint32_t> new_inst;
};
// This class implements the actual unrolling. It uses a LoopUnrollState to
// maintain the state of the unrolling inbetween steps.
class LoopUnrollerUtilsImpl {
public:
using BasicBlockListTy = std::vector<std::unique_ptr<ir::BasicBlock>>;
LoopUnrollerUtilsImpl(ir::IRContext* c, ir::Function* function)
: context_(c),
function_(*function),
loop_condition_block_(nullptr),
loop_induction_variable_(nullptr),
number_of_loop_iterations_(0),
loop_step_value_(0),
loop_init_value_(0) {}
// Unroll the |loop| by given |factor| by copying the whole body |factor|
// times. The resulting basicblock structure will remain a loop.
void PartiallyUnroll(ir::Loop*, size_t factor);
// If partially unrolling the |loop| would leave the loop with too many bodies
// for its number of iterations then this method should be used. This method
// will duplicate the |loop| completely, making the duplicated loop the
// successor of the original's merge block. The original loop will have its
// condition changed to loop over the residual part and the duplicate will be
// partially unrolled. The resulting structure will be two loops.
void PartiallyUnrollResidualFactor(ir::Loop* loop, size_t factor);
// Fully unroll the |loop| by copying the full body by the total number of
// loop iterations, folding all conditions, and removing the backedge from the
// continue block to the header.
void FullyUnroll(ir::Loop* loop);
// Get the ID of the variable in the |phi| paired with |label|.
uint32_t GetPhiDefID(const ir::Instruction* phi, uint32_t label) const;
// Close the loop by removing the OpLoopMerge from the |loop| header block and
// making the backedge point to the merge block.
void CloseUnrolledLoop(ir::Loop* loop);
// Remove the OpConditionalBranch instruction inside |conditional_block| used
// to branch to either exit or continue the loop and replace it with an
// unconditional OpBranch to block |new_target|.
void FoldConditionBlock(ir::BasicBlock* condtion_block, uint32_t new_target);
// Add all blocks_to_add_ to function_ at the |insert_point|.
void AddBlocksToFunction(const ir::BasicBlock* insert_point);
// Duplicates the |old_loop|, cloning each body and remaping the ids without
// removing instructions or changing relative structure. Result will be stored
// in |new_loop|.
void DuplicateLoop(ir::Loop* old_loop, ir::Loop* new_loop);
inline size_t GetLoopIterationCount() const {
return number_of_loop_iterations_;
}
// Extracts the initial state information from the |loop|.
void Init(ir::Loop* loop);
private:
// Remap all the in |basic_block| to new IDs and keep the mapping of new ids
// to old
// ids. |loop| is used to identify special loop blocks (header, continue,
// ect).
void AssignNewResultIds(ir::BasicBlock* basic_block);
// Using the map built by AssignNewResultIds, for each instruction in
// |basic_block| use
// that map to substitute the IDs used by instructions (in the operands) with
// the new ids.
void RemapOperands(ir::BasicBlock* basic_block);
// Copy the whole body of the loop, all blocks dominated by the |loop| header
// and not dominated by the |loop| merge. The copied body will be linked to by
// the old |loop| continue block and the new body will link to the |loop|
// header via the new continue block. |eliminate_conditions| is used to decide
// whether or not to fold all the condition blocks other than the last one.
void CopyBody(ir::Loop* loop, bool eliminate_conditions);
// Copy a given |block_to_copy| in the |loop| and record the mapping of the
// old/new ids. |preserve_instructions| determines whether or not the method
// will modify (other than result_id) instructions which are copied.
void CopyBasicBlock(ir::Loop* loop, const ir::BasicBlock* block_to_copy,
bool preserve_instructions);
// The actual implementation of the unroll step. Unrolls |loop| by given
// |factor| by copying the body by |factor| times. Also propagates the
// induction variable value throughout the copies.
void Unroll(ir::Loop* loop, size_t factor);
// Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
// as computed by the method ComputeLoopOrderedBlocks.
void ComputeLoopOrderedBlocks(ir::Loop* loop);
// Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
// the parent exists.
void AddBlocksToLoop(ir::Loop* loop) const;
// A pointer to the IRContext. Used to add/remove instructions and for usedef
// chains.
ir::IRContext* context_;
// A reference the function the loop is within.
ir::Function& function_;
// A list of basic blocks to be added to the loop at the end of an unroll
// step.
BasicBlockListTy blocks_to_add_;
// List of instructions which are now dead and can be removed.
std::vector<ir::Instruction*> dead_instructions_;
// Maintains the current state of the transform between calls to unroll.
LoopUnrollState state_;
// An ordered list containing the loop basic blocks.
std::vector<ir::BasicBlock*> loop_blocks_inorder_;
// The block containing the condition check which contains a conditional
// branch to the merge and continue block.
ir::BasicBlock* loop_condition_block_;
// The induction variable of the loop.
ir::Instruction* loop_induction_variable_;
// The number of loop iterations that the loop would preform pre-unroll.
size_t number_of_loop_iterations_;
// The amount that the loop steps each iteration.
int64_t loop_step_value_;
// The value the loop starts stepping from.
int64_t loop_init_value_;
};
/*
* Static helper functions.
*/
// Retrieve the index of the OpPhi instruction |phi| which corresponds to the
// incoming |block| id.
static uint32_t GetPhiIndexFromLabel(const ir::BasicBlock* block,
const ir::Instruction* phi) {
for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
if (block->id() == phi->GetSingleWordInOperand(i)) {
return i;
}
}
assert(false && "Could not find operand in instruction.");
return 0;
}
void LoopUnrollerUtilsImpl::Init(ir::Loop* loop) {
loop_condition_block_ = loop->FindConditionBlock();
// When we reinit the second loop during PartiallyUnrollResidualFactor we need
// to use the cached value from the duplicate step as the dominator tree
// basded solution, loop->FindConditionBlock, requires all the nodes to be
// connected up with the correct branches. They won't be at this point.
if (!loop_condition_block_) {
loop_condition_block_ = state_.new_condition_block;
}
assert(loop_condition_block_);
loop_induction_variable_ = loop->FindInductionVariable(loop_condition_block_);
assert(loop_induction_variable_);
bool found = loop->FindNumberOfIterations(
loop_induction_variable_, &*loop_condition_block_->ctail(),
&number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
(void)found; // To silence unused variable warning on release builds.
assert(found);
ComputeLoopOrderedBlocks(loop);
}
// This function is used to partially unroll the loop when the factor provided
// would normally lead to an illegal optimization. Instead of just unrolling the
// loop it creates two loops and unrolls one and adjusts the condition on the
// other. The end result being that the new loop pair iterates over the correct
// number of bodies.
void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop,
size_t factor) {
// Create a new merge block for the first loop.
std::unique_ptr<ir::Instruction> new_label{new ir::Instruction(
context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
std::unique_ptr<ir::BasicBlock> new_exit_bb{
new ir::BasicBlock(std::move(new_label))};
// Save the id of the block before we move it.
uint32_t new_merge_id = new_exit_bb->id();
// Add the block the list of blocks to add, we want this merge block to be
// right at the start of the new blocks.
blocks_to_add_.push_back(std::move(new_exit_bb));
ir::BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
ir::Instruction& original_conditional_branch = *loop_condition_block_->tail();
// Duplicate the loop, providing access to the blocks of both loops.
// This is a naked new due to the VS2013 requirement of not having unique
// pointers in vectors, as it will be inserted into a vector with
// loop_descriptor.AddLoop.
ir::Loop* new_loop = new ir::Loop(*loop);
// Clear the basic blocks of the new loop.
new_loop->ClearBlocks();
DuplicateLoop(loop, new_loop);
// Add the blocks to the function.
AddBlocksToFunction(loop->GetMergeBlock());
blocks_to_add_.clear();
InstructionBuilder builder{context_, new_exit_bb_raw};
// Make the first loop branch to the second.
builder.AddBranch(new_loop->GetHeaderBlock()->id());
loop_condition_block_ = state_.new_condition_block;
loop_induction_variable_ = state_.new_phi;
// Unroll the new loop by the factor with the usual -1 to account for the
// existing block iteration.
Unroll(new_loop, factor);
// We need to account for the initial body when calculating the remainder.
int64_t remainder = loop_init_value_ +
(number_of_loop_iterations_ % factor) * loop_step_value_;
assert(remainder > std::numeric_limits<int32_t>::min() &&
remainder < std::numeric_limits<int32_t>::max());
ir::Instruction* new_constant = nullptr;
// If the remainder is negative then we add a signed constant, otherwise just
// add an unsigned constant.
if (remainder < 0) {
new_constant =
builder.Add32BitSignedIntegerConstant(static_cast<int32_t>(remainder));
} else {
new_constant = builder.Add32BitUnsignedIntegerConstant(
static_cast<int32_t>(remainder));
}
uint32_t constant_id = new_constant->result_id();
// Add the merge block to the back of the binary.
blocks_to_add_.push_back(
std::unique_ptr<ir::BasicBlock>(new_loop->GetMergeBlock()));
AddBlocksToLoop(new_loop);
// Add the blocks to the function.
AddBlocksToFunction(loop->GetMergeBlock());
// Reset the usedef analysis.
context_->InvalidateAnalysesExceptFor(
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
// Update the condition check.
ir::Instruction* condition_check = def_use_manager->GetDef(
original_conditional_branch.GetSingleWordOperand(0));
// This should have been checked by the LoopUtils::CanPerformUnroll function
// before entering this.
assert(condition_check->opcode() == SpvOpSLessThan);
condition_check->SetInOperand(1, {constant_id});
// Update the next phi node. The phi will have a constant value coming in from
// the preheader block. For the duplicated loop we need to update the constant
// to be the amount of iterations covered by the first loop and the incoming
// block to be the first loops new merge block.
uint32_t phi_incoming_index =
GetPhiIndexFromLabel(loop->GetPreHeaderBlock(), loop_induction_variable_);
loop_induction_variable_->SetInOperand(phi_incoming_index - 1, {constant_id});
loop_induction_variable_->SetInOperand(phi_incoming_index, {new_merge_id});
context_->InvalidateAnalysesExceptFor(
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
ir::LoopDescriptor& loop_descriptor =
*context_->GetLoopDescriptor(&function_);
loop_descriptor.AddLoop(new_loop, loop->GetParent());
}
// Duplicate the |loop| body |factor| number of times while keeping the loop
// backedge intact.
void LoopUnrollerUtilsImpl::PartiallyUnroll(ir::Loop* loop, size_t factor) {
Unroll(loop, factor);
AddBlocksToLoop(loop);
AddBlocksToFunction(loop->GetMergeBlock());
}
// Duplicate the |loop| body |factor| number of times while keeping the loop
// backedge intact.
void LoopUnrollerUtilsImpl::Unroll(ir::Loop* loop, size_t factor) {
state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
loop_condition_block_};
for (size_t i = 0; i < factor - 1; ++i) {
CopyBody(loop, true);
}
uint32_t phi_index = GetPhiIndexFromLabel(state_.previous_continue_block_,
state_.previous_phi_);
uint32_t phi_variable =
state_.previous_phi_->GetSingleWordInOperand(phi_index - 1);
uint32_t phi_label = state_.previous_phi_->GetSingleWordInOperand(phi_index);
ir::Instruction* original_phi = loop_induction_variable_;
// SetInOperands are offset by two.
original_phi->SetInOperand(phi_index - 1, {phi_variable});
original_phi->SetInOperand(phi_index, {phi_label});
}
// Fully unroll the loop by partially unrolling it by the number of loop
// iterations minus one for the body already accounted for.
void LoopUnrollerUtilsImpl::FullyUnroll(ir::Loop* loop) {
// We unroll the loop by number of iterations in the loop.
Unroll(loop, number_of_loop_iterations_);
// The first condition block is preserved until now so it can be copied.
FoldConditionBlock(loop_condition_block_, 1);
// Delete the OpLoopMerge and remove the backedge to the header.
CloseUnrolledLoop(loop);
// Mark the loop for later deletion. This allows us to preserve the loop
// iterators but still disregard dead loops.
loop->MarkLoopForRemoval();
// If the loop has a parent add the new blocks to the parent.
if (loop->GetParent()) {
AddBlocksToLoop(loop->GetParent());
}
// Add the blocks to the function.
AddBlocksToFunction(loop->GetMergeBlock());
// Invalidate all analyses.
context_->InvalidateAnalysesExceptFor(
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
}
// Copy a given basic block, give it a new result_id, and store the new block
// and the id mapping in the state. |preserve_instructions| is used to determine
// whether or not this function should edit instructions other than the
// |result_id|.
void LoopUnrollerUtilsImpl::CopyBasicBlock(ir::Loop* loop,
const ir::BasicBlock* itr,
bool preserve_instructions) {
// Clone the block exactly, including the IDs.
ir::BasicBlock* basic_block = itr->Clone(context_);
basic_block->SetParent(itr->GetParent());
// Assign each result a new unique ID and keep a mapping of the old ids to
// the new ones.
AssignNewResultIds(basic_block);
// If this is the continue block we are copying.
if (itr == loop->GetLatchBlock()) {
// Make the OpLoopMerge point to this block for the continue.
if (!preserve_instructions) {
ir::Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
merge_inst->SetInOperand(1, {basic_block->id()});
}
state_.new_continue_block = basic_block;
}
// If this is the header block we are copying.
if (itr == loop->GetHeaderBlock()) {
state_.new_header_block = basic_block;
if (!preserve_instructions) {
// Remove the loop merge instruction if it exists.
ir::Instruction* merge_inst = basic_block->GetLoopMergeInst();
if (merge_inst) context_->KillInst(merge_inst);
}
}
// If this is the condition block we are copying.
if (itr == loop_condition_block_) {
state_.new_condition_block = basic_block;
}
// Add this block to the list of blocks to add to the function at the end of
// the unrolling process.
blocks_to_add_.push_back(std::unique_ptr<ir::BasicBlock>(basic_block));
// Keep tracking the old block via a map.
state_.new_blocks[itr->id()] = basic_block;
}
void LoopUnrollerUtilsImpl::CopyBody(ir::Loop* loop,
bool eliminate_conditions) {
// Copy each basic block in the loop, give them new ids, and save state
// information.
for (const ir::BasicBlock* itr : loop_blocks_inorder_) {
CopyBasicBlock(loop, itr, false);
}
// Set the previous continue block to point to the new header.
ir::Instruction& continue_branch = *state_.previous_continue_block_->tail();
continue_branch.SetInOperand(0, {state_.new_header_block->id()});
// As the algorithm copies the original loop blocks exactly, the tail of the
// latch block on iterations after the first one will be a branch to the new
// header and not the actual loop header. The last continue block in the loop
// should always be a backedge to the global header.
ir::Instruction& new_continue_branch = *state_.new_continue_block->tail();
new_continue_branch.SetInOperand(0, {loop->GetHeaderBlock()->id()});
// Update references to the old phi node with the actual variable.
const ir::Instruction* induction = loop_induction_variable_;
state_.new_inst[induction->result_id()] =
GetPhiDefID(state_.previous_phi_, state_.previous_continue_block_->id());
if (eliminate_conditions &&
state_.new_condition_block != loop_condition_block_) {
FoldConditionBlock(state_.new_condition_block, 1);
}
// Only reference to the header block is the backedge in the latch block,
// don't change this.
state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
for (auto& pair : state_.new_blocks) {
RemapOperands(pair.second);
}
dead_instructions_.push_back(state_.new_phi);
// Swap the state so the new is now the previous.
state_.NextIterationState();
}
uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const ir::Instruction* phi,
uint32_t label) const {
for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
if (phi->GetSingleWordOperand(operand) == label) {
return phi->GetSingleWordOperand(operand - 1);
}
}
return 0;
}
void LoopUnrollerUtilsImpl::FoldConditionBlock(ir::BasicBlock* condition_block,
uint32_t operand_label) {
// Remove the old conditional branch to the merge and continue blocks.
ir::Instruction& old_branch = *condition_block->tail();
uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
context_->KillInst(&old_branch);
// Add the new unconditional branch to the merge block.
InstructionBuilder builder{context_, condition_block};
builder.AddBranch(new_target);
}
void LoopUnrollerUtilsImpl::CloseUnrolledLoop(ir::Loop* loop) {
// Remove the OpLoopMerge instruction from the function.
ir::Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
context_->KillInst(merge_inst);
// Remove the final backedge to the header and make it point instead to the
// merge block.
state_.previous_continue_block_->tail()->SetInOperand(
0, {loop->GetMergeBlock()->id()});
// Remove the induction variable as the phi will now be invalid. Replace all
// uses with the constant initializer value (all uses of the phi will be in
// the first iteration with the subsequent phis already having been removed.
uint32_t initalizer_id =
GetPhiDefID(loop_induction_variable_, loop->GetPreHeaderBlock()->id());
context_->ReplaceAllUsesWith(loop_induction_variable_->result_id(),
initalizer_id);
// Remove the now unused phi.
context_->KillInst(loop_induction_variable_);
}
// Uses the first loop to create a copy of the loop with new IDs.
void LoopUnrollerUtilsImpl::DuplicateLoop(ir::Loop* old_loop,
ir::Loop* new_loop) {
std::vector<ir::BasicBlock*> new_block_order;
// Copy every block in the old loop.
for (const ir::BasicBlock* itr : loop_blocks_inorder_) {
CopyBasicBlock(old_loop, itr, true);
new_block_order.push_back(blocks_to_add_.back().get());
}
ir::BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
AssignNewResultIds(new_merge);
state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
for (auto& pair : state_.new_blocks) {
RemapOperands(pair.second);
}
loop_blocks_inorder_ = std::move(new_block_order);
AddBlocksToLoop(new_loop);
new_loop->SetHeaderBlock(state_.new_header_block);
new_loop->SetLatchBlock(state_.new_continue_block);
new_loop->SetMergeBlock(new_merge);
}
void LoopUnrollerUtilsImpl::AddBlocksToFunction(
const ir::BasicBlock* insert_point) {
for (ir::Instruction* inst : dead_instructions_) {
context_->KillInst(inst);
}
for (auto basic_block_iterator = function_.begin();
basic_block_iterator != function_.end(); ++basic_block_iterator) {
if (basic_block_iterator->id() == insert_point->id()) {
basic_block_iterator.InsertBefore(&blocks_to_add_);
return;
}
}
assert(
false &&
"Could not add basic blocks to function as insert point was not found.");
}
// Assign all result_ids in |basic_block| instructions to new IDs and preserve
// the mapping of new ids to old ones.
void LoopUnrollerUtilsImpl::AssignNewResultIds(ir::BasicBlock* basic_block) {
// Label instructions aren't covered by normal traversal of the
// instructions.
uint32_t new_label_id = context_->TakeNextId();
// Assign a new id to the label.
state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
basic_block->GetLabelInst()->SetResultId(new_label_id);
for (ir::Instruction& inst : *basic_block) {
uint32_t old_id = inst.result_id();
// Ignore stores etc.
if (old_id == 0) {
continue;
}
// Give the instruction a new id.
inst.SetResultId(context_->TakeNextId());
// Save the mapping of old_id -> new_id.
state_.new_inst[old_id] = inst.result_id();
// Check if this instruction is the induction variable.
if (loop_induction_variable_->result_id() == old_id) {
// Save a pointer to the new copy of it.
state_.new_phi = &inst;
}
}
}
// For all instructions in |basic_block| check if the operands used are from a
// copied instruction and if so swap out the operand for the copy of it.
void LoopUnrollerUtilsImpl::RemapOperands(ir::BasicBlock* basic_block) {
for (ir::Instruction& inst : *basic_block) {
auto remap_operands_to_new_ids = [this](uint32_t* id) {
auto itr = state_.new_inst.find(*id);
if (itr != state_.new_inst.end()) {
*id = itr->second;
}
};
inst.ForEachInId(remap_operands_to_new_ids);
}
}
// Generate the ordered list of basic blocks in the |loop| and cache it for
// later use.
void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(ir::Loop* loop) {
loop_blocks_inorder_.clear();
opt::DominatorAnalysis* analysis =
context_->GetDominatorAnalysis(&function_, *context_->cfg());
opt::DominatorTree& tree = analysis->GetDomTree();
// Starting at the loop header BasicBlock, traverse the dominator tree until
// we reach the merge block and add every node we traverse to the set of
// blocks
// which we consider to be the loop.
auto begin_itr = tree.GetTreeNode(loop->GetHeaderBlock())->df_begin();
const ir::BasicBlock* merge = loop->GetMergeBlock();
auto func = [merge, &tree, this](DominatorTreeNode* node) {
if (!tree.Dominates(merge->id(), node->id())) {
this->loop_blocks_inorder_.push_back(node->bb_);
return true;
}
return false;
};
tree.VisitChildrenIf(func, begin_itr);
}
// Adds the blocks_to_add_ to both the loop and to the parent.
void LoopUnrollerUtilsImpl::AddBlocksToLoop(ir::Loop* loop) const {
// Add the blocks to this loop.
for (auto& block_itr : blocks_to_add_) {
loop->AddBasicBlock(block_itr.get());
}
// Add the blocks to the parent as well.
if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
}
/*
* End LoopUtilsImpl.
*/
} // namespace
/*
*
* Begin Utils.
*
* */
bool LoopUtils::CanPerformUnroll() {
// The loop is expected to be in structured order.
if (!loop_->GetHeaderBlock()->GetMergeInst()) {
return false;
}
// Find check the loop has a condition we can find and evaluate.
const ir::BasicBlock* condition = loop_->FindConditionBlock();
if (!condition) return false;
// Check that we can find and process the induction variable.
const ir::Instruction* induction = loop_->FindInductionVariable(condition);
if (!induction || induction->opcode() != SpvOpPhi) return false;
// Check that we can find the number of loop iterations.
if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
return false;
// Make sure the continue block is a unconditional branch to the header
// block.
const ir::Instruction& branch = *loop_->GetLatchBlock()->ctail();
bool branching_assumption =
branch.opcode() == SpvOpBranch &&
branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
if (!branching_assumption) {
return false;
}
// Make sure the induction is the only phi instruction we have in the loop
// header. Other optimizations have been seen to leave dead phi nodes in the
// header so we also check that the phi is used.
for (const ir::Instruction& inst : *loop_->GetHeaderBlock()) {
if (inst.opcode() == SpvOpPhi &&
inst.result_id() != induction->result_id()) {
return false;
}
}
// Ban breaks within the loop.
const std::vector<uint32_t>& merge_block_preds =
context_->cfg()->preds(loop_->GetMergeBlock()->id());
if (merge_block_preds.size() != 1) {
return false;
}
// Ban continues within the loop.
const std::vector<uint32_t>& continue_block_preds =
context_->cfg()->preds(loop_->GetLatchBlock()->id());
if (continue_block_preds.size() != 1) {
return false;
}
// Ban returns in the loop.
// Iterate over all the blocks within the loop and check that none of them
// exit the loop.
for (uint32_t label_id : loop_->GetBlocks()) {
const ir::BasicBlock* block = context_->cfg()->block(label_id);
if (block->ctail()->opcode() == SpvOp::SpvOpKill ||
block->ctail()->opcode() == SpvOp::SpvOpReturn ||
block->ctail()->opcode() == SpvOp::SpvOpReturnValue) {
return false;
}
}
// Can only unroll inner loops.
if (!loop_->AreAllChildrenMarkedForRemoval()) {
return false;
}
for (uint32_t block_id : loop_->GetBlocks()) {
opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
ir::BasicBlock& bb = *context_->cfg()->block(block_id);
// For every instruction in the block.
for (ir::Instruction& inst : bb) {
if (inst.result_id() == 0) continue;
auto is_used_outside_loop = [this,
def_use_manager](ir::Instruction* user) {
if (!loop_->IsInsideLoop(user)) {
// Some optimization passes have been seen to leave dead phis in the
// IR so we check that if a phi is used outside of the loop that the
// user is not dead.
if (!(user->opcode() == SpvOpPhi &&
def_use_manager->NumUsers(user) == 0))
return false;
}
return true;
};
if (!def_use_manager->WhileEachUser(&inst, is_used_outside_loop)) {
return false;
}
}
}
return true;
}
bool LoopUtils::PartiallyUnroll(size_t factor) {
if (factor == 1 || !CanPerformUnroll()) return false;
// Create the unroller utility.
LoopUnrollerUtilsImpl unroller{context_,
loop_->GetHeaderBlock()->GetParent()};
unroller.Init(loop_);
// If the unrolling factor is larger than or the same size as the loop just
// fully unroll the loop.
if (factor >= unroller.GetLoopIterationCount()) {
unroller.FullyUnroll(loop_);
return true;
}
// If the loop unrolling factor is an residual number of iterations we need to
// let run the loop for the residual part then let it branch into the unrolled
// remaining part. We add one when calucating the remainder to take into
// account the one iteration already in the loop.
if (unroller.GetLoopIterationCount() % factor != 0) {
unroller.PartiallyUnrollResidualFactor(loop_, factor);
} else {
unroller.PartiallyUnroll(loop_, factor);
}
return true;
}
bool LoopUtils::FullyUnroll() {
if (!CanPerformUnroll()) return false;
LoopUnrollerUtilsImpl unroller{context_,
loop_->GetHeaderBlock()->GetParent()};
unroller.Init(loop_);
unroller.FullyUnroll(loop_);
return true;
}
void LoopUtils::Finalize() {
// Clean up the loop descriptor to preserve the analysis.
ir::LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
LD->PostModificationCleanup();
}
/*
*
* Begin Pass.
*
*/
Pass::Status LoopUnroller::Process(ir::IRContext* c) {
context_ = c;
bool changed = false;
for (ir::Function& f : *c->module()) {
ir::LoopDescriptor* LD = context_->GetLoopDescriptor(&f);
for (ir::Loop& loop : *LD) {
LoopUtils loop_utils{c, &loop};
if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
continue;
}
loop_utils.FullyUnroll();
changed = true;
}
LD->PostModificationCleanup();
}
return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}
} // namespace opt
} // namespace spvtools

View File

@ -0,0 +1,37 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_LOOP_UNROLLER_H_
#define SOURCE_OPT_LOOP_UNROLLER_H_
#include "opt/pass.h"
namespace spvtools {
namespace opt {
class LoopUnroller : public Pass {
public:
LoopUnroller() : Pass() {}
const char* name() const override { return "Loop unroller"; }
Status Process(ir::IRContext* context) override;
private:
ir::IRContext* context_;
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_LOOP_UNROLLER_H_

View File

@ -12,8 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef LIBSPIRV_OPT_LOOP_UTILS_H_
#define LIBSPIRV_OPT_LOOP_UTILS_H_
#ifndef SOURCE_OPT_LOOP_UTILS_H_
#define SOURCE_OPT_LOOP_UTILS_H_
#include <list>
#include <memory>
#include <vector>
#include "opt/loop_descriptor.h"
namespace spvtools {
@ -24,11 +28,15 @@ class IRContext;
namespace opt {
// Set of basic loop transformation.
// LoopUtils is used to encapsulte loop optimizations and from the passes which
// use them. Any pass which needs a loop optimization should do it through this
// or through a pass which is using this.
class LoopUtils {
public:
LoopUtils(ir::IRContext* context, ir::Loop* loop)
: context_(context), loop_(loop) {}
: context_(context),
loop_(loop),
function_(*loop_->GetHeaderBlock()->GetParent()) {}
// The converts the current loop to loop closed SSA form.
// In the loop closed SSA, all loop exiting values go through a dedicated Phi
@ -64,12 +72,42 @@ class LoopUtils {
// Preserves: CFG, def/use and instruction to block mapping.
void CreateLoopDedicatedExits();
// Perfom a partial unroll of |loop| by given |factor|. This will copy the
// body of the loop |factor| times. So a |factor| of one would give a new loop
// with the original body plus one unrolled copy body.
bool PartiallyUnroll(size_t factor);
// Fully unroll |loop|.
bool FullyUnroll();
// This function validates that |loop| meets the assumptions made by the
// implementation of the loop unroller. As the implementation accommodates
// more types of loops this function can reduce its checks.
//
// The conditions checked to ensure the loop can be unrolled are as follows:
// 1. That the loop is in structured order.
// 2. That the condinue block is a branch to the header.
// 3. That the only phi used in the loop is the induction variable.
// TODO(stephen@codeplay.com): This is a temporary mesure, after the loop is
// converted into LCSAA form and has a single entry and exit we can rewrite
// the other phis.
// 4. That this is an inner most loop, or that loops contained within this
// loop have already been fully unrolled.
// 5. That each instruction in the loop is only used within the loop.
// (Related to the above phi condition).
bool CanPerformUnroll();
// Maintains the loop descriptor object after the unroll functions have been
// called, otherwise the analysis should be invalidated.
void Finalize();
private:
ir::IRContext* context_;
ir::Loop* loop_;
ir::Function& function_;
};
} // namespace opt
} // namespace spvtools
#endif // LIBSPIRV_OPT_LOOP_UTILS_H_
#endif // SOURCE_OPT_LOOP_UTILS_H_

View File

@ -389,4 +389,9 @@ Optimizer::PassToken CreateSimplificationPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::SimplificationPass>());
}
Optimizer::PassToken CreateLoopFullyUnrollPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::LoopUnroller>());
}
} // namespace spvtools

View File

@ -41,6 +41,7 @@
#include "local_single_block_elim_pass.h"
#include "local_single_store_elim_pass.h"
#include "local_ssa_elim_pass.h"
#include "loop_unroller.h"
#include "merge_return_pass.h"
#include "null_pass.h"
#include "private_to_local_pass.h"
@ -53,5 +54,4 @@
#include "strip_debug_info_pass.h"
#include "unify_const_pass.h"
#include "workaround1209.h"
#endif // LIBSPIRV_OPT_PASSES_H_

View File

@ -66,3 +66,16 @@ add_spvtools_unittest(TARGET licm_hoist_no_preheader
hoist_without_preheader.cpp
LIBS SPIRV-Tools-opt
)
add_spvtools_unittest(TARGET loop_unroll_simple
SRCS ../function_utils.h
unroll_simple.cpp
LIBS SPIRV-Tools-opt
)
add_spvtools_unittest(TARGET loop_unroll_assumtion_checks
SRCS ../function_utils.h
unroll_assumptions.cpp
LIBS SPIRV-Tools-opt
)

View File

@ -0,0 +1,627 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include "../assembly_builder.h"
#include "../function_utils.h"
#include "../pass_fixture.h"
#include "../pass_utils.h"
#include "opt/loop_unroller.h"
#include "opt/loop_utils.h"
#include "opt/pass.h"
namespace {
using namespace spvtools;
using ::testing::UnorderedElementsAre;
template <int factor>
class PartialUnrollerTestPass : public opt::Pass {
public:
PartialUnrollerTestPass() : Pass() {}
const char* name() const override { return "Loop unroller"; }
Status Process(ir::IRContext* context) override {
bool changed = false;
for (ir::Function& f : *context->module()) {
ir::LoopDescriptor& loop_descriptor = *context->GetLoopDescriptor(&f);
for (auto& loop : loop_descriptor) {
opt::LoopUtils loop_utils{context, &loop};
if (loop_utils.PartiallyUnroll(factor)) {
changed = true;
}
}
}
if (changed) return Pass::Status::SuccessWithChange;
return Pass::Status::SuccessWithoutChange;
}
};
using PassClassTest = PassTest<::testing::Test>;
/*
Generated from the following GLSL
#version 410 core
layout(location = 0) flat in int in_upper_bound;
void main() {
for (int i = ; i < in_upper_bound; ++i) {
x[i] = 1.0f;
}
}
*/
TEST_F(PassClassTest, CheckUpperBound) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main" %3
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 410
OpName %2 "main"
OpName %3 "in_upper_bound"
OpName %4 "x"
OpDecorate %3 Flat
OpDecorate %3 Location 0
%5 = OpTypeVoid
%6 = OpTypeFunction %5
%7 = OpTypeInt 32 1
%8 = OpTypePointer Function %7
%9 = OpConstant %7 0
%10 = OpTypePointer Input %7
%3 = OpVariable %10 Input
%11 = OpTypeBool
%12 = OpTypeFloat 32
%13 = OpTypeInt 32 0
%14 = OpConstant %13 10
%15 = OpTypeArray %12 %14
%16 = OpTypePointer Function %15
%17 = OpConstant %12 1
%18 = OpTypePointer Function %12
%19 = OpConstant %7 1
%2 = OpFunction %5 None %6
%20 = OpLabel
%4 = OpVariable %16 Function
OpBranch %21
%21 = OpLabel
%22 = OpPhi %7 %9 %20 %23 %24
OpLoopMerge %25 %24 None
OpBranch %26
%26 = OpLabel
%27 = OpLoad %7 %3
%28 = OpSLessThan %11 %22 %27
OpBranchConditional %28 %29 %25
%29 = OpLabel
%30 = OpAccessChain %18 %4 %22
OpStore %30 %17
OpBranch %24
%24 = OpLabel
%23 = OpIAdd %7 %22 %19
OpBranch %21
%25 = OpLabel
OpReturn
OpFunctionEnd
)";
// clang-format on
std::unique_ptr<ir::IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ir::Module* module = context->module();
EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
<< text << std::endl;
opt::LoopUnroller loop_unroller;
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
// Make sure the pass doesn't run
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
SinglePassRunAndCheck<PartialUnrollerTestPass<1>>(text, text, false);
SinglePassRunAndCheck<PartialUnrollerTestPass<2>>(text, text, false);
}
/*
Generated from the following GLSL
#version 410 core
void main() {
float out_array[10];
int i = 0;
for (int i = 0; i < 10; ++i) {
out_array[i] = i;
}
out_array[9] = i*10;
}
*/
TEST_F(PassClassTest, InductionUsedOutsideOfLoop) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main"
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 410
OpName %2 "main"
OpName %3 "out_array"
%4 = OpTypeVoid
%5 = OpTypeFunction %4
%6 = OpTypeInt 32 1
%7 = OpTypePointer Function %6
%8 = OpConstant %6 0
%9 = OpConstant %6 10
%10 = OpTypeBool
%11 = OpTypeFloat 32
%12 = OpTypeInt 32 0
%13 = OpConstant %12 10
%14 = OpTypeArray %11 %13
%15 = OpTypePointer Function %14
%16 = OpTypePointer Function %11
%17 = OpConstant %6 1
%18 = OpConstant %6 9
%2 = OpFunction %4 None %5
%19 = OpLabel
%3 = OpVariable %15 Function
OpBranch %20
%20 = OpLabel
%21 = OpPhi %6 %8 %19 %22 %23
OpLoopMerge %24 %23 Unroll
OpBranch %25
%25 = OpLabel
%26 = OpSLessThan %10 %21 %9
OpBranchConditional %26 %27 %24
%27 = OpLabel
%28 = OpConvertSToF %11 %21
%29 = OpAccessChain %16 %3 %21
OpStore %29 %28
OpBranch %23
%23 = OpLabel
%22 = OpIAdd %6 %21 %17
OpBranch %20
%24 = OpLabel
%30 = OpIMul %6 %21 %9
%31 = OpConvertSToF %11 %30
%32 = OpAccessChain %16 %3 %18
OpStore %32 %31
OpReturn
OpFunctionEnd
)";
// clang-format on
std::unique_ptr<ir::IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ir::Module* module = context->module();
EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
<< text << std::endl;
opt::LoopUnroller loop_unroller;
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
// Make sure the pass doesn't run
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
SinglePassRunAndCheck<PartialUnrollerTestPass<1>>(text, text, false);
SinglePassRunAndCheck<PartialUnrollerTestPass<2>>(text, text, false);
}
/*
Generated from the following GLSL
#version 410 core
void main() {
float out_array[10];
for (uint i = 0; i < 2; i++) {
for (float x = 0; x < 5; ++x) {
out_array[x + i*5] = i;
}
}
}
*/
TEST_F(PassClassTest, UnrollNestedLoopsInvalid) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main"
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 410
OpName %2 "main"
OpName %3 "out_array"
%4 = OpTypeVoid
%5 = OpTypeFunction %4
%6 = OpTypeInt 32 0
%7 = OpTypePointer Function %6
%8 = OpConstant %6 0
%9 = OpConstant %6 2
%10 = OpTypeBool
%11 = OpTypeInt 32 1
%12 = OpTypePointer Function %11
%13 = OpConstant %11 0
%14 = OpConstant %11 5
%15 = OpTypeFloat 32
%16 = OpConstant %6 10
%17 = OpTypeArray %15 %16
%18 = OpTypePointer Function %17
%19 = OpConstant %6 5
%20 = OpTypePointer Function %15
%21 = OpConstant %11 1
%22 = OpUndef %11
%2 = OpFunction %4 None %5
%23 = OpLabel
%3 = OpVariable %18 Function
OpBranch %24
%24 = OpLabel
%25 = OpPhi %6 %8 %23 %26 %27
%28 = OpPhi %11 %22 %23 %29 %27
OpLoopMerge %30 %27 None
OpBranch %31
%31 = OpLabel
%32 = OpULessThan %10 %25 %9
OpBranchConditional %32 %33 %30
%33 = OpLabel
OpBranch %34
%34 = OpLabel
%29 = OpPhi %11 %13 %33 %35 %36
OpLoopMerge %37 %36 None
OpBranch %38
%38 = OpLabel
%39 = OpSLessThan %10 %29 %14
OpBranchConditional %39 %40 %37
%40 = OpLabel
%41 = OpBitcast %6 %29
%42 = OpIMul %6 %25 %19
%43 = OpIAdd %6 %41 %42
%44 = OpConvertUToF %15 %25
%45 = OpAccessChain %20 %3 %43
OpStore %45 %44
OpBranch %36
%36 = OpLabel
%35 = OpIAdd %11 %29 %21
OpBranch %34
%37 = OpLabel
OpBranch %27
%27 = OpLabel
%26 = OpIAdd %6 %25 %21
OpBranch %24
%30 = OpLabel
OpReturn
OpFunctionEnd
)";
std::unique_ptr<ir::IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ir::Module* module = context->module();
EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
<< text << std::endl;
opt::LoopUnroller loop_unroller;
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
}
/*
Generated from the following GLSL
#version 440 core
void main(){
float x[10];
int ind = 0;
for (int i = 0; i < 10; i++) {
ind = i;
x[i] = i;
}
}
*/
TEST_F(PassClassTest, MultiplePhiInHeader) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main"
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 440
OpName %2 "main"
OpName %3 "x"
%4 = OpTypeVoid
%5 = OpTypeFunction %4
%6 = OpTypeInt 32 1
%7 = OpTypePointer Function %6
%8 = OpConstant %6 0
%9 = OpConstant %6 10
%10 = OpTypeBool
%11 = OpTypeFloat 32
%12 = OpTypeInt 32 0
%13 = OpConstant %12 10
%14 = OpTypeArray %11 %13
%15 = OpTypePointer Function %14
%16 = OpTypePointer Function %11
%17 = OpConstant %6 1
%2 = OpFunction %4 None %5
%18 = OpLabel
%3 = OpVariable %15 Function
OpBranch %19
%19 = OpLabel
%20 = OpPhi %6 %8 %18 %21 %22
%21 = OpPhi %6 %8 %18 %23 %22
OpLoopMerge %24 %22 None
OpBranch %25
%25 = OpLabel
%26 = OpSLessThan %10 %21 %9
OpBranchConditional %26 %27 %24
%27 = OpLabel
%28 = OpConvertSToF %11 %21
%29 = OpAccessChain %16 %3 %21
OpStore %29 %28
OpBranch %22
%22 = OpLabel
%23 = OpIAdd %6 %21 %17
OpBranch %19
%24 = OpLabel
OpReturn
OpFunctionEnd
)";
// clang-format on
std::unique_ptr<ir::IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ir::Module* module = context->module();
EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
<< text << std::endl;
opt::LoopUnroller loop_unroller;
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
}
/*
Generated from the following GLSL
#version 440 core
void main(){
float x[10];
for (int i = 0; i < 10; i++) {
if (i == 5) {
break;
}
x[i] = i;
}
}
*/
TEST_F(PassClassTest, BreakInBody) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main"
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 440
OpName %2 "main"
OpName %3 "x"
%4 = OpTypeVoid
%5 = OpTypeFunction %4
%6 = OpTypeInt 32 1
%7 = OpTypePointer Function %6
%8 = OpConstant %6 0
%9 = OpConstant %6 10
%10 = OpTypeBool
%11 = OpConstant %6 5
%12 = OpTypeFloat 32
%13 = OpTypeInt 32 0
%14 = OpConstant %13 10
%15 = OpTypeArray %12 %14
%16 = OpTypePointer Function %15
%17 = OpTypePointer Function %12
%18 = OpConstant %6 1
%2 = OpFunction %4 None %5
%19 = OpLabel
%3 = OpVariable %16 Function
OpBranch %20
%20 = OpLabel
%21 = OpPhi %6 %8 %19 %22 %23
OpLoopMerge %24 %23 Unroll
OpBranch %25
%25 = OpLabel
%26 = OpSLessThan %10 %21 %9
OpBranchConditional %26 %27 %24
%27 = OpLabel
%28 = OpIEqual %10 %21 %11
OpSelectionMerge %29 None
OpBranchConditional %28 %30 %29
%30 = OpLabel
OpBranch %24
%29 = OpLabel
%31 = OpConvertSToF %12 %21
%32 = OpAccessChain %17 %3 %21
OpStore %32 %31
OpBranch %23
%23 = OpLabel
%22 = OpIAdd %6 %21 %18
OpBranch %20
%24 = OpLabel
OpReturn
OpFunctionEnd
)";
// clang-format on
std::unique_ptr<ir::IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ir::Module* module = context->module();
EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
<< text << std::endl;
opt::LoopUnroller loop_unroller;
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
}
/*
Generated from the following GLSL
#version 440 core
void main(){
float x[10];
for (int i = 0; i < 10; i++) {
if (i == 5) {
continue;
}
x[i] = i;
}
}
*/
TEST_F(PassClassTest, ContinueInBody) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main"
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 440
OpName %2 "main"
OpName %3 "x"
%4 = OpTypeVoid
%5 = OpTypeFunction %4
%6 = OpTypeInt 32 1
%7 = OpTypePointer Function %6
%8 = OpConstant %6 0
%9 = OpConstant %6 10
%10 = OpTypeBool
%11 = OpConstant %6 5
%12 = OpTypeFloat 32
%13 = OpTypeInt 32 0
%14 = OpConstant %13 10
%15 = OpTypeArray %12 %14
%16 = OpTypePointer Function %15
%17 = OpTypePointer Function %12
%18 = OpConstant %6 1
%2 = OpFunction %4 None %5
%19 = OpLabel
%3 = OpVariable %16 Function
OpBranch %20
%20 = OpLabel
%21 = OpPhi %6 %8 %19 %22 %23
OpLoopMerge %24 %23 Unroll
OpBranch %25
%25 = OpLabel
%26 = OpSLessThan %10 %21 %9
OpBranchConditional %26 %27 %24
%27 = OpLabel
%28 = OpIEqual %10 %21 %11
OpSelectionMerge %29 None
OpBranchConditional %28 %30 %29
%30 = OpLabel
OpBranch %23
%29 = OpLabel
%31 = OpConvertSToF %12 %21
%32 = OpAccessChain %17 %3 %21
OpStore %32 %31
OpBranch %23
%23 = OpLabel
%22 = OpIAdd %6 %21 %18
OpBranch %20
%24 = OpLabel
OpReturn
OpFunctionEnd
)";
// clang-format on
std::unique_ptr<ir::IRContext> context =
BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
ir::Module* module = context->module();
EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
<< text << std::endl;
opt::LoopUnroller loop_unroller;
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
}
/*
Generated from the following GLSL
#version 440 core
void main(){
float x[10];
for (int i = 0; i < 10; i++) {
if (i == 5) {
return;
}
x[i] = i;
}
}
*/
TEST_F(PassClassTest, ReturnInBody) {
// clang-format off
// With opt::LocalMultiStoreElimPass
const std::string text = R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %2 "main"
OpExecutionMode %2 OriginUpperLeft
OpSource GLSL 440
OpName %2 "main"
OpName %3 "x"
%4 = OpTypeVoid
%5 = OpTypeFunction %4
%6 = OpTypeInt 32 1
%7 = OpTypePointer Function %6
%8 = OpConstant %6 0
%9 = OpConstant %6 10
%10 = OpTypeBool
%11 = OpConstant %6 5
%12 = OpTypeFloat 32
%13 = OpTypeInt 32 0
%14 = OpConstant %13 10
%15 = OpTypeArray %12 %14
%16 = OpTypePointer Function %15
%17 = OpTypePointer Function %12
%18 = OpConstant %6 1
%2 = OpFunction %4 None %5
%19 = OpLabel
%3 = OpVariable %16 Function
OpBranch %20
%20 = OpLabel
%21 = OpPhi %6 %8 %19 %22 %23
OpLoopMerge %24 %23 Unroll
OpBranch %25
%25 = OpLabel
%26 = OpSLessThan %10 %21 %9
OpBranchConditional %26 %27 %24
%27 = OpLabel
%28 = OpIEqual %10 %21 %11
OpSelectionMerge %29 None
OpBranchConditional %28 %30 %29
%30 = OpLabel
OpReturn
%29 = OpLabel
%31 = OpConvertSToF %12 %21
%32 = OpAccessChain %17 %3 %21
OpStore %32 %31
OpBranch %23
%23 = OpLabel
%22 = OpIAdd %6 %21 %18
OpBranch %20
%24 = OpLabel
OpReturn
OpFunctionEnd
)";
// clang-format on
SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
SinglePassRunAndCheck<opt::LoopUnroller>(text, text, false);
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@ -472,6 +472,8 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
optimizer->RegisterPass(CreateReplaceInvalidOpcodePass());
} else if (0 == strcmp(cur_arg, "--simplify-instructions")) {
optimizer->RegisterPass(CreateSimplificationPass());
} else if (0 == strcmp(cur_arg, "--loop-unroll")) {
optimizer->RegisterPass(CreateLoopFullyUnrollPass());
} else if (0 == strcmp(cur_arg, "--skip-validation")) {
*skip_validator = true;
} else if (0 == strcmp(cur_arg, "-O")) {