diff --git a/Android.mk b/Android.mk index 4429ad183..074ceb227 100644 --- a/Android.mk +++ b/Android.mk @@ -101,6 +101,8 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/local_ssa_elim_pass.cpp \ source/opt/loop_descriptor.cpp \ source/opt/loop_unroller.cpp \ + source/opt/loop_unswitch_pass.cpp \ + source/opt/loop_utils.cpp \ source/opt/mem_pass.cpp \ source/opt/merge_return_pass.cpp \ source/opt/module.cpp \ diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index adb014af7..bedf436ef 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -468,6 +468,12 @@ Optimizer::PassToken CreateLocalRedundancyEliminationPass(); // the loops preheader. Optimizer::PassToken CreateLoopInvariantCodeMotionPass(); +// Creates a loop unswitch pass. +// This pass will look for loop independent branch conditions and move the +// condition out of the loop and version the loop based on the taken branch. +// Works best after LICM and local multi store elimination pass. +Optimizer::PassToken CreateLoopUnswitchPass(); + // Create global value numbering pass. // This pass will look for instructions where the same value is computed on all // paths leading to the instruction. Those instructions are deleted. diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index 019485159..854c9509c 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -60,6 +60,7 @@ add_library(SPIRV-Tools-opt loop_descriptor.h loop_unroller.h loop_utils.h + loop_unswitch_pass.h make_unique.h mem_pass.h merge_return_pass.h @@ -132,6 +133,7 @@ add_library(SPIRV-Tools-opt loop_descriptor.cpp loop_utils.cpp loop_unroller.cpp + loop_unswitch_pass.cpp mem_pass.cpp merge_return_pass.cpp module.cpp diff --git a/source/opt/basic_block.cpp b/source/opt/basic_block.cpp index b07696b17..65030eaae 100644 --- a/source/opt/basic_block.cpp +++ b/source/opt/basic_block.cpp @@ -14,6 +14,7 @@ #include "basic_block.h" #include "function.h" +#include "ir_context.h" #include "module.h" #include "reflect.h" @@ -89,6 +90,14 @@ Instruction* BasicBlock::GetLoopMergeInst() { return nullptr; } +void BasicBlock::KillAllInsts(bool killLabel) { + ForEachInst([killLabel](ir::Instruction* ip) { + if (killLabel || ip->opcode() != SpvOpLabel) { + ip->context()->KillInst(ip); + } + }); +} + void BasicBlock::ForEachSuccessorLabel( const std::function& f) const { const auto br = &insts_.back(); diff --git a/source/opt/basic_block.h b/source/opt/basic_block.h index d0186e6e0..5f3d393e8 100644 --- a/source/opt/basic_block.h +++ b/source/opt/basic_block.h @@ -171,6 +171,10 @@ class BasicBlock { // Returns true if this basic block exits this function or aborts execution. bool IsReturnOrAbort() const { return ctail()->IsReturnOrAbort(); } + // Kill all instructions in this block. Whether or not to kill the label is + // indicated by |killLabel|. + void KillAllInsts(bool killLabel); + private: // The enclosing function. Function* function_; diff --git a/source/opt/cfg.h b/source/opt/cfg.h index 53dddd234..b21a273d7 100644 --- a/source/opt/cfg.h +++ b/source/opt/cfg.h @@ -17,6 +17,7 @@ #include "basic_block.h" +#include #include #include #include @@ -83,6 +84,22 @@ class CFG { AddEdges(blk); } + // Removes from the CFG any mapping for the basic block id |blk_id|. + void ForgetBlock(const ir::BasicBlock* blk) { + id2block_.erase(blk->id()); + label2preds_.erase(blk->id()); + blk->ForEachSuccessorLabel( + [blk, this](uint32_t succ_id) { RemoveEdge(blk->id(), succ_id); }); + } + + void RemoveEdge(uint32_t pred_blk_id, uint32_t succ_blk_id) { + auto pred_it = label2preds_.find(succ_blk_id); + if (pred_it == label2preds_.end()) return; + auto& preds_list = pred_it->second; + auto it = std::find(preds_list.begin(), preds_list.end(), pred_blk_id); + if (it != preds_list.end()) preds_list.erase(it); + } + // Registers |blk| to all of its successors. void AddEdges(ir::BasicBlock* blk); diff --git a/source/opt/dominator_tree.cpp b/source/opt/dominator_tree.cpp index c22d74380..776adf4b3 100644 --- a/source/opt/dominator_tree.cpp +++ b/source/opt/dominator_tree.cpp @@ -358,6 +358,10 @@ void DominatorTree::InitializeTree(const ir::Function* f, const ir::CFG& cfg) { second->children_.push_back(first); } + ResetDFNumbering(); +} + +void DominatorTree::ResetDFNumbering() { int index = 0; auto preFunc = [&index](const DominatorTreeNode* node) { const_cast(node)->dfs_num_pre_ = ++index; diff --git a/source/opt/dominator_tree.h b/source/opt/dominator_tree.h index 5221eea1c..39d5e0297 100644 --- a/source/opt/dominator_tree.h +++ b/source/opt/dominator_tree.h @@ -15,6 +15,7 @@ #ifndef LIBSPIRV_OPT_DOMINATOR_ANALYSIS_TREE_H_ #define LIBSPIRV_OPT_DOMINATOR_ANALYSIS_TREE_H_ +#include #include #include #include @@ -195,7 +196,9 @@ class DominatorTree { } // Returns true if the basic block id |a| is reachable by this tree. - bool ReachableFromRoots(uint32_t a) const; + bool ReachableFromRoots(uint32_t a) const { + return GetTreeNode(a) != nullptr; + } // Returns true if this tree is a post dominator tree. bool IsPostDominator() const { return postdominator_; } @@ -267,11 +270,14 @@ class DominatorTree { return &node_iter->second; } - private: // Adds the basic block |bb| to the tree structure if it doesn't already // exist. DominatorTreeNode* GetOrInsertNode(ir::BasicBlock* bb); + // Recomputes the DF numbering of the tree. + void ResetDFNumbering(); + + private: // Wrapper function which gets the list of pairs of each BasicBlocks to its // immediately dominating BasicBlock and stores the result in the the edges // parameter. diff --git a/source/opt/function.h b/source/opt/function.h index 0da62a8f0..17e063769 100644 --- a/source/opt/function.h +++ b/source/opt/function.h @@ -59,6 +59,10 @@ class Function { inline void AddParameter(std::unique_ptr p); // Appends a basic block to this function. inline void AddBasicBlock(std::unique_ptr b); + // Appends a basic block to this function at the position |ip|. + inline void AddBasicBlock(std::unique_ptr b, iterator ip); + template + inline void AddBasicBlocks(T begin, T end, iterator ip); // Saves the given function end instruction. inline void SetFunctionEnd(std::unique_ptr end_inst); @@ -73,6 +77,11 @@ class Function { // Returns function's return type id inline uint32_t type_id() const { return def_inst_->type_id(); } + // Returns the basic block container for this function. + const std::vector>* GetBlocks() const { + return &blocks_; + } + // Returns the entry basic block for this function. const std::unique_ptr& entry() const { return blocks_.front(); } @@ -123,7 +132,18 @@ inline void Function::AddParameter(std::unique_ptr p) { } inline void Function::AddBasicBlock(std::unique_ptr b) { - blocks_.emplace_back(std::move(b)); + AddBasicBlock(std::move(b), end()); +} + +inline void Function::AddBasicBlock(std::unique_ptr b, + iterator ip) { + ip.InsertBefore(std::move(b)); +} + +template +inline void Function::AddBasicBlocks(T src_begin, T src_end, iterator ip) { + blocks_.insert(ip.Get(), std::make_move_iterator(src_begin), + std::make_move_iterator(src_end)); } inline void Function::SetFunctionEnd(std::unique_ptr end_inst) { diff --git a/source/opt/ir_builder.h b/source/opt/ir_builder.h index a1a1d1e0d..aa722cbd6 100644 --- a/source/opt/ir_builder.h +++ b/source/opt/ir_builder.h @@ -105,6 +105,44 @@ class InstructionBuilder { return AddInstruction(std::move(new_branch)); } + // Creates a new switch instruction and the associated selection merge + // instruction if requested. + // The id |selector_id| is the id of the selector instruction, must be of + // type int. + // The id |default_id| is the id of the default basic block to branch to. + // The vector |targets| is the pair of literal/branch id. + // The id |merge_id| is the id of the merge basic block for the selection + // merge instruction. If |merge_id| equals kInvalidId then no selection merge + // instruction will be created. + // The value |selection_control| is the selection control flag for the + // selection merge instruction. + // Note that the user must make sure the final basic block is + // well formed. + ir::Instruction* AddSwitch( + uint32_t selector_id, uint32_t default_id, + const std::vector, uint32_t>>& targets, + uint32_t merge_id = kInvalidId, + uint32_t selection_control = SpvSelectionControlMaskNone) { + if (merge_id != kInvalidId) { + AddSelectionMerge(merge_id, selection_control); + } + std::vector operands; + operands.emplace_back( + ir::Operand{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {selector_id}}); + operands.emplace_back( + ir::Operand{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {default_id}}); + for (auto& target : targets) { + operands.emplace_back( + ir::Operand{spv_operand_type_t::SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER, + target.first}); + operands.emplace_back(ir::Operand{spv_operand_type_t::SPV_OPERAND_TYPE_ID, + {target.second}}); + } + std::unique_ptr new_switch( + new ir::Instruction(GetContext(), SpvOpSwitch, 0, 0, operands)); + return AddInstruction(std::move(new_switch)); + } + // Creates a phi instruction. // The id |type| must be the id of the phi instruction's type. // The vector |incomings| must be a sequence of pairs of select( + new ir::Instruction(GetContext(), SpvOpUnreachable, 0, 0, + std::initializer_list{})); + return AddInstruction(std::move(select)); + } + // Inserts the new instruction before the insertion point. ir::Instruction* AddInstruction(std::unique_ptr&& insn) { ir::Instruction* insn_ptr = &*insert_before_.InsertBefore(std::move(insn)); diff --git a/source/opt/iterator.h b/source/opt/iterator.h index 52a8d8648..d43dfbef7 100644 --- a/source/opt/iterator.h +++ b/source/opt/iterator.h @@ -99,6 +99,14 @@ class UptrVectorIterator inline typename std::enable_if::type Erase(); + // Returns the underlying iterator. + UnderlyingIterator Get() const { return iterator_; } + + // Returns a valid end iterator for the underlying container. + UptrVectorIterator End() const { + return UptrVectorIterator(container_, container_->end()); + } + private: UptrVector* container_; // The container we are manipulating. UnderlyingIterator iterator_; // The raw iterator from the container. diff --git a/source/opt/loop_descriptor.cpp b/source/opt/loop_descriptor.cpp index 131363c52..60d946871 100644 --- a/source/opt/loop_descriptor.cpp +++ b/source/opt/loop_descriptor.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include "opt/loop_descriptor.h" +#include #include #include #include @@ -245,11 +246,10 @@ bool Loop::IsBasicBlockInLoopSlow(const BasicBlock* bb) { assert(bb->GetParent() && "The basic block does not belong to a function"); opt::DominatorAnalysis* dom_analysis = context_->GetDominatorAnalysis(bb->GetParent(), *context_->cfg()); - if (!dom_analysis->Dominates(GetHeaderBlock(), bb)) return false; + if (dom_analysis->IsReachable(bb) && + !dom_analysis->Dominates(GetHeaderBlock(), bb)) + return false; - opt::PostDominatorAnalysis* postdom_analysis = - context_->GetPostDominatorAnalysis(bb->GetParent(), *context_->cfg()); - if (!postdom_analysis->Dominates(GetMergeBlock(), bb)) return false; return true; } @@ -378,6 +378,17 @@ void Loop::SetMergeBlock(BasicBlock* merge) { } } +void Loop::SetPreHeaderBlock(BasicBlock* preheader) { + assert(!IsInsideLoop(preheader) && "The preheader block is in the loop"); + assert(preheader->tail()->opcode() == SpvOpBranch && + "The preheader block does not unconditionally branch to the header " + "block"); + assert(preheader->tail()->GetSingleWordOperand(0) == GetHeaderBlock()->id() && + "The preheader block does not unconditionally branch to the header " + "block"); + loop_preheader_ = preheader; +} + void Loop::GetExitBlocks(std::unordered_set* exit_blocks) const { ir::CFG* cfg = context_->cfg(); exit_blocks->clear(); @@ -412,6 +423,43 @@ void Loop::GetMergingBlocks( } } +namespace { + +static inline bool IsBasicBlockSafeToClone(IRContext* context, BasicBlock* bb) { + for (ir::Instruction& inst : *bb) { + if (!inst.IsBranch() && !context->IsCombinatorInstruction(&inst)) + return false; + } + + return true; +} + +} // namespace + +bool Loop::IsSafeToClone() const { + ir::CFG& cfg = *context_->cfg(); + + for (uint32_t bb_id : GetBlocks()) { + BasicBlock* bb = cfg.block(bb_id); + assert(bb); + if (!IsBasicBlockSafeToClone(context_, bb)) return false; + } + + // Look at the merge construct. + if (GetHeaderBlock()->GetLoopMergeInst()) { + std::unordered_set blocks; + GetMergingBlocks(&blocks); + blocks.erase(GetMergeBlock()->id()); + for (uint32_t bb_id : blocks) { + BasicBlock* bb = cfg.block(bb_id); + assert(bb); + if (!IsBasicBlockSafeToClone(context_, bb)) return false; + } + } + + return true; +} + bool Loop::IsLCSSA() const { ir::CFG* cfg = context_->cfg(); opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); @@ -482,7 +530,8 @@ void Loop::ComputeLoopStructuredOrder( ordered_loop_blocks->push_back(loop_merge_); } -LoopDescriptor::LoopDescriptor(const Function* f) : loops_() { +LoopDescriptor::LoopDescriptor(const Function* f) + : loops_(), dummy_top_loop_(nullptr) { PopulateList(f); } @@ -503,6 +552,17 @@ void LoopDescriptor::PopulateList(const Function* f) { ir::make_range(dom_tree.post_begin(), dom_tree.post_end())) { Instruction* merge_inst = node.bb_->GetLoopMergeInst(); if (merge_inst) { + bool all_backedge_unreachable = true; + for (uint32_t pid : context->cfg()->preds(node.bb_->id())) { + if (dom_analysis->IsReachable(pid) && + dom_analysis->Dominates(node.bb_->id(), pid)) { + all_backedge_unreachable = false; + break; + } + } + if (all_backedge_unreachable) + continue; // ignore this one, we actually never branch back. + // The id of the merge basic block of this loop. uint32_t merge_bb_id = merge_inst->GetSingleWordOperand(0); @@ -888,5 +948,48 @@ void LoopDescriptor::ClearLoops() { } loops_.clear(); } + +// Adds a new loop nest to the descriptor set. +ir::Loop* LoopDescriptor::AddLoopNest(std::unique_ptr new_loop) { + ir::Loop* loop = new_loop.release(); + if (!loop->HasParent()) dummy_top_loop_.nested_loops_.push_back(loop); + // Iterate from inner to outer most loop, adding basic block to loop mapping + // as we go. + for (ir::Loop& current_loop : + make_range(iterator::begin(loop), iterator::end(nullptr))) { + loops_.push_back(¤t_loop); + for (uint32_t bb_id : current_loop.GetBlocks()) + basic_block_to_loop_.insert(std::make_pair(bb_id, ¤t_loop)); + } + + return loop; +} + +void LoopDescriptor::RemoveLoop(ir::Loop* loop) { + ir::Loop* parent = loop->GetParent() ? loop->GetParent() : &dummy_top_loop_; + parent->nested_loops_.erase(std::find(parent->nested_loops_.begin(), + parent->nested_loops_.end(), loop)); + std::for_each( + loop->nested_loops_.begin(), loop->nested_loops_.end(), + [loop](ir::Loop* sub_loop) { sub_loop->SetParent(loop->GetParent()); }); + parent->nested_loops_.insert(parent->nested_loops_.end(), + loop->nested_loops_.begin(), + loop->nested_loops_.end()); + for (uint32_t bb_id : loop->GetBlocks()) { + ir::Loop* l = FindLoopForBasicBlock(bb_id); + if (l == loop) { + SetBasicBlockToLoop(bb_id, l->GetParent()); + } else { + ForgetBasicBlock(bb_id); + } + } + + LoopContainerType::iterator it = + std::find(loops_.begin(), loops_.end(), loop); + assert(it != loops_.end()); + delete loop; + loops_.erase(it); +} + } // namespace ir } // namespace spvtools diff --git a/source/opt/loop_descriptor.h b/source/opt/loop_descriptor.h index d0421d642..05acce207 100644 --- a/source/opt/loop_descriptor.h +++ b/source/opt/loop_descriptor.h @@ -47,8 +47,8 @@ class Loop { using const_iterator = ChildrenList::const_iterator; using BasicBlockListTy = std::unordered_set; - Loop() - : context_(nullptr), + explicit Loop(IRContext* context) + : context_(context), loop_header_(nullptr), loop_continue_(nullptr), loop_merge_(nullptr), @@ -59,6 +59,8 @@ class Loop { Loop(IRContext* context, opt::DominatorAnalysis* analysis, BasicBlock* header, BasicBlock* continue_target, BasicBlock* merge_target); + ~Loop() {} + // Iterators over the immediate sub-loops. inline iterator begin() { return nested_loops_.begin(); } inline iterator end() { return nested_loops_.end(); } @@ -115,6 +117,11 @@ class Loop { // Returns the loop pre-header. inline const BasicBlock* GetPreHeaderBlock() const { return loop_preheader_; } + // Sets |preheader| as the loop preheader block. A preheader block must have + // the following properties: + // - |merge| must not be in the loop; + // - have an unconditional branch to the loop header. + void SetPreHeaderBlock(BasicBlock* preheader); // Returns the loop pre-header, if there is no suitable preheader it will be // created. @@ -190,7 +197,16 @@ class Loop { // Adds the Basic Block with |id| to this loop and its parents. void AddBasicBlock(uint32_t id) { for (Loop* loop = this; loop != nullptr; loop = loop->parent_) { - loop_basic_blocks_.insert(id); + loop->loop_basic_blocks_.insert(id); + } + } + + // Removes the Basic Block id |bb_id| from this loop and its parents. + // It the user responsibility to make sure the removed block is not a merge, + // header or continue block. + void RemoveBasicBlock(uint32_t bb_id) { + for (Loop* loop = this; loop != nullptr; loop = loop->parent_) { + loop->loop_basic_blocks_.erase(bb_id); } } @@ -264,6 +280,10 @@ class Loop { return true; } + // Checks if the loop contains any instruction that will prevent it from being + // cloned. If the loop is structured, the merge construct is also considered. + bool IsSafeToClone() const; + // Sets the parent loop of this loop, that is, a loop which contains this loop // as a nested child loop. inline void SetParent(Loop* parent) { parent_ = parent; } @@ -384,7 +404,7 @@ class LoopDescriptor { // Disable copy constructor, to avoid double-free on destruction. LoopDescriptor(const LoopDescriptor&) = delete; // Move constructor. - LoopDescriptor(LoopDescriptor&& other) { + LoopDescriptor(LoopDescriptor&& other) : dummy_top_loop_(nullptr) { // We need to take ownership of the Loop objects in the other // LoopDescriptor, to avoid double-free. loops_ = std::move(other.loops_); @@ -446,6 +466,28 @@ class LoopDescriptor { // for addition with AddLoop or MarkLoopForRemoval. void PostModificationCleanup(); + // Removes the basic block id |bb_id| from the block to loop mapping. + inline void ForgetBasicBlock(uint32_t bb_id) { + basic_block_to_loop_.erase(bb_id); + } + + // Adds the loop |new_loop| and all its nested loops to the descriptor set. + // The object takes ownership of all the loops. + ir::Loop* AddLoopNest(std::unique_ptr new_loop); + + // Remove the loop |loop|. + void RemoveLoop(ir::Loop* loop); + + void SetAsTopLoop(ir::Loop* loop) { + assert(std::find(dummy_top_loop_.begin(), dummy_top_loop_.end(), loop) == + dummy_top_loop_.end() && + "already registered"); + dummy_top_loop_.nested_loops_.push_back(loop); + } + + Loop* GetDummyRootLoop() { return &dummy_top_loop_; } + const Loop* GetDummyRootLoop() const { return &dummy_top_loop_; } + private: // TODO(dneto): This should be a vector of unique_ptr. But VisualStudio 2013 // is unable to compile it. diff --git a/source/opt/loop_unswitch_pass.cpp b/source/opt/loop_unswitch_pass.cpp new file mode 100644 index 000000000..53f629950 --- /dev/null +++ b/source/opt/loop_unswitch_pass.cpp @@ -0,0 +1,908 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "loop_unswitch_pass.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "basic_block.h" +#include "dominator_tree.h" +#include "fold.h" +#include "function.h" +#include "instruction.h" +#include "ir_builder.h" +#include "ir_context.h" +#include "loop_descriptor.h" + +#include "loop_utils.h" + +namespace spvtools { +namespace opt { +namespace { + +static const uint32_t kTypePointerStorageClassInIdx = 0; +static const uint32_t kBranchCondTrueLabIdInIdx = 1; +static const uint32_t kBranchCondFalseLabIdInIdx = 2; + +} // anonymous namespace + +namespace { + +// This class handle the unswitch procedure for a given loop. +// The unswitch will not happen if: +// - The loop has any instruction that will prevent it; +// - The loop invariant condition is not uniform. +class LoopUnswitch { + public: + LoopUnswitch(ir::IRContext* context, ir::Function* function, ir::Loop* loop, + ir::LoopDescriptor* loop_desc) + : function_(function), + loop_(loop), + loop_desc_(*loop_desc), + context_(context), + switch_block_(nullptr) {} + + // Returns true if the loop can be unswitched. + // Can be unswitch if: + // - The loop has no instructions that prevents it (such as barrier); + // - The loop has one conditional branch or switch that do not depends on the + // loop; + // - The loop invariant condition is uniform; + bool CanUnswitchLoop() { + if (switch_block_) return true; + if (loop_->IsSafeToClone()) return false; + + ir::CFG& cfg = *context_->cfg(); + + for (uint32_t bb_id : loop_->GetBlocks()) { + ir::BasicBlock* bb = cfg.block(bb_id); + if (bb->terminator()->IsBranch() && + bb->terminator()->opcode() != SpvOpBranch) { + if (IsConditionLoopInvariant(bb->terminator())) { + switch_block_ = bb; + break; + } + } + } + + return switch_block_; + } + + // Return the iterator to the basic block |bb|. + ir::Function::iterator FindBasicBlockPosition(ir::BasicBlock* bb_to_find) { + ir::Function::iterator it = std::find_if( + function_->begin(), function_->end(), + [bb_to_find](const ir::BasicBlock& bb) { return bb_to_find == &bb; }); + assert(it != function_->end() && "Basic Block not found"); + return it; + } + + // Creates a new basic block and insert it into the function |fn| at the + // position |ip|. This function preserves the def/use and instr to block + // managers. + ir::BasicBlock* CreateBasicBlock(ir::Function::iterator ip) { + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + ir::BasicBlock* bb = &*ip.InsertBefore(std::unique_ptr( + new ir::BasicBlock(std::unique_ptr(new ir::Instruction( + context_, SpvOpLabel, 0, context_->TakeNextId(), {}))))); + bb->SetParent(function_); + def_use_mgr->AnalyzeInstDef(bb->GetLabelInst()); + context_->set_instr_block(bb->GetLabelInst(), bb); + + return bb; + } + + // Unswitches |loop_|. + void PerformUnswitch() { + assert(CanUnswitchLoop() && + "Cannot unswitch if there is not constant condition"); + assert(loop_->GetPreHeaderBlock() && "This loop has no pre-header block"); + assert(loop_->IsLCSSA() && "This loop is not in LCSSA form"); + + ir::CFG& cfg = *context_->cfg(); + DominatorTree* dom_tree = + &context_->GetDominatorAnalysis(function_, *context_->cfg()) + ->GetDomTree(); + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + LoopUtils loop_utils(context_, loop_); + + ////////////////////////////////////////////////////////////////////////////// + // Step 1: Create the if merge block for structured modules. + // To do so, the |loop_| merge block will become the if's one and we + // create a merge for the loop. This will limit the amount of duplicated + // code the structured control flow imposes. + // For non structured program, the new loop will be connected to + // the old loop's exit blocks. + ////////////////////////////////////////////////////////////////////////////// + + // Get the merge block if it exists. + ir::BasicBlock* if_merge_block = loop_->GetMergeBlock(); + // The merge block is only created if the loop has a unique exit block. We + // have this guarantee for structured loops, for compute loop it will + // trivially help maintain both a structured-like form and LCSAA. + ir::BasicBlock* loop_merge_block = + if_merge_block + ? CreateBasicBlock(FindBasicBlockPosition(if_merge_block)) + : nullptr; + if (loop_merge_block) { + // Add the instruction and update managers. + opt::InstructionBuilder builder( + context_, loop_merge_block, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + builder.AddBranch(if_merge_block->id()); + builder.SetInsertPoint(&*loop_merge_block->begin()); + cfg.RegisterBlock(loop_merge_block); + def_use_mgr->AnalyzeInstDef(loop_merge_block->GetLabelInst()); + // Update CFG. + if_merge_block->ForEachPhiInst( + [loop_merge_block, &builder, this](ir::Instruction* phi) { + ir::Instruction* cloned = phi->Clone(context_); + builder.AddInstruction(std::unique_ptr(cloned)); + phi->SetInOperand(0, {cloned->result_id()}); + phi->SetInOperand(1, {loop_merge_block->id()}); + for (uint32_t j = phi->NumInOperands() - 1; j > 1; j--) + phi->RemoveInOperand(j); + }); + // Copy the predecessor list (will get invalidated otherwise). + std::vector preds = cfg.preds(if_merge_block->id()); + for (uint32_t pid : preds) { + if (pid == loop_merge_block->id()) continue; + ir::BasicBlock* p_bb = cfg.block(pid); + p_bb->ForEachSuccessorLabel( + [if_merge_block, loop_merge_block](uint32_t* id) { + if (*id == if_merge_block->id()) *id = loop_merge_block->id(); + }); + cfg.AddEdge(pid, loop_merge_block->id()); + } + cfg.RemoveNonExistingEdges(if_merge_block->id()); + // Update loop descriptor. + if (ir::Loop* ploop = loop_->GetParent()) { + ploop->AddBasicBlock(loop_merge_block); + loop_desc_.SetBasicBlockToLoop(loop_merge_block->id(), ploop); + } + + // Update the dominator tree. + DominatorTreeNode* loop_merge_dtn = + dom_tree->GetOrInsertNode(loop_merge_block); + DominatorTreeNode* if_merge_block_dtn = + dom_tree->GetOrInsertNode(if_merge_block); + loop_merge_dtn->parent_ = if_merge_block_dtn->parent_; + loop_merge_dtn->children_.push_back(if_merge_block_dtn); + loop_merge_dtn->parent_->children_.push_back(loop_merge_dtn); + if_merge_block_dtn->parent_->children_.erase(std::find( + if_merge_block_dtn->parent_->children_.begin(), + if_merge_block_dtn->parent_->children_.end(), if_merge_block_dtn)); + + loop_->SetMergeBlock(loop_merge_block); + } + + //////////////////////////////////////////////////////////////////////////// + // Step 2: Build a new preheader for |loop_|, use the old one + // for the constant branch. + //////////////////////////////////////////////////////////////////////////// + + ir::BasicBlock* if_block = loop_->GetPreHeaderBlock(); + // If this preheader is the parent loop header, + // we need to create a dedicated block for the if. + ir::BasicBlock* loop_pre_header = + CreateBasicBlock(++FindBasicBlockPosition(if_block)); + opt::InstructionBuilder(context_, loop_pre_header, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping) + .AddBranch(loop_->GetHeaderBlock()->id()); + + if_block->tail()->SetInOperand(0, {loop_pre_header->id()}); + + // Update loop descriptor. + if (ir::Loop* ploop = loop_desc_[if_block]) { + ploop->AddBasicBlock(loop_pre_header); + loop_desc_.SetBasicBlockToLoop(loop_pre_header->id(), ploop); + } + + // Update the CFG. + cfg.RegisterBlock(loop_pre_header); + def_use_mgr->AnalyzeInstDef(loop_pre_header->GetLabelInst()); + cfg.AddEdge(if_block->id(), loop_pre_header->id()); + cfg.RemoveNonExistingEdges(loop_->GetHeaderBlock()->id()); + + loop_->GetHeaderBlock()->ForEachPhiInst( + [loop_pre_header, if_block](ir::Instruction* phi) { + phi->ForEachInId([loop_pre_header, if_block](uint32_t* id) { + if (*id == if_block->id()) { + *id = loop_pre_header->id(); + } + }); + }); + loop_->SetPreHeaderBlock(loop_pre_header); + + // Update the dominator tree. + DominatorTreeNode* loop_pre_header_dtn = + dom_tree->GetOrInsertNode(loop_pre_header); + DominatorTreeNode* if_block_dtn = dom_tree->GetTreeNode(if_block); + loop_pre_header_dtn->parent_ = if_block_dtn; + assert( + if_block_dtn->children_.size() == 1 && + "A loop preheader should only have the header block as a child in the " + "dominator tree"); + loop_pre_header_dtn->children_.push_back(if_block_dtn->children_[0]); + if_block_dtn->children_.clear(); + if_block_dtn->children_.push_back(loop_pre_header_dtn); + + // Make domination queries valid. + dom_tree->ResetDFNumbering(); + + // Compute an ordered list of basic block to clone: loop blocks + pre-header + // + merge block. + loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks_, true, true); + + ///////////////////////////// + // Do the actual unswitch: // + // - Clone the loop // + // - Connect exits // + // - Specialize the loop // + ///////////////////////////// + + ir::Instruction* iv_condition = &*switch_block_->tail(); + SpvOp iv_opcode = iv_condition->opcode(); + ir::Instruction* condition = + def_use_mgr->GetDef(iv_condition->GetOperand(0).words[0]); + + analysis::ConstantManager* cst_mgr = context_->get_constant_mgr(); + const analysis::Type* cond_type = + context_->get_type_mgr()->GetType(condition->type_id()); + + // Build the list of value for which we need to clone and specialize the + // loop. + std::vector> constant_branch; + // Special case for the original loop + ir::Instruction* original_loop_constant_value; + ir::BasicBlock* original_loop_target; + if (iv_opcode == SpvOpBranchConditional) { + constant_branch.emplace_back( + cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {0})), + nullptr); + original_loop_constant_value = + cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {1})); + } else { + // We are looking to take the default branch, so we can't provide a + // specific value. + original_loop_constant_value = nullptr; + for (uint32_t i = 2; i < iv_condition->NumInOperands(); i += 2) { + constant_branch.emplace_back( + cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant( + cond_type, iv_condition->GetInOperand(i).words)), + nullptr); + } + } + + // Get the loop landing pads. + std::unordered_set if_merging_blocks; + std::function is_from_original_loop; + if (loop_->GetHeaderBlock()->GetLoopMergeInst()) { + if_merging_blocks.insert(if_merge_block->id()); + is_from_original_loop = [this](uint32_t id) { + return loop_->IsInsideLoop(id) || loop_->GetMergeBlock()->id() == id; + }; + } else { + loop_->GetExitBlocks(&if_merging_blocks); + is_from_original_loop = [this](uint32_t id) { + return loop_->IsInsideLoop(id); + }; + } + + for (auto& specialisation_pair : constant_branch) { + ir::Instruction* specialisation_value = specialisation_pair.first; + ////////////////////////////////////////////////////////// + // Step 3: Duplicate |loop_|. + ////////////////////////////////////////////////////////// + LoopUtils::LoopCloningResult clone_result; + + ir::Loop* cloned_loop = + loop_utils.CloneLoop(&clone_result, ordered_loop_blocks_); + specialisation_pair.second = cloned_loop->GetPreHeaderBlock(); + + //////////////////////////////////// + // Step 4: Specialize the loop. // + //////////////////////////////////// + + { + std::unordered_set dead_blocks; + std::unordered_set unreachable_merges; + SimplifyLoop( + ir::make_range( + ir::UptrVectorIterator( + &clone_result.cloned_bb_, clone_result.cloned_bb_.begin()), + ir::UptrVectorIterator( + &clone_result.cloned_bb_, clone_result.cloned_bb_.end())), + cloned_loop, condition, specialisation_value, &dead_blocks); + + // We tagged dead blocks, create the loop before we invalidate any basic + // block. + cloned_loop = + CleanLoopNest(cloned_loop, dead_blocks, &unreachable_merges); + CleanUpCFG( + ir::UptrVectorIterator( + &clone_result.cloned_bb_, clone_result.cloned_bb_.begin()), + dead_blocks, unreachable_merges); + + /////////////////////////////////////////////////////////// + // Step 5: Connect convergent edges to the landing pads. // + /////////////////////////////////////////////////////////// + + for (uint32_t merge_bb_id : if_merging_blocks) { + ir::BasicBlock* merge = context_->cfg()->block(merge_bb_id); + // We are in LCSSA so we only care about phi instructions. + merge->ForEachPhiInst([is_from_original_loop, &dead_blocks, + &clone_result](ir::Instruction* phi) { + uint32_t num_in_operands = phi->NumInOperands(); + for (uint32_t i = 0; i < num_in_operands; i += 2) { + uint32_t pred = phi->GetSingleWordInOperand(i + 1); + if (is_from_original_loop(pred)) { + pred = clone_result.value_map_.at(pred); + if (!dead_blocks.count(pred)) { + uint32_t incoming_value_id = phi->GetSingleWordInOperand(i); + // Not all the incoming value are coming from the loop. + ValueMapTy::iterator new_value = + clone_result.value_map_.find(incoming_value_id); + if (new_value != clone_result.value_map_.end()) { + incoming_value_id = new_value->second; + } + phi->AddOperand({SPV_OPERAND_TYPE_ID, {incoming_value_id}}); + phi->AddOperand({SPV_OPERAND_TYPE_ID, {pred}}); + } + } + } + }); + } + } + function_->AddBasicBlocks(clone_result.cloned_bb_.begin(), + clone_result.cloned_bb_.end(), + ++FindBasicBlockPosition(if_block)); + } + + // Same as above but specialize the existing loop + { + std::unordered_set dead_blocks; + std::unordered_set unreachable_merges; + SimplifyLoop(ir::make_range(function_->begin(), function_->end()), loop_, + condition, original_loop_constant_value, &dead_blocks); + + for (uint32_t merge_bb_id : if_merging_blocks) { + ir::BasicBlock* merge = context_->cfg()->block(merge_bb_id); + // LCSSA, so we only care about phi instructions. + // If we the phi is reduced to a single incoming branch, do not + // propagate it to preserve LCSSA. + PatchPhis(merge, dead_blocks, true); + } + if (if_merge_block) { + bool has_live_pred = false; + for (uint32_t pid : cfg.preds(if_merge_block->id())) { + if (!dead_blocks.count(pid)) { + has_live_pred = true; + break; + } + } + if (!has_live_pred) unreachable_merges.insert(if_merge_block->id()); + } + original_loop_target = loop_->GetPreHeaderBlock(); + // We tagged dead blocks, prune the loop descriptor from any dead loops. + // After this call, |loop_| can be nullptr (i.e. the unswitch killed this + // loop). + loop_ = CleanLoopNest(loop_, dead_blocks, &unreachable_merges); + + CleanUpCFG(function_->begin(), dead_blocks, unreachable_merges); + } + + ///////////////////////////////////// + // Finally: connect the new loops. // + ///////////////////////////////////// + + // Delete the old jump + context_->KillInst(&*if_block->tail()); + opt::InstructionBuilder builder(context_, if_block); + if (iv_opcode == SpvOpBranchConditional) { + assert(constant_branch.size() == 1); + builder.AddConditionalBranch( + condition->result_id(), original_loop_target->id(), + constant_branch[0].second->id(), + if_merge_block ? if_merge_block->id() : kInvalidId); + } else { + std::vector, uint32_t>> targets; + for (auto& t : constant_branch) { + targets.emplace_back(t.first->GetInOperand(0).words, t.second->id()); + } + + builder.AddSwitch(condition->result_id(), original_loop_target->id(), + targets, + if_merge_block ? if_merge_block->id() : kInvalidId); + } + + switch_block_ = nullptr; + ordered_loop_blocks_.clear(); + + context_->InvalidateAnalysesExceptFor( + ir::IRContext::Analysis::kAnalysisLoopAnalysis); + } + + // Returns true if the unswitch killed the original |loop_|. + bool WasLoopKilled() const { return loop_ == nullptr; } + + private: + using ValueMapTy = std::unordered_map; + using BlockMapTy = std::unordered_map; + + ir::Function* function_; + ir::Loop* loop_; + ir::LoopDescriptor& loop_desc_; + ir::IRContext* context_; + + ir::BasicBlock* switch_block_; + // Map between instructions and if they are dynamically uniform. + std::unordered_map dynamically_uniform_; + // The loop basic blocks in structured order. + std::vector ordered_loop_blocks_; + + // Returns the next usable id for the context. + uint32_t TakeNextId() { return context_->TakeNextId(); } + + // Patches |bb|'s phi instruction by removing incoming value from unexisting + // or tagged as dead branches. + void PatchPhis(ir::BasicBlock* bb, + const std::unordered_set& dead_blocks, + bool preserve_phi) { + ir::CFG& cfg = *context_->cfg(); + + std::vector phi_to_kill; + const std::vector& bb_preds = cfg.preds(bb->id()); + auto is_branch_dead = [&bb_preds, &dead_blocks](uint32_t id) { + return dead_blocks.count(id) || + std::find(bb_preds.begin(), bb_preds.end(), id) == bb_preds.end(); + }; + bb->ForEachPhiInst([&phi_to_kill, &is_branch_dead, preserve_phi, + this](ir::Instruction* insn) { + uint32_t i = 0; + while (i < insn->NumInOperands()) { + uint32_t incoming_id = insn->GetSingleWordInOperand(i + 1); + if (is_branch_dead(incoming_id)) { + // Remove the incoming block id operand. + insn->RemoveInOperand(i + 1); + // Remove the definition id operand. + insn->RemoveInOperand(i); + continue; + } + i += 2; + } + // If there is only 1 remaining edge, propagate the value and + // kill the instruction. + if (insn->NumInOperands() == 2 && !preserve_phi) { + phi_to_kill.push_back(insn); + context_->ReplaceAllUsesWith(insn->result_id(), + insn->GetSingleWordInOperand(0)); + } + }); + for (ir::Instruction* insn : phi_to_kill) { + context_->KillInst(insn); + } + } + + // Removes any block that is tagged as dead, if the block is in + // |unreachable_merges| then all block's instructions are replaced by a + // OpUnreachable. + void CleanUpCFG(ir::UptrVectorIterator bb_it, + const std::unordered_set& dead_blocks, + const std::unordered_set& unreachable_merges) { + ir::CFG& cfg = *context_->cfg(); + + while (bb_it != bb_it.End()) { + ir::BasicBlock& bb = *bb_it; + + if (unreachable_merges.count(bb.id())) { + if (bb.begin() != bb.tail() || + bb.terminator()->opcode() != SpvOpUnreachable) { + // Make unreachable, but leave the label. + bb.KillAllInsts(false); + opt::InstructionBuilder(context_, &bb).AddUnreachable(); + cfg.RemoveNonExistingEdges(bb.id()); + } + ++bb_it; + } else if (dead_blocks.count(bb.id())) { + cfg.ForgetBlock(&bb); + // Kill this block. + bb.KillAllInsts(true); + bb_it = bb_it.Erase(); + } else { + cfg.RemoveNonExistingEdges(bb.id()); + ++bb_it; + } + } + } + + // Return true if |c_inst| is a Boolean constant and set |cond_val| with the + // value that |c_inst| + bool GetConstCondition(const ir::Instruction* c_inst, bool* cond_val) { + bool cond_is_const; + switch (c_inst->opcode()) { + case SpvOpConstantFalse: { + *cond_val = false; + cond_is_const = true; + } break; + case SpvOpConstantTrue: { + *cond_val = true; + cond_is_const = true; + } break; + default: { cond_is_const = false; } break; + } + return cond_is_const; + } + + // Simplifies |loop| assuming the instruction |to_version_insn| takes the + // value |cst_value|. |block_range| is an iterator range returning the loop + // basic blocks in a structured order (dominator first). + // The function will ignore basic blocks returned by |block_range| if they + // does not belong to the loop. + // The set |dead_blocks| will contain all the dead basic blocks. + // + // Requirements: + // - |loop| must be in the LCSSA form; + // - |cst_value| must be constant or null (to represent the default target + // of an OpSwitch). + void SimplifyLoop( + ir::IteratorRange> block_range, + ir::Loop* loop, ir::Instruction* to_version_insn, + ir::Instruction* cst_value, std::unordered_set* dead_blocks) { + ir::CFG& cfg = *context_->cfg(); + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + std::function ignore_node; + ignore_node = [loop](uint32_t bb_id) { return !loop->IsInsideLoop(bb_id); }; + + std::vector> use_list; + def_use_mgr->ForEachUse( + to_version_insn, [&use_list, &ignore_node, this]( + ir::Instruction* inst, uint32_t operand_index) { + ir::BasicBlock* bb = context_->get_instr_block(inst); + + if (!bb || ignore_node(bb->id())) { + // Out of the loop, the specialization does not apply any more. + return; + } + use_list.emplace_back(inst, operand_index); + }); + + // First pass: inject the specialized value into the loop (and only the + // loop). + for (auto use : use_list) { + ir::Instruction* inst = use.first; + uint32_t operand_index = use.second; + ir::BasicBlock* bb = context_->get_instr_block(inst); + + // If it is not a branch, simply inject the value. + if (!inst->IsBranch()) { + // To also handle switch, cst_value can be nullptr: this case + // means that we are looking to branch to the default target of + // the switch. We don't actually know its value so we don't touch + // it if it not a switch. + if (cst_value) { + inst->SetOperand(operand_index, {cst_value->result_id()}); + def_use_mgr->AnalyzeInstUse(inst); + } + } + + // The user is a branch, kill dead branches. + uint32_t live_target = 0; + std::unordered_set dead_branches; + switch (inst->opcode()) { + case SpvOpBranchConditional: { + assert(cst_value && "No constant value to specialize !"); + bool branch_cond = false; + if (GetConstCondition(cst_value, &branch_cond)) { + uint32_t true_label = + inst->GetSingleWordInOperand(kBranchCondTrueLabIdInIdx); + uint32_t false_label = + inst->GetSingleWordInOperand(kBranchCondFalseLabIdInIdx); + live_target = branch_cond ? true_label : false_label; + uint32_t dead_target = !branch_cond ? true_label : false_label; + cfg.RemoveEdge(bb->id(), dead_target); + } + break; + } + case SpvOpSwitch: { + live_target = inst->GetSingleWordInOperand(1); + if (cst_value) { + if (!cst_value->IsConstant()) break; + const ir::Operand& cst = cst_value->GetInOperand(0); + for (uint32_t i = 2; i < inst->NumInOperands(); i += 2) { + const ir::Operand& literal = inst->GetInOperand(i); + if (literal == cst) { + live_target = inst->GetSingleWordInOperand(i + 1); + break; + } + } + } + for (uint32_t i = 1; i < inst->NumInOperands(); i += 2) { + uint32_t id = inst->GetSingleWordInOperand(i); + if (id != live_target) { + cfg.RemoveEdge(bb->id(), id); + } + } + } + default: + break; + } + if (live_target != 0) { + // Check for the presence of the merge block. + if (ir::Instruction* merge = bb->GetMergeInst()) + context_->KillInst(merge); + context_->KillInst(&*bb->tail()); + opt::InstructionBuilder builder( + context_, bb, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + builder.AddBranch(live_target); + } + } + + // Go through the loop basic block and tag all blocks that are obviously + // dead. + std::unordered_set visited; + for (ir::BasicBlock& bb : block_range) { + if (ignore_node(bb.id())) continue; + visited.insert(bb.id()); + + // Check if this block is dead, if so tag it as dead otherwise patch phi + // instructions. + bool has_live_pred = false; + for (uint32_t pid : cfg.preds(bb.id())) { + if (!dead_blocks->count(pid)) { + has_live_pred = true; + break; + } + } + if (!has_live_pred) { + dead_blocks->insert(bb.id()); + const ir::BasicBlock& cbb = bb; + // Patch the phis for any back-edge. + cbb.ForEachSuccessorLabel( + [dead_blocks, &visited, &cfg, this](uint32_t id) { + if (!visited.count(id) || dead_blocks->count(id)) return; + ir::BasicBlock* succ = cfg.block(id); + PatchPhis(succ, *dead_blocks, false); + }); + continue; + } + // Update the phi instructions, some incoming branch have/will disappear. + PatchPhis(&bb, *dead_blocks, /* preserve_phi = */ false); + } + } + + // Returns true if the header is not reachable or tagged as dead or if we + // never loop back. + bool IsLoopDead(ir::BasicBlock* header, ir::BasicBlock* latch, + const std::unordered_set& dead_blocks) { + if (!header || dead_blocks.count(header->id())) return true; + if (!latch || dead_blocks.count(latch->id())) return true; + for (uint32_t pid : context_->cfg()->preds(header->id())) { + if (!dead_blocks.count(pid)) { + // Seems reachable. + return false; + } + } + return true; + } + + // Cleans the loop nest under |loop| and reflect changes to the loop + // descriptor. This will kill all descriptors that represent dead loops. + // If |loop_| is killed, it will be set to nullptr. + // Any merge blocks that become unreachable will be added to + // |unreachable_merges|. + // The function returns the pointer to |loop| or nullptr if the loop was + // killed. + ir::Loop* CleanLoopNest(ir::Loop* loop, + const std::unordered_set& dead_blocks, + std::unordered_set* unreachable_merges) { + // This represent the pair of dead loop and nearest alive parent (nullptr if + // no parent). + std::unordered_map dead_loops; + auto get_parent = [&dead_loops](ir::Loop* l) -> ir::Loop* { + std::unordered_map::iterator it = + dead_loops.find(l); + if (it != dead_loops.end()) return it->second; + return nullptr; + }; + + bool is_main_loop_dead = + IsLoopDead(loop->GetHeaderBlock(), loop->GetLatchBlock(), dead_blocks); + if (is_main_loop_dead) { + if (ir::Instruction* merge = loop->GetHeaderBlock()->GetLoopMergeInst()) { + context_->KillInst(merge); + } + dead_loops[loop] = loop->GetParent(); + } else + dead_loops[loop] = loop; + // For each loop, check if we killed it. If we did, find a suitable parent + // for its children. + for (ir::Loop& sub_loop : + ir::make_range(++opt::TreeDFIterator(loop), + opt::TreeDFIterator())) { + if (IsLoopDead(sub_loop.GetHeaderBlock(), sub_loop.GetLatchBlock(), + dead_blocks)) { + if (ir::Instruction* merge = + sub_loop.GetHeaderBlock()->GetLoopMergeInst()) { + context_->KillInst(merge); + } + dead_loops[&sub_loop] = get_parent(&sub_loop); + } else { + // The loop is alive, check if its merge block is dead, if it is, tag it + // as required. + if (sub_loop.GetMergeBlock()) { + uint32_t merge_id = sub_loop.GetMergeBlock()->id(); + if (dead_blocks.count(merge_id)) { + unreachable_merges->insert(sub_loop.GetMergeBlock()->id()); + } + } + } + } + if (!is_main_loop_dead) dead_loops.erase(loop); + + // Remove dead blocks from live loops. + for (uint32_t bb_id : dead_blocks) { + ir::Loop* l = loop_desc_[bb_id]; + if (l) { + l->RemoveBasicBlock(bb_id); + loop_desc_.ForgetBasicBlock(bb_id); + } + } + + std::for_each( + dead_loops.begin(), dead_loops.end(), + [&loop, this]( + std::unordered_map::iterator::reference it) { + if (it.first == loop) loop = nullptr; + loop_desc_.RemoveLoop(it.first); + }); + + return loop; + } + + // Returns true if |var| is dynamically uniform. + // Note: this is currently approximated as uniform. + bool IsDynamicallyUniform(ir::Instruction* var, const ir::BasicBlock* entry, + const DominatorTree& post_dom_tree) { + assert(post_dom_tree.IsPostDominator()); + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + auto it = dynamically_uniform_.find(var->result_id()); + + if (it != dynamically_uniform_.end()) return it->second; + + analysis::DecorationManager* dec_mgr = context_->get_decoration_mgr(); + + bool& is_uniform = dynamically_uniform_[var->result_id()]; + is_uniform = false; + + dec_mgr->WhileEachDecoration(var->result_id(), SpvDecorationUniform, + [&is_uniform](const ir::Instruction&) { + is_uniform = true; + return false; + }); + if (is_uniform) { + return is_uniform; + } + + ir::BasicBlock* parent = context_->get_instr_block(var); + if (!parent) { + return is_uniform = true; + } + + if (!post_dom_tree.Dominates(parent->id(), entry->id())) { + return is_uniform = false; + } + if (var->opcode() == SpvOpLoad) { + const uint32_t PtrTypeId = + def_use_mgr->GetDef(var->GetSingleWordInOperand(0))->type_id(); + const ir::Instruction* PtrTypeInst = def_use_mgr->GetDef(PtrTypeId); + uint32_t storage_class = + PtrTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx); + if (storage_class != SpvStorageClassUniform && + storage_class != SpvStorageClassUniformConstant) { + return is_uniform = false; + } + } else { + if (!context_->IsCombinatorInstruction(var)) { + return is_uniform = false; + } + } + + return is_uniform = var->WhileEachInId([entry, &post_dom_tree, + this](const uint32_t* id) { + return IsDynamicallyUniform(context_->get_def_use_mgr()->GetDef(*id), + entry, post_dom_tree); + }); + } + + // Returns true if |insn| is constant and dynamically uniform within the loop. + bool IsConditionLoopInvariant(ir::Instruction* insn) { + assert(insn->IsBranch()); + assert(insn->opcode() != SpvOpBranch); + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + ir::Instruction* condition = + def_use_mgr->GetDef(insn->GetOperand(0).words[0]); + return !loop_->IsInsideLoop(condition) && + IsDynamicallyUniform( + condition, function_->entry().get(), + context_->GetPostDominatorAnalysis(function_, *context_->cfg()) + ->GetDomTree()); + } +}; + +} // namespace + +Pass::Status LoopUnswitchPass::Process(ir::IRContext* c) { + InitializeProcessing(c); + + bool modified = false; + ir::Module* module = c->module(); + + // Process each function in the module + for (ir::Function& f : *module) { + modified |= ProcessFunction(&f); + } + + return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +bool LoopUnswitchPass::ProcessFunction(ir::Function* f) { + bool modified = false; + std::unordered_set processed_loop; + + ir::LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f); + + bool loop_changed = true; + while (loop_changed) { + loop_changed = false; + for (ir::Loop& loop : + ir::make_range(++opt::TreeDFIterator( + loop_descriptor.GetDummyRootLoop()), + opt::TreeDFIterator())) { + if (processed_loop.count(&loop)) continue; + processed_loop.insert(&loop); + + LoopUnswitch unswitcher(context(), f, &loop, &loop_descriptor); + while (!unswitcher.WasLoopKilled() && unswitcher.CanUnswitchLoop()) { + if (!loop.IsLCSSA()) { + LoopUtils(context(), &loop).MakeLoopClosedSSA(); + } + modified = true; + loop_changed = true; + unswitcher.PerformUnswitch(); + } + if (loop_changed) break; + } + } + + return modified; +} + +} // namespace opt +} // namespace spvtools diff --git a/source/opt/loop_unswitch_pass.h b/source/opt/loop_unswitch_pass.h new file mode 100644 index 000000000..dbe581479 --- /dev/null +++ b/source/opt/loop_unswitch_pass.h @@ -0,0 +1,43 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBSPIRV_OPT_LOOP_UNSWITCH_PASS_H_ +#define LIBSPIRV_OPT_LOOP_UNSWITCH_PASS_H_ + +#include "opt/loop_descriptor.h" +#include "opt/pass.h" + +namespace spvtools { +namespace opt { + +// Implements the loop unswitch optimization. +// The loop unswitch hoists invariant "if" statements if the conditions are +// constant within the loop and clones the loop for each branch. +class LoopUnswitchPass : public Pass { + public: + const char* name() const override { return "loop-unswitch"; } + + // Processes the given |module|. Returns Status::Failure if errors occur when + // processing. Returns the corresponding Status::Success if processing is + // succesful to indicate whether changes have been made to the modue. + Pass::Status Process(ir::IRContext* context) override; + + private: + bool ProcessFunction(ir::Function* f); +}; + +} // namespace opt +} // namespace spvtools + +#endif // !LIBSPIRV_OPT_LOOP_UNSWITCH_PASS_H_ diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp index 6c2a15f9c..853267903 100644 --- a/source/opt/loop_utils.cpp +++ b/source/opt/loop_utils.cpp @@ -18,6 +18,7 @@ #include #include +#include "cfa.h" #include "opt/cfg.h" #include "opt/ir_builder.h" #include "opt/ir_context.h" @@ -481,5 +482,114 @@ void LoopUtils::MakeLoopClosedSSA() { ir::IRContext::Analysis::kAnalysisLoopAnalysis); } +ir::Loop* LoopUtils::CloneLoop( + LoopCloningResult* cloning_result, + const std::vector& ordered_loop_blocks) const { + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + std::unique_ptr new_loop = MakeUnique(context_); + if (loop_->HasParent()) new_loop->SetParent(loop_->GetParent()); + + ir::CFG& cfg = *context_->cfg(); + + // Clone and place blocks in a SPIR-V compliant order (dominators first). + for (ir::BasicBlock* old_bb : ordered_loop_blocks) { + // For each basic block in the loop, we clone it and register the mapping + // between old and new ids. + ir::BasicBlock* new_bb = old_bb->Clone(context_); + new_bb->SetParent(&function_); + new_bb->GetLabelInst()->SetResultId(context_->TakeNextId()); + def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst()); + context_->set_instr_block(new_bb->GetLabelInst(), new_bb); + cloning_result->cloned_bb_.emplace_back(new_bb); + + cloning_result->old_to_new_bb_[old_bb->id()] = new_bb; + cloning_result->new_to_old_bb_[new_bb->id()] = old_bb; + cloning_result->value_map_[old_bb->id()] = new_bb->id(); + + if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb); + + for (auto& inst : *new_bb) { + if (inst.HasResultId()) { + uint32_t old_result_id = inst.result_id(); + inst.SetResultId(context_->TakeNextId()); + cloning_result->value_map_[old_result_id] = inst.result_id(); + + // Only look at the defs for now, uses are not updated yet. + def_use_mgr->AnalyzeInstDef(&inst); + } + } + } + + // All instructions (including all labels) have been cloned, + // remap instruction operands id with the new ones. + for (std::unique_ptr& bb_ref : cloning_result->cloned_bb_) { + ir::BasicBlock* bb = bb_ref.get(); + + for (ir::Instruction& insn : *bb) { + insn.ForEachInId([cloning_result](uint32_t* old_id) { + // If the operand is defined in the loop, remap the id. + auto id_it = cloning_result->value_map_.find(*old_id); + if (id_it != cloning_result->value_map_.end()) { + *old_id = id_it->second; + } + }); + // Only look at what the instruction uses. All defs are register, so all + // should be fine now. + def_use_mgr->AnalyzeInstUse(&insn); + context_->set_instr_block(&insn, bb); + } + cfg.RegisterBlock(bb); + } + + PopulateLoopNest(new_loop.get(), *cloning_result); + + return new_loop.release(); +} + +void LoopUtils::PopulateLoopNest( + ir::Loop* new_loop, const LoopCloningResult& cloning_result) const { + std::unordered_map loop_mapping; + loop_mapping[loop_] = new_loop; + + if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop); + PopulateLoopDesc(new_loop, loop_, cloning_result); + + for (ir::Loop& sub_loop : + ir::make_range(++opt::TreeDFIterator(loop_), + opt::TreeDFIterator())) { + ir::Loop* cloned = new ir::Loop(context_); + if (ir::Loop* parent = loop_mapping[sub_loop.GetParent()]) + parent->AddNestedLoop(cloned); + loop_mapping[&sub_loop] = cloned; + PopulateLoopDesc(cloned, &sub_loop, cloning_result); + } + + loop_desc_->AddLoopNest(std::unique_ptr(new_loop)); +} + +// Populates |new_loop| descriptor according to |old_loop|'s one. +void LoopUtils::PopulateLoopDesc( + ir::Loop* new_loop, ir::Loop* old_loop, + const LoopCloningResult& cloning_result) const { + for (uint32_t bb_id : old_loop->GetBlocks()) { + ir::BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id); + new_loop->AddBasicBlock(bb); + } + new_loop->SetHeaderBlock( + cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id())); + if (old_loop->GetLatchBlock()) + new_loop->SetLatchBlock( + cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id())); + if (old_loop->GetMergeBlock()) { + ir::BasicBlock* bb = + cloning_result.old_to_new_bb_.at(old_loop->GetMergeBlock()->id()); + new_loop->SetMergeBlock(bb); + } + if (old_loop->GetPreHeaderBlock()) + new_loop->SetPreHeaderBlock( + cloning_result.old_to_new_bb_.at(old_loop->GetPreHeaderBlock()->id())); +} + } // namespace opt } // namespace spvtools diff --git a/source/opt/loop_utils.h b/source/opt/loop_utils.h index 89e693679..0e77bb6b8 100644 --- a/source/opt/loop_utils.h +++ b/source/opt/loop_utils.h @@ -17,15 +17,11 @@ #include #include #include +#include "opt/ir_context.h" #include "opt/loop_descriptor.h" namespace spvtools { -namespace ir { -class Loop; -class IRContext; -} // namespace ir - namespace opt { // LoopUtils is used to encapsulte loop optimizations and from the passes which @@ -33,8 +29,25 @@ namespace opt { // or through a pass which is using this. class LoopUtils { public: + // Holds a auxiliary results of the loop cloning procedure. + struct LoopCloningResult { + using ValueMapTy = std::unordered_map; + using BlockMapTy = std::unordered_map; + + // Mapping between the original loop ids and the new one. + ValueMapTy value_map_; + // Mapping between original loop blocks to the cloned one. + BlockMapTy old_to_new_bb_; + // Mapping between the cloned loop blocks to original one. + BlockMapTy new_to_old_bb_; + // List of cloned basic block. + std::vector> cloned_bb_; + }; + LoopUtils(ir::IRContext* context, ir::Loop* loop) : context_(context), + loop_desc_( + context->GetLoopDescriptor(loop->GetHeaderBlock()->GetParent())), loop_(loop), function_(*loop_->GetHeaderBlock()->GetParent()) {} @@ -72,6 +85,17 @@ class LoopUtils { // Preserves: CFG, def/use and instruction to block mapping. void CreateLoopDedicatedExits(); + // Clone |loop_| and remap its instructions. Newly created blocks + // will be added to the |cloning_result.cloned_bb_| list, correctly ordered to + // be inserted into a function. If the loop is structured, the merge construct + // will also be cloned. The function preserves the def/use, cfg and instr to + // block analyses. + // The cloned loop nest will be added to the loop descriptor and will have + // owner ship. + ir::Loop* CloneLoop( + LoopCloningResult* cloning_result, + const std::vector& ordered_loop_blocks) const; + // Perfom a partial unroll of |loop| by given |factor|. This will copy the // body of the loop |factor| times. So a |factor| of one would give a new loop // with the original body plus one unrolled copy body. @@ -103,8 +127,17 @@ class LoopUtils { private: ir::IRContext* context_; + ir::LoopDescriptor* loop_desc_; ir::Loop* loop_; ir::Function& function_; + + // Populates the loop nest of |new_loop| according to |loop_| nest. + void PopulateLoopNest(ir::Loop* new_loop, + const LoopCloningResult& cloning_result) const; + + // Populates |new_loop| descriptor according to |old_loop|'s one. + void PopulateLoopDesc(ir::Loop* new_loop, ir::Loop* old_loop, + const LoopCloningResult& cloning_result) const; }; } // namespace opt diff --git a/source/opt/mem_pass.cpp b/source/opt/mem_pass.cpp index 399647776..86b84a0a0 100644 --- a/source/opt/mem_pass.cpp +++ b/source/opt/mem_pass.cpp @@ -137,11 +137,7 @@ bool MemPass::HasOnlyNamesAndDecorates(uint32_t id) const { } void MemPass::KillAllInsts(ir::BasicBlock* bp, bool killLabel) { - bp->ForEachInst([this, killLabel](ir::Instruction* ip) { - if (killLabel || ip->opcode() != SpvOpLabel) { - context()->KillInst(ip); - } - }); + bp->KillAllInsts(killLabel); } bool MemPass::HasLoads(uint32_t varId) const { diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp index dced5db53..c52e64351 100644 --- a/source/opt/optimizer.cpp +++ b/source/opt/optimizer.cpp @@ -356,6 +356,11 @@ Optimizer::PassToken CreateLoopInvariantCodeMotionPass() { return MakeUnique(MakeUnique()); } +Optimizer::PassToken CreateLoopUnswitchPass() { + return MakeUnique( + MakeUnique()); +} + Optimizer::PassToken CreateRedundancyEliminationPass() { return MakeUnique( MakeUnique()); diff --git a/source/opt/passes.h b/source/opt/passes.h index 9fb98aaf0..f0fb289a1 100644 --- a/source/opt/passes.h +++ b/source/opt/passes.h @@ -42,6 +42,7 @@ #include "local_single_store_elim_pass.h" #include "local_ssa_elim_pass.h" #include "loop_unroller.h" +#include "loop_unswitch_pass.h" #include "merge_return_pass.h" #include "null_pass.h" #include "private_to_local_pass.h" diff --git a/test/opt/loop_optimizations/CMakeLists.txt b/test/opt/loop_optimizations/CMakeLists.txt index a9cb49916..3a1144d72 100644 --- a/test/opt/loop_optimizations/CMakeLists.txt +++ b/test/opt/loop_optimizations/CMakeLists.txt @@ -79,3 +79,8 @@ add_spvtools_unittest(TARGET loop_unroll_assumtion_checks LIBS SPIRV-Tools-opt ) +add_spvtools_unittest(TARGET unswitch_test + SRCS ../function_utils.h + unswitch.cpp + LIBS SPIRV-Tools-opt +) diff --git a/test/opt/loop_optimizations/loop_descriptions.cpp b/test/opt/loop_optimizations/loop_descriptions.cpp index 90e585e0b..f53ad0508 100644 --- a/test/opt/loop_optimizations/loop_descriptions.cpp +++ b/test/opt/loop_optimizations/loop_descriptions.cpp @@ -203,4 +203,98 @@ TEST_F(PassClassTest, LoopWithNoPreHeader) { EXPECT_NE(loop->GetOrCreatePreHeaderBlock(), nullptr); } +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +in vec4 c; +void main() { + int i = 0; + bool cond = c[0] == 0; + for (; i < 10; i++) { + if (cond) { + return; + } + else { + return; + } + } + bool cond2 = i == 9; +} +*/ +TEST_F(PassClassTest, NoLoop) { + const std::string text = R"(; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 3 +; Bound: 47 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %4 "main" %16 + OpExecutionMode %4 OriginUpperLeft + OpSource GLSL 330 + OpName %4 "main" + OpName %16 "c" + OpDecorate %16 Location 0 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeInt 32 1 + %7 = OpTypePointer Function %6 + %9 = OpConstant %6 0 + %10 = OpTypeBool + %11 = OpTypePointer Function %10 + %13 = OpTypeFloat 32 + %14 = OpTypeVector %13 4 + %15 = OpTypePointer Input %14 + %16 = OpVariable %15 Input + %17 = OpTypeInt 32 0 + %18 = OpConstant %17 0 + %19 = OpTypePointer Input %13 + %22 = OpConstant %13 0 + %30 = OpConstant %6 10 + %39 = OpConstant %6 1 + %46 = OpUndef %6 + %4 = OpFunction %2 None %3 + %5 = OpLabel + %20 = OpAccessChain %19 %16 %18 + %21 = OpLoad %13 %20 + %23 = OpFOrdEqual %10 %21 %22 + OpBranch %24 + %24 = OpLabel + %45 = OpPhi %6 %9 %5 %40 %27 + OpLoopMerge %26 %27 None + OpBranch %28 + %28 = OpLabel + %31 = OpSLessThan %10 %45 %30 + OpBranchConditional %31 %25 %26 + %25 = OpLabel + OpSelectionMerge %34 None + OpBranchConditional %23 %33 %36 + %33 = OpLabel + OpReturn + %36 = OpLabel + OpReturn + %34 = OpLabel + OpBranch %27 + %27 = OpLabel + %40 = OpIAdd %6 %46 %39 + OpBranch %24 + %26 = OpLabel + OpReturn + OpFunctionEnd + )"; + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + const ir::Function* f = spvtest::GetFunction(module, 4); + ir::LoopDescriptor ld{f}; + + EXPECT_EQ(ld.NumLoops(), 0u); +} + } // namespace diff --git a/test/opt/loop_optimizations/unswitch.cpp b/test/opt/loop_optimizations/unswitch.cpp new file mode 100644 index 000000000..d5c5209aa --- /dev/null +++ b/test/opt/loop_optimizations/unswitch.cpp @@ -0,0 +1,914 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#ifdef SPIRV_EFFCEE +#include "effcee/effcee.h" +#endif + +#include "../pass_fixture.h" + +namespace { + +using namespace spvtools; + +using UnswitchTest = PassTest<::testing::Test>; + +#ifdef SPIRV_EFFCEE + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 450 core +uniform vec4 c; +void main() { + int i = 0; + int j = 0; + bool cond = c[0] == 0; + for (; i < 10; i++, j++) { + if (cond) { + i++; + } + else { + j++; + } + } +} +*/ +TEST_F(UnswitchTest, SimpleUnswitch) { + const std::string text = R"( +; CHECK: [[cst_cond:%\w+]] = OpFOrdEqual +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpBranchConditional [[cst_cond]] [[loop_t:%\w+]] [[loop_f:%\w+]] + +; Loop specialized for false. +; CHECK: [[loop_f]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_f]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: [[phi_j:%\w+]] = OpPhi %int %int_0 [[loop_f]] [[iv_j:%\w+]] [[continue]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=1 and j+=2. +; CHECK: [[phi_j:%\w+]] = OpIAdd %int [[phi_j]] %int_1 +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[iv_j]] = OpIAdd %int [[phi_j]] %int_1 +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; Loop specialized for true. +; CHECK: [[loop_t]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: [[phi_j:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_j:%\w+]] [[continue]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=2 and j+=1. +; CHECK: [[phi_i:%\w+]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[iv_j]] = OpIAdd %int [[phi_j]] %int_1 +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpReturn + + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + OpDecorate %c DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_UniformConstant_v4float = OpTypePointer UniformConstant %v4float + %c = OpVariable %_ptr_UniformConstant_v4float UniformConstant + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_UniformConstant_float = OpTypePointer UniformConstant %float + %float_0 = OpConstant %float 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %21 = OpAccessChain %_ptr_UniformConstant_float %c %uint_0 + %22 = OpLoad %float %21 + %24 = OpFOrdEqual %bool %22 %float_0 + OpBranch %25 + %25 = OpLabel + %46 = OpPhi %int %int_0 %5 %43 %28 + %47 = OpPhi %int %int_0 %5 %45 %28 + OpLoopMerge %27 %28 None + OpBranch %29 + %29 = OpLabel + %32 = OpSLessThan %bool %46 %int_10 + OpBranchConditional %32 %26 %27 + %26 = OpLabel + OpSelectionMerge %35 None + OpBranchConditional %24 %34 %39 + %34 = OpLabel + %38 = OpIAdd %int %46 %int_1 + OpBranch %35 + %39 = OpLabel + %41 = OpIAdd %int %47 %int_1 + OpBranch %35 + %35 = OpLabel + %48 = OpPhi %int %38 %34 %46 %39 + %49 = OpPhi %int %47 %34 %41 %39 + OpBranch %28 + %28 = OpLabel + %43 = OpIAdd %int %48 %int_1 + %45 = OpIAdd %int %49 %int_1 + OpBranch %25 + %27 = OpLabel + OpReturn + OpFunctionEnd + )"; + + SinglePassRunAndMatch(text, true); +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +in vec4 c; +void main() { + int i = 0; + bool cond = c[0] == 0; + for (; i < 10; i++) { + if (cond) { + i++; + } + else { + return; + } + } +} +*/ +TEST_F(UnswitchTest, UnswitchExit) { + const std::string text = R"( +; CHECK: [[cst_cond:%\w+]] = OpFOrdEqual +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpBranchConditional [[cst_cond]] [[loop_t:%\w+]] [[loop_f:%\w+]] + +; Loop specialized for false. +; CHECK: [[loop_f]] = OpLabel +; CHECK: OpReturn + +; Loop specialized for true. +; CHECK: [[loop_t]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=2. +; CHECK: [[phi_i:%\w+]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpReturn + + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + OpDecorate %23 Uniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %c = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_0 = OpConstant %float 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_float %c %uint_0 + %21 = OpLoad %float %20 + %23 = OpFOrdEqual %bool %21 %float_0 + OpBranch %24 + %24 = OpLabel + %42 = OpPhi %int %int_0 %5 %41 %27 + OpLoopMerge %26 %27 None + OpBranch %28 + %28 = OpLabel + %31 = OpSLessThan %bool %42 %int_10 + OpBranchConditional %31 %25 %26 + %25 = OpLabel + OpSelectionMerge %34 None + OpBranchConditional %23 %33 %38 + %33 = OpLabel + %37 = OpIAdd %int %42 %int_1 + OpBranch %34 + %38 = OpLabel + OpReturn + %34 = OpLabel + OpBranch %27 + %27 = OpLabel + %41 = OpIAdd %int %37 %int_1 + OpBranch %24 + %26 = OpLabel + OpReturn + OpFunctionEnd + )"; + + SinglePassRunAndMatch(text, true); +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +in vec4 c; +void main() { + int i = 0; + bool cond = c[0] == 0; + for (; i < 10; i++) { + if (cond) { + continue; + } + else { + i++; + } + } +} +*/ +TEST_F(UnswitchTest, UnswitchContinue) { + const std::string text = R"( +; CHECK: [[cst_cond:%\w+]] = OpFOrdEqual +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpBranchConditional [[cst_cond]] [[loop_t:%\w+]] [[loop_f:%\w+]] + +; Loop specialized for false. +; CHECK: [[loop_f]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_f]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=2. +; CHECK: [[phi_i:%\w+]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; Loop specialized for true. +; CHECK: [[loop_t]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=1. +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpReturn + + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + OpDecorate %23 Uniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %c = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_0 = OpConstant %float 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_float %c %uint_0 + %21 = OpLoad %float %20 + %23 = OpFOrdEqual %bool %21 %float_0 + OpBranch %24 + %24 = OpLabel + %42 = OpPhi %int %int_0 %5 %41 %27 + OpLoopMerge %26 %27 None + OpBranch %28 + %28 = OpLabel + %31 = OpSLessThan %bool %42 %int_10 + OpBranchConditional %31 %25 %26 + %25 = OpLabel + OpSelectionMerge %34 None + OpBranchConditional %23 %33 %36 + %33 = OpLabel + OpBranch %27 + %36 = OpLabel + %39 = OpIAdd %int %42 %int_1 + OpBranch %34 + %34 = OpLabel + OpBranch %27 + %27 = OpLabel + %43 = OpPhi %int %42 %33 %39 %34 + %41 = OpIAdd %int %43 %int_1 + OpBranch %24 + %26 = OpLabel + OpReturn + OpFunctionEnd + )"; + + SinglePassRunAndMatch(text, true); +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +in vec4 c; +void main() { + int i = 0; + bool cond = c[0] == 0; + for (; i < 10; i++) { + if (cond) { + i++; + } + else { + break; + } + } +} +*/ +TEST_F(UnswitchTest, UnswitchKillLoop) { + const std::string text = R"( +; CHECK: [[cst_cond:%\w+]] = OpFOrdEqual +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpBranchConditional [[cst_cond]] [[loop_t:%\w+]] [[loop_f:%\w+]] + +; Loop specialized for false. +; CHECK: [[loop_f]] = OpLabel +; CHECK: OpBranch [[if_merge]] + +; Loop specialized for true. +; CHECK: [[loop_t]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=2. +; CHECK: [[phi_i:%\w+]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpReturn + + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + OpDecorate %23 Uniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %c = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_0 = OpConstant %float 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_float %c %uint_0 + %21 = OpLoad %float %20 + %23 = OpFOrdEqual %bool %21 %float_0 + OpBranch %24 + %24 = OpLabel + %42 = OpPhi %int %int_0 %5 %41 %27 + OpLoopMerge %26 %27 None + OpBranch %28 + %28 = OpLabel + %31 = OpSLessThan %bool %42 %int_10 + OpBranchConditional %31 %25 %26 + %25 = OpLabel + OpSelectionMerge %34 None + OpBranchConditional %23 %33 %38 + %33 = OpLabel + %37 = OpIAdd %int %42 %int_1 + OpBranch %34 + %38 = OpLabel + OpBranch %26 + %34 = OpLabel + OpBranch %27 + %27 = OpLabel + %41 = OpIAdd %int %37 %int_1 + OpBranch %24 + %26 = OpLabel + OpReturn + OpFunctionEnd + )"; + + SinglePassRunAndMatch(text, true); +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +in vec4 c; +void main() { + int i = 0; + int cond = int(c[0]); + for (; i < 10; i++) { + switch (cond) { + case 0: + return; + case 1: + discard; + case 2: + break; + default: + break; + } + } + bool cond2 = i == 9; +} +*/ +TEST_F(UnswitchTest, UnswitchSwitch) { + const std::string text = R"( +; CHECK: [[cst_cond:%\w+]] = OpConvertFToS +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpSwitch [[cst_cond]] [[default:%\w+]] 0 [[loop_0:%\w+]] 1 [[loop_1:%\w+]] 2 [[loop_2:%\w+]] + +; Loop specialized for 2. +; CHECK: [[loop_2]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_2]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=1. +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: OpBranch [[loop]] + +; Loop specialized for 1. +; CHECK: [[loop_1]] = OpLabel +; CHECK: OpKill + +; Loop specialized for 0. +; CHECK: [[loop_0]] = OpLabel +; CHECK: OpReturn + +; Loop specialized for the default case. +; CHECK: [[default]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[default]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have i+=1. +; CHECK: [[phi_i:%\w+]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK: OpBranch [[loop]] + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpReturn + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + OpDecorate %20 Uniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %c = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 +%_ptr_Function_bool = OpTypePointer Function %bool + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_float %c %uint_0 + %19 = OpLoad %float %18 + %20 = OpConvertFToS %int %19 + OpBranch %21 + %21 = OpLabel + %49 = OpPhi %int %int_0 %5 %43 %24 + OpLoopMerge %23 %24 None + OpBranch %25 + %25 = OpLabel + %29 = OpSLessThan %bool %49 %int_10 + OpBranchConditional %29 %22 %23 + %22 = OpLabel + OpSelectionMerge %35 None + OpSwitch %20 %34 0 %31 1 %32 2 %33 + %34 = OpLabel + OpBranch %35 + %31 = OpLabel + OpReturn + %32 = OpLabel + OpKill + %33 = OpLabel + OpBranch %35 + %35 = OpLabel + OpBranch %24 + %24 = OpLabel + %43 = OpIAdd %int %49 %int_1 + OpBranch %21 + %23 = OpLabel + OpReturn + OpFunctionEnd + )"; + + SinglePassRunAndMatch(text, true); +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 440 core +layout(location = 0)in vec4 c; +void main() { + int i = 0; + int j = 0; + int k = 0; + bool cond = c[0] == 0; + for (; i < 10; i++) { + for (; j < 10; j++) { + if (cond) { + i++; + } else { + j++; + } + } + } + for (; k < 10; k++) { + if (cond) { + k++; + } + } +} +*/ +TEST_F(UnswitchTest, UnSwitchNested) { + const std::string text = R"( +; CHECK: [[cst_cond:%\w+]] = OpFOrdEqual +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpBranchConditional [[cst_cond]] [[loop_t:%\w+]] [[loop_f:%\w+]] + +; Loop specialized for false, one loop is killed, j won't change anymore. +; CHECK: [[loop_f]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_f]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: [[phi_j:%\w+]] = OpPhi %int %int_0 [[loop_f]] [[iv_j:%\w+]] [[continue]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[iv_i]] = OpIAdd %int [[phi_i]] %int_1 +; CHECK-NEXT: OpBranch [[loop]] +; CHECK: OpReturn + +; Loop specialized for true. +; CHECK: [[loop_t]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_i:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_i:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: [[phi_j:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_j:%\w+]] [[continue]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_i]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] [[pre_loop_inner:%\w+]] [[merge]] + +; CHECK: [[pre_loop_inner]] = OpLabel +; CHECK-NEXT: OpBranch [[loop_inner:%\w+]] +; CHECK-NEXT: [[loop_inner]] = OpLabel +; CHECK-NEXT: [[phi2_i:%\w+]] = OpPhi %int [[phi_i]] [[pre_loop_inner]] [[iv2_i:%\w+]] [[continue2:%\w+]] +; CHECK-NEXT: [[phi2_j:%\w+]] = OpPhi %int [[phi_j]] [[pre_loop_inner]] [[iv2_j:%\w+]] [[continue2]] +; CHECK-NEXT: OpLoopMerge [[merge2:%\w+]] [[continue2]] None + +; CHECK: OpBranch [[continue2]] +; CHECK: [[merge2]] = OpLabel +; CHECK: OpBranch [[continue]] +; CHECK: [[merge]] = OpLabel + +; Unswitched double nested loop is done. Test the single remaining one. + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpSelectionMerge [[if_merge:%\w+]] None +; CHECK-NEXT: OpBranchConditional [[cst_cond]] [[loop_t:%\w+]] [[loop_f:%\w+]] + +; Loop specialized for false. +; CHECK: [[loop_f]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_k:%\w+]] = OpPhi %int %int_0 [[loop_f]] [[iv_k:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_k]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have k+=1 +; CHECK: [[iv_k]] = OpIAdd %int [[phi_k]] %int_1 +; CHECK: OpBranch [[loop]] +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; Loop specialized for true. +; CHECK: [[loop_t]] = OpLabel +; CHECK-NEXT: OpBranch [[loop:%\w+]] +; CHECK: [[loop]] = OpLabel +; CHECK-NEXT: [[phi_k:%\w+]] = OpPhi %int %int_0 [[loop_t]] [[iv_k:%\w+]] [[continue:%\w+]] +; CHECK-NEXT: OpLoopMerge [[merge:%\w+]] [[continue]] None +; CHECK: [[loop_exit:%\w+]] = OpSLessThan {{%\w+}} [[phi_k]] {{%\w+}} +; CHECK-NEXT: OpBranchConditional [[loop_exit]] {{%\w+}} [[merge]] +; Check that we have k+=2. +; CHECK: [[tmp_k:%\w+]] = OpIAdd %int [[phi_k]] %int_1 +; CHECK: [[iv_k]] = OpIAdd %int [[tmp_k]] %int_1 +; CHECK: OpBranch [[loop]] +; CHECK: [[merge]] = OpLabel +; CHECK-NEXT: OpBranch [[if_merge]] + +; CHECK: [[if_merge]] = OpLabel +; CHECK-NEXT: OpReturn + + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 440 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + OpDecorate %25 Uniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %c = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_0 = OpConstant %float 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Input_float %c %uint_0 + %23 = OpLoad %float %22 + %25 = OpFOrdEqual %bool %23 %float_0 + OpBranch %26 + %26 = OpLabel + %67 = OpPhi %int %int_0 %5 %52 %29 + %68 = OpPhi %int %int_0 %5 %70 %29 + OpLoopMerge %28 %29 None + OpBranch %30 + %30 = OpLabel + %33 = OpSLessThan %bool %67 %int_10 + OpBranchConditional %33 %27 %28 + %27 = OpLabel + OpBranch %34 + %34 = OpLabel + %69 = OpPhi %int %67 %27 %46 %37 + %70 = OpPhi %int %68 %27 %50 %37 + OpLoopMerge %36 %37 None + OpBranch %38 + %38 = OpLabel + %40 = OpSLessThan %bool %70 %int_10 + OpBranchConditional %40 %35 %36 + %35 = OpLabel + OpSelectionMerge %43 None + OpBranchConditional %25 %42 %47 + %42 = OpLabel + %46 = OpIAdd %int %69 %int_1 + OpBranch %43 + %47 = OpLabel + OpReturn + %43 = OpLabel + OpBranch %37 + %37 = OpLabel + %50 = OpIAdd %int %70 %int_1 + OpBranch %34 + %36 = OpLabel + OpBranch %29 + %29 = OpLabel + %52 = OpIAdd %int %69 %int_1 + OpBranch %26 + %28 = OpLabel + OpBranch %53 + %53 = OpLabel + %71 = OpPhi %int %int_0 %28 %66 %56 + OpLoopMerge %55 %56 None + OpBranch %57 + %57 = OpLabel + %59 = OpSLessThan %bool %71 %int_10 + OpBranchConditional %59 %54 %55 + %54 = OpLabel + OpSelectionMerge %62 None + OpBranchConditional %25 %61 %62 + %61 = OpLabel + %64 = OpIAdd %int %71 %int_1 + OpBranch %62 + %62 = OpLabel + %72 = OpPhi %int %71 %54 %64 %61 + OpBranch %56 + %56 = OpLabel + %66 = OpIAdd %int %72 %int_1 + OpBranch %53 + %55 = OpLabel + OpReturn + OpFunctionEnd +)"; + + SinglePassRunAndMatch(text, true); +} +#endif // SPIRV_EFFCEE + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +in vec4 c; +void main() { + bool cond = false; + if (c[0] == 0) { + cond = c[1] == 0; + } else { + cond = c[2] == 0; + } + for (int i = 0; i < 10; i++) { + if (cond) { + i++; + } + } +} +*/ +TEST_F(UnswitchTest, UnswitchNotUniform) { + // Check that the unswitch is not triggered (condition loop invariant but not + // uniform) + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpName %main "main" + OpName %c "c" + OpDecorate %c Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %c = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_0 = OpConstant %float 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %c %uint_0 + %18 = OpLoad %float %17 + %20 = OpFOrdEqual %bool %18 %float_0 + OpSelectionMerge %22 None + OpBranchConditional %20 %21 %27 + %21 = OpLabel + %24 = OpAccessChain %_ptr_Input_float %c %uint_1 + %25 = OpLoad %float %24 + %26 = OpFOrdEqual %bool %25 %float_0 + OpBranch %22 + %27 = OpLabel + %29 = OpAccessChain %_ptr_Input_float %c %uint_2 + %30 = OpLoad %float %29 + %31 = OpFOrdEqual %bool %30 %float_0 + OpBranch %22 + %22 = OpLabel + %52 = OpPhi %bool %26 %21 %31 %27 + OpBranch %36 + %36 = OpLabel + %53 = OpPhi %int %int_0 %22 %51 %39 + OpLoopMerge %38 %39 None + OpBranch %40 + %40 = OpLabel + %43 = OpSLessThan %bool %53 %int_10 + OpBranchConditional %43 %37 %38 + %37 = OpLabel + OpSelectionMerge %46 None + OpBranchConditional %52 %45 %46 + %45 = OpLabel + %49 = OpIAdd %int %53 %int_1 + OpBranch %46 + %46 = OpLabel + %54 = OpPhi %int %53 %37 %49 %45 + OpBranch %39 + %39 = OpLabel + %51 = OpIAdd %int %54 %int_1 + OpBranch %36 + %38 = OpLabel + OpReturn + OpFunctionEnd + )"; + + auto result = + SinglePassRunAndDisassemble(text, true, false); + + EXPECT_EQ(opt::Pass::Status::SuccessWithoutChange, std::get<1>(result)); +} + +} // namespace diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 3f8dd884f..9c9a2710c 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -187,6 +187,10 @@ Options (in lexicographical order): --local-redundancy-elimination Looks for instructions in the same basic block that compute the same value, and deletes the redundant ones. + --loop-unswitch + Hoists loop-invariant conditionals out of loops by duplicating + the loop on each branch of the conditional and adjusting each + copy of the loop. -O Optimize for performance. Apply a sequence of transformations in an attempt to improve the performance of the generated @@ -463,6 +467,8 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer, optimizer->RegisterPass(CreateDeadVariableEliminationPass()); } else if (0 == strcmp(cur_arg, "--fold-spec-const-op-composite")) { optimizer->RegisterPass(CreateFoldSpecConstantOpAndCompositePass()); + } else if (0 == strcmp(cur_arg, "--loop-unswitch")) { + optimizer->RegisterPass(CreateLoopUnswitchPass()); } else if (0 == strcmp(cur_arg, "--scalar-replacement")) { optimizer->RegisterPass(CreateScalarReplacementPass()); } else if (0 == strcmp(cur_arg, "--strength-reduction")) {