diff --git a/Android.mk b/Android.mk index e2afc821f..04a84a17f 100644 --- a/Android.mk +++ b/Android.mk @@ -100,6 +100,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/local_single_store_elim_pass.cpp \ source/opt/local_ssa_elim_pass.cpp \ source/opt/loop_descriptor.cpp \ + source/opt/loop_peeling.cpp \ source/opt/loop_unroller.cpp \ source/opt/loop_unswitch_pass.cpp \ source/opt/loop_utils.cpp \ diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index 4121f7c31..bc442e6f3 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -58,6 +58,7 @@ add_library(SPIRV-Tools-opt local_ssa_elim_pass.h log.h loop_descriptor.h + loop_peeling.h loop_unroller.h loop_utils.h loop_unswitch_pass.h @@ -132,6 +133,7 @@ add_library(SPIRV-Tools-opt local_single_store_elim_pass.cpp local_ssa_elim_pass.cpp loop_descriptor.cpp + loop_peeling.cpp loop_utils.cpp loop_unroller.cpp loop_unswitch_pass.cpp diff --git a/source/opt/function.h b/source/opt/function.h index f9fc65518..be7b70bf9 100644 --- a/source/opt/function.h +++ b/source/opt/function.h @@ -15,6 +15,7 @@ #ifndef LIBSPIRV_OPT_CONSTRUCTS_H_ #define LIBSPIRV_OPT_CONSTRUCTS_H_ +#include #include #include #include @@ -92,6 +93,13 @@ class Function { return const_iterator(&blocks_, blocks_.cend()); } + // Returns an iterator to the basic block |id|. + iterator FindBlock(uint32_t bb_id) { + return std::find_if(begin(), end(), [bb_id](const ir::BasicBlock& it_bb) { + return bb_id == it_bb.id(); + }); + } + // Runs the given function |f| on each instruction in this function, and // optionally on debug line instructions that might precede them. void ForEachInst(const std::function& f, diff --git a/source/opt/ir_builder.h b/source/opt/ir_builder.h index aa722cbd6..6c4f5f5c5 100644 --- a/source/opt/ir_builder.h +++ b/source/opt/ir_builder.h @@ -159,6 +159,61 @@ class InstructionBuilder { return AddInstruction(std::move(phi_inst)); } + // Creates an addition instruction. + // The id |type| must be the id of the instruction's type, must be the same as + // |op1| and |op2| types. + // The id |op1| is the left hand side of the operation. + // The id |op2| is the right hand side of the operation. + ir::Instruction* AddIAdd(uint32_t type, uint32_t op1, uint32_t op2) { + std::unique_ptr inst(new ir::Instruction( + GetContext(), SpvOpIAdd, type, GetContext()->TakeNextId(), + {{SPV_OPERAND_TYPE_ID, {op1}}, {SPV_OPERAND_TYPE_ID, {op2}}})); + return AddInstruction(std::move(inst)); + } + + // Creates a less than instruction for unsigned integer. + // The id |op1| is the left hand side of the operation. + // The id |op2| is the right hand side of the operation. + // It is assumed that |op1| and |op2| have the same underlying type. + ir::Instruction* AddULessThan(uint32_t op1, uint32_t op2) { + analysis::Bool bool_type; + uint32_t type = GetContext()->get_type_mgr()->GetId(&bool_type); + std::unique_ptr inst(new ir::Instruction( + GetContext(), SpvOpULessThan, type, GetContext()->TakeNextId(), + {{SPV_OPERAND_TYPE_ID, {op1}}, {SPV_OPERAND_TYPE_ID, {op2}}})); + return AddInstruction(std::move(inst)); + } + + // Creates a less than instruction for signed integer. + // The id |op1| is the left hand side of the operation. + // The id |op2| is the right hand side of the operation. + // It is assumed that |op1| and |op2| have the same underlying type. + ir::Instruction* AddSLessThan(uint32_t op1, uint32_t op2) { + analysis::Bool bool_type; + uint32_t type = GetContext()->get_type_mgr()->GetId(&bool_type); + std::unique_ptr inst(new ir::Instruction( + GetContext(), SpvOpSLessThan, type, GetContext()->TakeNextId(), + {{SPV_OPERAND_TYPE_ID, {op1}}, {SPV_OPERAND_TYPE_ID, {op2}}})); + return AddInstruction(std::move(inst)); + } + + // Creates an OpILessThan or OpULessThen instruction depending on the sign of + // |op1|. The id |op1| is the left hand side of the operation. The id |op2| is + // the right hand side of the operation. It is assumed that |op1| and |op2| + // have the same underlying type. + ir::Instruction* AddLessThan(uint32_t op1, uint32_t op2) { + ir::Instruction* op1_insn = context_->get_def_use_mgr()->GetDef(op1); + analysis::Type* type = + GetContext()->get_type_mgr()->GetType(op1_insn->type_id()); + analysis::Integer* int_type = type->AsInteger(); + assert(int_type && "Operand is not of int type"); + + if (int_type->IsSigned()) + return AddSLessThan(op1, op2); + else + return AddULessThan(op1, op2); + } + // Creates a select instruction. // |type| must match the types of |true_value| and |false_value|. It is up to // the caller to ensure that |cond| is a correct type (bool or vector of diff --git a/source/opt/loop_descriptor.cpp b/source/opt/loop_descriptor.cpp index 27f53e23d..0a4ed9250 100644 --- a/source/opt/loop_descriptor.cpp +++ b/source/opt/loop_descriptor.cpp @@ -289,13 +289,16 @@ void Loop::SetMergeBlock(BasicBlock* merge) { } void Loop::SetPreHeaderBlock(BasicBlock* preheader) { - assert(!IsInsideLoop(preheader) && "The preheader block is in the loop"); - assert(preheader->tail()->opcode() == SpvOpBranch && - "The preheader block does not unconditionally branch to the header " - "block"); - assert(preheader->tail()->GetSingleWordOperand(0) == GetHeaderBlock()->id() && - "The preheader block does not unconditionally branch to the header " - "block"); + if (preheader) { + assert(!IsInsideLoop(preheader) && "The preheader block is in the loop"); + assert(preheader->tail()->opcode() == SpvOpBranch && + "The preheader block does not unconditionally branch to the header " + "block"); + assert(preheader->tail()->GetSingleWordOperand(0) == + GetHeaderBlock()->id() && + "The preheader block does not unconditionally branch to the header " + "block"); + } loop_preheader_ = preheader; } diff --git a/source/opt/loop_peeling.cpp b/source/opt/loop_peeling.cpp new file mode 100644 index 000000000..b2d1b088c --- /dev/null +++ b/source/opt/loop_peeling.cpp @@ -0,0 +1,563 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "ir_builder.h" +#include "ir_context.h" +#include "loop_descriptor.h" +#include "loop_peeling.h" +#include "loop_utils.h" + +namespace spvtools { +namespace opt { + +void LoopPeeling::DuplicateAndConnectLoop( + LoopUtils::LoopCloningResult* clone_results) { + ir::CFG& cfg = *context_->cfg(); + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + assert(CanPeelLoop() && "Cannot peel loop!"); + + std::vector ordered_loop_blocks; + ir::BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock(); + + loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks); + + cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks); + + // Add the basic block to the function. + ir::Function::iterator it = + loop_utils_.GetFunction()->FindBlock(pre_header->id()); + assert(it != loop_utils_.GetFunction()->end() && + "Pre-header not found in the function."); + loop_utils_.GetFunction()->AddBasicBlocks( + clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it); + + // Make the |loop_|'s preheader the |cloned_loop_| one. + ir::BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock(); + pre_header->ForEachSuccessorLabel( + [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); }); + + // Update cfg. + cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id()); + cloned_loop_->SetPreHeaderBlock(pre_header); + loop_->SetPreHeaderBlock(nullptr); + + // When cloning the loop, we didn't cloned the merge block, so currently + // |cloned_loop_| shares the same block as |loop_|. + // We mutate all branches from |cloned_loop_| block to |loop_|'s merge into a + // branch to |loop_|'s header (so header will also be the merge of + // |cloned_loop_|). + uint32_t cloned_loop_exit = 0; + for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) { + if (loop_->IsInsideLoop(pred_id)) continue; + ir::BasicBlock* bb = cfg.block(pred_id); + assert(cloned_loop_exit == 0 && "The loop has multiple exits."); + cloned_loop_exit = bb->id(); + bb->ForEachSuccessorLabel([this](uint32_t* succ) { + if (*succ == loop_->GetMergeBlock()->id()) + *succ = loop_->GetHeaderBlock()->id(); + }); + } + + // Update cfg. + cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id()); + cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id()); + + // Patch the phi of the original loop header: + // - Set the loop entry branch to come from the cloned loop exit block; + // - Set the initial value of the phi using the corresponding cloned loop + // exit values. + // + // We patch the iterating value initializers of the original loop using the + // corresponding cloned loop exit values. Connects the cloned loop iterating + // values to the original loop. This make sure that the initial value of the + // second loop starts with the last value of the first loop. + // + // For example, loops like: + // + // int z = 0; + // for (int i = 0; i++ < M; i += cst1) { + // if (cond) + // z += cst2; + // } + // + // Will become: + // + // int z = 0; + // int i = 0; + // for (; i++ < M; i += cst1) { + // if (cond) + // z += cst2; + // } + // for (; i++ < M; i += cst1) { + // if (cond) + // z += cst2; + // } + loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr, + clone_results, + this](ir::Instruction* phi) { + for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { + if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) { + phi->SetInOperand(i, + {clone_results->value_map_.at( + exit_value_.at(phi->result_id())->result_id())}); + phi->SetInOperand(i + 1, {cloned_loop_exit}); + def_use_mgr->AnalyzeInstUse(phi); + return; + } + } + }); + + // Force the creation of a new preheader for the original loop and set it as + // the merge block for the cloned loop. + cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock()); +} + +void LoopPeeling::InsertCanonicalInductionVariable() { + ir::BasicBlock::iterator insert_point = + GetClonedLoop()->GetLatchBlock()->tail(); + if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) { + --insert_point; + } + InstructionBuilder builder(context_, &*insert_point, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + ir::Instruction* uint_1_cst = + builder.Add32BitConstantInteger(1, int_type_->IsSigned()); + // Create the increment. + // Note that we do "1 + 1" here, one of the operand should the phi + // value but we don't have it yet. The operand will be set latter. + ir::Instruction* iv_inc = builder.AddIAdd( + uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id()); + + builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin()); + + canonical_induction_variable_ = builder.AddPhi( + uint_1_cst->type_id(), + {builder.Add32BitConstantInteger(0, int_type_->IsSigned()) + ->result_id(), + GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(), + GetClonedLoop()->GetLatchBlock()->id()}); + // Connect everything. + iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()}); + + // Update def/use manager. + context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc); + + // If do-while form, use the incremented value. + if (do_while_form_) { + canonical_induction_variable_ = iv_inc; + } +} + +void LoopPeeling::GetIteratorUpdateOperations( + const ir::Loop* loop, ir::Instruction* iterator, + std::unordered_set* operations) { + opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + operations->insert(iterator); + iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) { + ir::Instruction* insn = def_use_mgr->GetDef(*id); + if (insn->opcode() == SpvOpLabel) { + return; + } + if (operations->count(insn)) { + return; + } + if (!loop->IsInsideLoop(insn)) { + return; + } + GetIteratorUpdateOperations(loop, insn, operations); + }); +} + +// Gather the set of blocks for all the path from |entry| to |root|. +static void GetBlocksInPath(uint32_t block, uint32_t entry, + std::unordered_set* blocks_in_path, + const ir::CFG& cfg) { + for (uint32_t pid : cfg.preds(block)) { + if (blocks_in_path->insert(pid).second) { + if (pid != entry) { + GetBlocksInPath(pid, entry, blocks_in_path, cfg); + } + } + } +} + +bool LoopPeeling::IsConditionCheckSideEffectFree() const { + ir::CFG& cfg = *context_->cfg(); + + // The "do-while" form does not cause issues, the algorithm takes into account + // the first iteration. + if (!do_while_form_) { + uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0]; + + std::unordered_set blocks_in_path; + + blocks_in_path.insert(condition_block_id); + GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(), + &blocks_in_path, cfg); + + for (uint32_t bb_id : blocks_in_path) { + ir::BasicBlock* bb = cfg.block(bb_id); + if (!bb->WhileEachInst([this](ir::Instruction* insn) { + if (insn->IsBranch()) return true; + switch (insn->opcode()) { + case SpvOpLabel: + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + return true; + default: + break; + } + return context_->IsCombinatorInstruction(insn); + })) { + return false; + } + } + } + + return true; +} + +void LoopPeeling::GetIteratingExitValues() { + ir::CFG& cfg = *context_->cfg(); + + loop_->GetHeaderBlock()->ForEachPhiInst([this](ir::Instruction* phi) { + exit_value_[phi->result_id()] = nullptr; + }); + + if (!loop_->GetMergeBlock()) { + return; + } + if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { + return; + } + opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0]; + + auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id()); + do_while_form_ = std::find(header_pred.begin(), header_pred.end(), + condition_block_id) != header_pred.end(); + if (do_while_form_) { + loop_->GetHeaderBlock()->ForEachPhiInst( + [condition_block_id, def_use_mgr, this](ir::Instruction* phi) { + std::unordered_set operations; + + for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { + if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) { + exit_value_[phi->result_id()] = + def_use_mgr->GetDef(phi->GetSingleWordInOperand(i)); + } + } + }); + } else { + DominatorTree* dom_tree = + &context_->GetDominatorAnalysis(loop_utils_.GetFunction(), cfg) + ->GetDomTree(); + ir::BasicBlock* condition_block = cfg.block(condition_block_id); + + loop_->GetHeaderBlock()->ForEachPhiInst( + [dom_tree, condition_block, this](ir::Instruction* phi) { + std::unordered_set operations; + + // Not the back-edge value, check if the phi instruction is the only + // possible candidate. + GetIteratorUpdateOperations(loop_, phi, &operations); + + for (ir::Instruction* insn : operations) { + if (insn == phi) { + continue; + } + if (dom_tree->Dominates(context_->get_instr_block(insn), + condition_block)) { + return; + } + } + exit_value_[phi->result_id()] = phi; + }); + } +} + +void LoopPeeling::FixExitCondition( + const std::function& condition_builder) { + ir::CFG& cfg = *context_->cfg(); + + uint32_t condition_block_id = 0; + for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) { + if (GetClonedLoop()->IsInsideLoop(id)) { + condition_block_id = id; + break; + } + } + assert(condition_block_id != 0 && "2nd loop in improperly connected"); + + ir::BasicBlock* condition_block = cfg.block(condition_block_id); + ir::Instruction* exit_condition = condition_block->terminator(); + assert(exit_condition->opcode() == SpvOpBranchConditional); + ir::BasicBlock::iterator insert_point = condition_block->tail(); + if (condition_block->GetMergeInst()) { + --insert_point; + } + + exit_condition->SetInOperand(0, {condition_builder(&*insert_point)}); + + uint32_t to_continue_block_idx = + GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1)) + ? 1 + : 2; + exit_condition->SetInOperand( + 1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)}); + exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()}); + + // Update def/use manager. + context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition); +} + +ir::BasicBlock* LoopPeeling::CreateBlockBefore(ir::BasicBlock* bb) { + analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + ir::CFG& cfg = *context_->cfg(); + assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor"); + + std::unique_ptr new_bb = MakeUnique( + std::unique_ptr(new ir::Instruction( + context_, SpvOpLabel, 0, context_->TakeNextId(), {}))); + new_bb->SetParent(loop_utils_.GetFunction()); + // Update the loop descriptor. + ir::Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb]; + if (in_loop) { + in_loop->AddBasicBlock(new_bb.get()); + loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop); + } + + context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get()); + def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst()); + + ir::BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]); + bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) { + if (*id == bb->id()) { + *id = new_bb->id(); + } + }); + cfg.RemoveEdge(bb_pred->id(), bb->id()); + cfg.AddEdge(bb_pred->id(), new_bb->id()); + def_use_mgr->AnalyzeInstUse(&*bb_pred->tail()); + + // Update the incoming branch. + bb->ForEachPhiInst([&new_bb, def_use_mgr](ir::Instruction* phi) { + phi->SetInOperand(1, {new_bb->id()}); + def_use_mgr->AnalyzeInstUse(phi); + }); + InstructionBuilder(context_, new_bb.get(), + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping) + .AddBranch(bb->id()); + cfg.AddEdge(new_bb->id(), bb->id()); + + // Add the basic block to the function. + ir::Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id()); + assert(it != loop_utils_.GetFunction()->end() && + "Basic block not found in the function."); + ir::BasicBlock* ret = new_bb.get(); + loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it); + return ret; +} + +ir::BasicBlock* LoopPeeling::ProtectLoop(ir::Loop* loop, + ir::Instruction* condition, + ir::BasicBlock* if_merge) { + ir::BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock(); + // Will no longer be a pre-header because of the if. + loop->SetPreHeaderBlock(nullptr); + // Kill the branch to the header. + context_->KillInst(&*if_block->tail()); + + InstructionBuilder builder(context_, if_block, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + builder.AddConditionalBranch(condition->result_id(), + loop->GetHeaderBlock()->id(), if_merge->id(), + if_merge->id()); + + return if_block; +} + +void LoopPeeling::PeelBefore(uint32_t peel_factor) { + assert(CanPeelLoop() && "Cannot peel loop"); + LoopUtils::LoopCloningResult clone_results; + + // Clone the loop and insert the cloned one before the loop. + DuplicateAndConnectLoop(&clone_results); + + // Add a canonical induction variable "canonical_induction_variable_". + InsertCanonicalInductionVariable(); + + InstructionBuilder builder(context_, + &*cloned_loop_->GetPreHeaderBlock()->tail(), + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + ir::Instruction* factor = + builder.Add32BitConstantInteger(peel_factor, int_type_->IsSigned()); + + ir::Instruction* has_remaining_iteration = builder.AddLessThan( + factor->result_id(), loop_iteration_count_->result_id()); + ir::Instruction* max_iteration = builder.AddSelect( + factor->type_id(), has_remaining_iteration->result_id(), + factor->result_id(), loop_iteration_count_->result_id()); + + // Change the exit condition of the cloned loop to be (exit when become + // false): + // "canonical_induction_variable_" < min("factor", "loop_iteration_count_") + FixExitCondition([max_iteration, this](ir::Instruction* insert_before_point) { + return InstructionBuilder(context_, insert_before_point, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping) + .AddLessThan(canonical_induction_variable_->result_id(), + max_iteration->result_id()) + ->result_id(); + }); + + // "Protect" the second loop: the second loop can only be executed if + // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_). + ir::BasicBlock* if_merge_block = loop_->GetMergeBlock(); + loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock())); + // Prevent the second loop from being executed if we already executed all the + // required iterations. + ir::BasicBlock* if_block = + ProtectLoop(loop_, has_remaining_iteration, if_merge_block); + // Patch the phi of the merge block. + if_merge_block->ForEachPhiInst( + [&clone_results, if_block, this](ir::Instruction* phi) { + // if_merge_block had previously only 1 predecessor. + uint32_t incoming_value = phi->GetSingleWordInOperand(0); + auto def_in_loop = clone_results.value_map_.find(incoming_value); + if (def_in_loop != clone_results.value_map_.end()) + incoming_value = def_in_loop->second; + phi->AddOperand( + {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}}); + phi->AddOperand( + {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}}); + context_->get_def_use_mgr()->AnalyzeInstUse(phi); + }); + + context_->InvalidateAnalysesExceptFor( + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping | + ir::IRContext::kAnalysisLoopAnalysis | ir::IRContext::kAnalysisCFG); +} + +void LoopPeeling::PeelAfter(uint32_t peel_factor) { + assert(CanPeelLoop() && "Cannot peel loop"); + LoopUtils::LoopCloningResult clone_results; + + // Clone the loop and insert the cloned one before the loop. + DuplicateAndConnectLoop(&clone_results); + + // Add a canonical induction variable "canonical_induction_variable_". + InsertCanonicalInductionVariable(); + + InstructionBuilder builder(context_, + &*cloned_loop_->GetPreHeaderBlock()->tail(), + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + ir::Instruction* factor = + builder.Add32BitConstantInteger(peel_factor, int_type_->IsSigned()); + + ir::Instruction* has_remaining_iteration = builder.AddLessThan( + factor->result_id(), loop_iteration_count_->result_id()); + + // Change the exit condition of the cloned loop to be (exit when become + // false): + // "canonical_induction_variable_" + "factor" < "loop_iteration_count_" + FixExitCondition([factor, this](ir::Instruction* insert_before_point) { + InstructionBuilder cond_builder( + context_, insert_before_point, + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping); + // Build the following check: canonical_induction_variable_ + factor < + // iteration_count + return cond_builder + .AddLessThan(cond_builder + .AddIAdd(canonical_induction_variable_->type_id(), + canonical_induction_variable_->result_id(), + factor->result_id()) + ->result_id(), + loop_iteration_count_->result_id()) + ->result_id(); + }); + + // "Protect" the first loop: the first loop can only be executed if + // factor < loop_iteration_count_. + + // The original loop's pre-header was the cloned loop merge block. + GetClonedLoop()->SetMergeBlock( + CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock())); + // Use the second loop preheader as if merge block. + + // Prevent the first loop if only the peeled loop needs it. + ir::BasicBlock* if_block = + ProtectLoop(cloned_loop_, has_remaining_iteration, + GetOriginalLoop()->GetPreHeaderBlock()); + + // Patch the phi of the header block. + // We added an if to enclose the first loop and because the phi node are + // connected to the exit value of the first loop, the definition no longer + // dominate the preheader. + // We had to the preheader (our if merge block) the required phi instruction + // and patch the header phi. + GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst( + [&clone_results, if_block, this](ir::Instruction* phi) { + opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + auto find_value_idx = [](ir::Instruction* phi_inst, ir::Loop* loop) { + uint32_t preheader_value_idx = + !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2; + return preheader_value_idx; + }; + + ir::Instruction* cloned_phi = + def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id())); + uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand( + find_value_idx(cloned_phi, GetClonedLoop())); + + ir::Instruction* new_phi = + InstructionBuilder(context_, + &*GetOriginalLoop()->GetPreHeaderBlock()->tail(), + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping) + .AddPhi(phi->type_id(), + {phi->GetSingleWordInOperand( + find_value_idx(phi, GetOriginalLoop())), + GetClonedLoop()->GetMergeBlock()->id(), + cloned_preheader_value, if_block->id()}); + + phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()), + {new_phi->result_id()}); + def_use_mgr->AnalyzeInstUse(phi); + }); + + context_->InvalidateAnalysesExceptFor( + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping | + ir::IRContext::kAnalysisLoopAnalysis | ir::IRContext::kAnalysisCFG); +} + +} // namespace opt +} // namespace spvtools diff --git a/source/opt/loop_peeling.h b/source/opt/loop_peeling.h new file mode 100644 index 000000000..9b912e560 --- /dev/null +++ b/source/opt/loop_peeling.h @@ -0,0 +1,223 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SOURCE_OPT_LOOP_PEELING_H_ +#define SOURCE_OPT_LOOP_PEELING_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "opt/ir_context.h" +#include "opt/loop_descriptor.h" +#include "opt/loop_utils.h" + +namespace spvtools { +namespace opt { + +// Utility class to perform the peeling of a given loop. +// The loop peeling transformation make a certain amount of a loop iterations to +// be executed either before (peel before) or after (peel after) the transformed +// loop. +// +// For peeling cases the transformation does the following steps: +// - It clones the loop and inserts the cloned loop before the original loop; +// - It connects all iterating values of the cloned loop with the +// corresponding original loop values so that the second loop starts with +// the appropriate values. +// - It inserts a new induction variable "i" is inserted into the cloned that +// starts with the value 0 and increment by step of one. +// +// The last step is specific to each case: +// - Peel before: the transformation is to peel the "N" first iterations. +// The exit condition of the cloned loop is changed so that the loop +// exits when "i < N" becomes false. The original loop is then protected to +// only execute if there is any iteration left to do. +// - Peel after: the transformation is to peel the "N" last iterations, +// then the exit condition of the cloned loop is changed so that the loop +// exits when "i + N < max_iteration" becomes false, where "max_iteration" +// is the upper bound of the loop. The cloned loop is then protected to +// only execute if there is any iteration left to do no covered by the +// second. +// +// To be peelable: +// - The loop must be in LCSSA form; +// - The loop must not contain any breaks; +// - The loop must not have any ambiguous iterators updates (see +// "CanPeelLoop"). +// The method "CanPeelLoop" checks that those constrained are met. +// +// FIXME(Victor): Allow the utility it accept an canonical induction variable +// rather than automatically create one. +// FIXME(Victor): When possible, evaluate the initial value of the second loop +// iterating values rather than using the exit value of the first loop. +// FIXME(Victor): Make the utility work-out the upper bound without having to +// provide it. This should become easy once the scalar evolution is in. +class LoopPeeling { + public: + // LoopPeeling constructor. + // |loop| is the loop to peel. + // |loop_iteration_count| is the instruction holding the |loop| iteration + // count, must be invariant for |loop| and must be of an int 32 type (signed + // or unsigned). + LoopPeeling(ir::IRContext* context, ir::Loop* loop, + ir::Instruction* loop_iteration_count) + : context_(context), + loop_utils_(context, loop), + loop_(loop), + loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) + ? loop_iteration_count + : nullptr), + int_type_(nullptr), + canonical_induction_variable_(nullptr) { + if (loop_iteration_count_) { + int_type_ = context_->get_type_mgr() + ->GetType(loop_iteration_count_->type_id()) + ->AsInteger(); + } + GetIteratingExitValues(); + } + + // Returns true if the loop can be peeled. + // To be peelable, all operation involved in the update of the loop iterators + // must not dominates the exit condition. This restriction is a work around to + // not miss compile code like: + // + // for (int i = 0; i + 1 < N; i++) {} + // for (int i = 0; ++i < N; i++) {} + // + // The increment will happen before the test on the exit condition leading to + // very look-a-like code. + // + // This restriction will not apply if a loop rotate is applied before (i.e. + // becomes a do-while loop). + bool CanPeelLoop() const { + ir::CFG& cfg = *context_->cfg(); + + if (!loop_iteration_count_) { + return false; + } + if (!int_type_) { + return false; + } + if (int_type_->width() != 32) { + return false; + } + if (!loop_->IsLCSSA()) { + return false; + } + if (!loop_->GetMergeBlock()) { + return false; + } + if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { + return false; + } + if (!IsConditionCheckSideEffectFree()) { + return false; + } + + return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), + [](std::pair it) { + return it.second == nullptr; + }); + } + + // Moves the execution of the |factor| first iterations of the loop into a + // dedicated loop. + void PeelBefore(uint32_t factor); + + // Moves the execution of the |factor| last iterations of the loop into a + // dedicated loop. + void PeelAfter(uint32_t factor); + + // Returns the cloned loop. + ir::Loop* GetClonedLoop() { return cloned_loop_; } + // Returns the original loop. + ir::Loop* GetOriginalLoop() { return loop_; } + + private: + ir::IRContext* context_; + LoopUtils loop_utils_; + // The original loop. + ir::Loop* loop_; + // The initial |loop_| upper bound. + ir::Instruction* loop_iteration_count_; + // The int type to use for the canonical_induction_variable_. + analysis::Integer* int_type_; + // The cloned loop. + ir::Loop* cloned_loop_; + // This is set to true when the exit and back-edge branch instruction is the + // same. + bool do_while_form_; + + // The canonical induction variable of the cloned loop. The induction variable + // is initialized to 0 and incremented by step of 1. + ir::Instruction* canonical_induction_variable_; + + // Map between loop iterators and exit values. Loop iterators + std::unordered_map exit_value_; + + // Duplicate |loop_| and place the new loop before the cloned loop. Iterating + // values from the cloned loop are then connected to the original loop as + // initializer. + void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); + + // Insert the canonical induction variable into the first loop as a simplified + // counter. + void InsertCanonicalInductionVariable(); + + // Fixes the exit condition of the before loop. The function calls + // |condition_builder| to get the condition to use in the conditional branch + // of the loop exit. The loop will be exited if the condition evaluate to + // true. |condition_builder| takes an ir::Instruction* that represent the + // insertion point. + void FixExitCondition( + const std::function& condition_builder); + + // Gathers all operations involved in the update of |iterator| into + // |operations|. + void GetIteratorUpdateOperations( + const ir::Loop* loop, ir::Instruction* iterator, + std::unordered_set* operations); + + // Gathers exiting iterator values. The function builds a map between each + // iterating value in the loop (a phi instruction in the loop header) and its + // SSA value when it exit the loop. If no exit value can be accurately found, + // it is map to nullptr (see comment on CanPeelLoop). + void GetIteratingExitValues(); + + // Returns true if a for-loop has no instruction with effects before the + // condition check. + bool IsConditionCheckSideEffectFree() const; + + // Creates a new basic block and insert it between |bb| and the predecessor of + // |bb|. + ir::BasicBlock* CreateBlockBefore(ir::BasicBlock* bb); + + // Inserts code to only execute |loop| only if the given |condition| is true. + // |if_merge| is a suitable basic block to be used by the if condition as + // merge block. + // The function returns the if block protecting the loop. + ir::BasicBlock* ProtectLoop(ir::Loop* loop, ir::Instruction* condition, + ir::BasicBlock* if_merge); +}; + +} // namespace opt +} // namespace spvtools + +#endif // SOURCE_OPT_LOOP_PEELING_H_ diff --git a/source/opt/loop_unswitch_pass.cpp b/source/opt/loop_unswitch_pass.cpp index 53f629950..964e765b9 100644 --- a/source/opt/loop_unswitch_pass.cpp +++ b/source/opt/loop_unswitch_pass.cpp @@ -88,9 +88,7 @@ class LoopUnswitch { // Return the iterator to the basic block |bb|. ir::Function::iterator FindBasicBlockPosition(ir::BasicBlock* bb_to_find) { - ir::Function::iterator it = std::find_if( - function_->begin(), function_->end(), - [bb_to_find](const ir::BasicBlock& bb) { return bb_to_find == &bb; }); + ir::Function::iterator it = function_->FindBlock(bb_to_find->id()); assert(it != function_->end() && "Basic Block not found"); return it; } diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp index 853267903..3c99b6a35 100644 --- a/source/opt/loop_utils.cpp +++ b/source/opt/loop_utils.cpp @@ -582,13 +582,20 @@ void LoopUtils::PopulateLoopDesc( new_loop->SetLatchBlock( cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id())); if (old_loop->GetMergeBlock()) { - ir::BasicBlock* bb = - cloning_result.old_to_new_bb_.at(old_loop->GetMergeBlock()->id()); + auto it = + cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id()); + ir::BasicBlock* bb = it != cloning_result.old_to_new_bb_.end() + ? it->second + : old_loop->GetMergeBlock(); new_loop->SetMergeBlock(bb); } - if (old_loop->GetPreHeaderBlock()) - new_loop->SetPreHeaderBlock( - cloning_result.old_to_new_bb_.at(old_loop->GetPreHeaderBlock()->id())); + if (old_loop->GetPreHeaderBlock()) { + auto it = + cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id()); + if (it != cloning_result.old_to_new_bb_.end()) { + new_loop->SetPreHeaderBlock(it->second); + } + } } } // namespace opt diff --git a/source/opt/loop_utils.h b/source/opt/loop_utils.h index 0e77bb6b8..a50be3321 100644 --- a/source/opt/loop_utils.h +++ b/source/opt/loop_utils.h @@ -87,11 +87,14 @@ class LoopUtils { // Clone |loop_| and remap its instructions. Newly created blocks // will be added to the |cloning_result.cloned_bb_| list, correctly ordered to - // be inserted into a function. If the loop is structured, the merge construct - // will also be cloned. The function preserves the def/use, cfg and instr to - // block analyses. + // be inserted into a function. + // It is assumed that |ordered_loop_blocks| is compatible with the result of + // |Loop::ComputeLoopStructuredOrder|. If the preheader and merge block are in + // the list they will also be cloned. If not, the resulting loop will share + // them with the original loop. + // The function preserves the def/use, cfg and instr to block analyses. // The cloned loop nest will be added to the loop descriptor and will have - // owner ship. + // ownership. ir::Loop* CloneLoop( LoopCloningResult* cloning_result, const std::vector& ordered_loop_blocks) const; @@ -125,6 +128,15 @@ class LoopUtils { // called, otherwise the analysis should be invalidated. void Finalize(); + // Returns the context associate to |loop_|. + ir::IRContext* GetContext() { return context_; } + // Returns the loop descriptor owning |loop_|. + ir::LoopDescriptor* GetLoopDescriptor() { return loop_desc_; } + // Returns the loop on which the object operates on. + ir::Loop* GetLoop() const { return loop_; } + // Returns the function that |loop_| belong to. + ir::Function* GetFunction() const { return &function_; } + private: ir::IRContext* context_; ir::LoopDescriptor* loop_desc_; diff --git a/test/opt/loop_optimizations/CMakeLists.txt b/test/opt/loop_optimizations/CMakeLists.txt index 3a1144d72..53054814e 100644 --- a/test/opt/loop_optimizations/CMakeLists.txt +++ b/test/opt/loop_optimizations/CMakeLists.txt @@ -84,3 +84,9 @@ add_spvtools_unittest(TARGET unswitch_test unswitch.cpp LIBS SPIRV-Tools-opt ) + +add_spvtools_unittest(TARGET peeling_test + SRCS ../function_utils.h + peeling.cpp + LIBS SPIRV-Tools-opt +) diff --git a/test/opt/loop_optimizations/peeling.cpp b/test/opt/loop_optimizations/peeling.cpp new file mode 100644 index 000000000..a88e8dd35 --- /dev/null +++ b/test/opt/loop_optimizations/peeling.cpp @@ -0,0 +1,1075 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#ifdef SPIRV_EFFCEE +#include "effcee/effcee.h" +#endif + +#include "../pass_fixture.h" +#include "opt/ir_builder.h" +#include "opt/loop_descriptor.h" +#include "opt/loop_peeling.h" + +namespace { + +using namespace spvtools; + +using PeelingTest = PassTest<::testing::Test>; + +bool Validate(const std::vector& bin) { + spv_target_env target_env = SPV_ENV_UNIVERSAL_1_2; + spv_context spvContext = spvContextCreate(target_env); + spv_diagnostic diagnostic = nullptr; + spv_const_binary_t binary = {bin.data(), bin.size()}; + spv_result_t error = spvValidate(spvContext, &binary, &diagnostic); + if (error != 0) spvDiagnosticPrint(diagnostic); + spvDiagnosticDestroy(diagnostic); + spvContextDestroy(spvContext); + return error == 0; +} + +void Match(const std::string& checks, ir::IRContext* context) { + std::vector bin; + context->module()->ToBinary(&bin, true); + EXPECT_TRUE(Validate(bin)); +#ifdef SPIRV_EFFCEE + std::string assembly; + SpirvTools tools(SPV_ENV_UNIVERSAL_1_2); + EXPECT_TRUE( + tools.Disassemble(bin, &assembly, SPV_BINARY_TO_TEXT_OPTION_NO_HEADER)) + << "Disassembling failed for shader:\n" + << assembly << std::endl; + auto match_result = effcee::Match(assembly, checks); + EXPECT_EQ(effcee::Result::Status::Ok, match_result.status()) + << match_result.message() << "\nChecking result:\n" + << assembly; +#endif // ! SPIRV_EFFCEE +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +First test: +#version 330 core +void main() { + for(int i = 0; i < 10; ++i) { + if (i < 4) + break; + } +} + +Second test (with a common sub-expression elimination): +#version 330 core +void main() { + for(int i = 0; i + 1 < 10; ++i) { + } +} + +Third test: +#version 330 core +void main() { + int a[10]; + for (int i = 0; a[i] != 0; i++) {} +} + +Forth test: +#version 330 core +void main() { + for (long i = 0; i < 10; i++) {} +} + +Fifth test: +#version 330 core +void main() { + for (float i = 0; i < 10; i++) {} +} + +Sixth test: +#version 450 +layout(location = 0)out float o; +void main() { + o = 0.0; + for( int i = 0; true; i++ ) { + o += 1.0; + if (i > 10) break; + } +} +*/ +TEST_F(PeelingTest, CannotPeel) { + // Build the given SPIR-V program in |text|, take the first loop in the first + // function and test that it is not peelable. |loop_count_id| is the id + // representing the loop count, if equals to 0, then the function build a 10 + // constant as loop count. + auto test_cannot_peel = [](const std::string& text, uint32_t loop_count_id) { + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + ir::Instruction* loop_count = nullptr; + if (loop_count_id) { + loop_count = context->get_def_use_mgr()->GetDef(loop_count_id); + } else { + opt::InstructionBuilder builder(context.get(), &*f.begin()); + // Exit condition. + loop_count = builder.Add32BitSignedIntegerConstant(10); + } + + opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count); + EXPECT_FALSE(peel.CanPeelLoop()); + }; + { + SCOPED_TRACE("loop with break"); + + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 330 + OpName %main "main" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_4 = OpConstant %int 4 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %10 + %10 = OpLabel + %28 = OpPhi %int %int_0 %5 %27 %13 + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %18 = OpSLessThan %bool %28 %int_10 + OpBranchConditional %18 %11 %12 + %11 = OpLabel + %21 = OpSLessThan %bool %28 %int_4 + OpSelectionMerge %23 None + OpBranchConditional %21 %22 %23 + %22 = OpLabel + OpBranch %12 + %23 = OpLabel + OpBranch %13 + %13 = OpLabel + %27 = OpIAdd %int %28 %int_1 + OpBranch %10 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + test_cannot_peel(text, 0); + } + + { + SCOPED_TRACE("Ambiguous iterator update"); + + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 330 + OpName %main "main" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %10 + %10 = OpLabel + %23 = OpPhi %int %int_0 %5 %17 %13 + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %17 = OpIAdd %int %23 %int_1 + %20 = OpSLessThan %bool %17 %int_10 + OpBranchConditional %20 %11 %12 + %11 = OpLabel + OpBranch %13 + %13 = OpLabel + OpBranch %10 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + + test_cannot_peel(text, 0); + } + + { + SCOPED_TRACE("No loop static bounds"); + + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 330 + OpName %main "main" + OpName %i "i" + OpName %a "a" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %uint_10 = OpConstant %uint 10 +%_arr_int_uint_10 = OpTypeArray %int %uint_10 +%_ptr_Function__arr_int_uint_10 = OpTypePointer Function %_arr_int_uint_10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %a = OpVariable %_ptr_Function__arr_int_uint_10 Function + OpStore %i %int_0 + OpBranch %10 + %10 = OpLabel + %28 = OpPhi %int %int_0 %5 %27 %13 + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %21 = OpAccessChain %_ptr_Function_int %a %28 + %22 = OpLoad %int %21 + %24 = OpINotEqual %bool %22 %int_0 + OpBranchConditional %24 %11 %12 + %11 = OpLabel + OpBranch %13 + %13 = OpLabel + %27 = OpIAdd %int %28 %int_1 + OpStore %i %27 + OpBranch %10 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + + test_cannot_peel(text, 22); + } + { + SCOPED_TRACE("Int 64 type for conditions"); + + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginLowerLeft + OpSource GLSL 330 + OpName %2 "main" + OpName %4 "i" + %6 = OpTypeVoid + %3 = OpTypeFunction %6 + %7 = OpTypeInt 64 1 + %8 = OpTypePointer Function %7 + %9 = OpConstant %7 0 + %15 = OpConstant %7 10 + %16 = OpTypeBool + %17 = OpConstant %7 1 + %2 = OpFunction %6 None %3 + %5 = OpLabel + %4 = OpVariable %8 Function + OpStore %4 %9 + OpBranch %10 + %10 = OpLabel + %22 = OpPhi %7 %9 %5 %21 %13 + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %18 = OpSLessThan %16 %22 %15 + OpBranchConditional %18 %11 %12 + %11 = OpLabel + OpBranch %13 + %13 = OpLabel + %21 = OpIAdd %7 %22 %17 + OpStore %4 %21 + OpBranch %10 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + // %15 is a constant for a 64 int. Currently rejected. + test_cannot_peel(text, 15); + } + { + SCOPED_TRACE("Float type for conditions"); + + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginLowerLeft + OpSource GLSL 330 + OpName %2 "main" + OpName %4 "i" + %6 = OpTypeVoid + %3 = OpTypeFunction %6 + %7 = OpTypeFloat 32 + %8 = OpTypePointer Function %7 + %9 = OpConstant %7 0 + %15 = OpConstant %7 10 + %16 = OpTypeBool + %17 = OpConstant %7 1 + %2 = OpFunction %6 None %3 + %5 = OpLabel + %4 = OpVariable %8 Function + OpStore %4 %9 + OpBranch %10 + %10 = OpLabel + %22 = OpPhi %7 %9 %5 %21 %13 + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %18 = OpFOrdLessThan %16 %22 %15 + OpBranchConditional %18 %11 %12 + %11 = OpLabel + OpBranch %13 + %13 = OpLabel + %21 = OpFAdd %7 %22 %17 + OpStore %4 %21 + OpBranch %10 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + // %15 is a constant for a float. Currently rejected. + test_cannot_peel(text, 15); + } + { + SCOPED_TRACE("Side effect before exit"); + + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %o + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 450 + OpName %main "main" + OpName %o "o" + OpName %i "i" + OpDecorate %o Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %o = OpVariable %_ptr_Output_float Output + %float_0 = OpConstant %float 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %true = OpConstantTrue %bool + %float_1 = OpConstant %float 1 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + OpStore %o %float_0 + OpStore %i %int_0 + OpBranch %14 + %14 = OpLabel + %33 = OpPhi %int %int_0 %5 %32 %17 + OpLoopMerge %16 %17 None + OpBranch %15 + %15 = OpLabel + %22 = OpLoad %float %o + %23 = OpFAdd %float %22 %float_1 + OpStore %o %23 + %26 = OpSGreaterThan %bool %33 %int_10 + OpSelectionMerge %28 None + OpBranchConditional %26 %27 %28 + %27 = OpLabel + OpBranch %16 + %28 = OpLabel + OpBranch %17 + %17 = OpLabel + %32 = OpIAdd %int %33 %int_1 + OpStore %i %32 + OpBranch %14 + %16 = OpLabel + OpReturn + OpFunctionEnd + )"; + test_cannot_peel(text, 0); + } +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +void main() { + int i = 0; + for (; i < 10; i++) {} +} +*/ +TEST_F(PeelingTest, SimplePeeling) { + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 330 + OpName %main "main" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %10 + %10 = OpLabel + %22 = OpPhi %int %int_0 %5 %21 %13 + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %18 = OpSLessThan %bool %22 %int_10 + OpBranchConditional %18 %11 %12 + %11 = OpLabel + OpBranch %13 + %13 = OpLabel + %21 = OpIAdd %int %22 %int_1 + OpBranch %10 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + + // Peel before. + { + SCOPED_TRACE("Peel before"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + opt::InstructionBuilder builder(context.get(), &*f.begin()); + // Exit condition. + ir::Instruction* ten_cst = builder.Add32BitSignedIntegerConstant(10); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelBefore(2); + + const std::string check = R"( +CHECK: [[CST_TEN:%\w+]] = OpConstant {{%\w+}} 10 +CHECK: [[CST_TWO:%\w+]] = OpConstant {{%\w+}} 2 +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} [[CST_TWO]] [[CST_TEN]] +CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] [[CST_TWO]] [[CST_TEN]] +CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None +CHECK: [[COND_BLOCK:%\w+]] = OpLabel +CHECK-NEXT: OpSLessThan +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[AFTER_LOOP_PREHEADER]] +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]] +CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranch [[BEFORE_LOOP]] + +CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[i]] [[AFTER_LOOP_PREHEADER]] +CHECK-NEXT: OpLoopMerge +)"; + + Match(check, context.get()); + } + + // Peel after. + { + SCOPED_TRACE("Peel after"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + opt::InstructionBuilder builder(context.get(), &*f.begin()); + // Exit condition. + ir::Instruction* ten_cst = builder.Add32BitSignedIntegerConstant(10); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelAfter(2); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]] +CHECK: [[BEFORE_LOOP]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None +CHECK: [[COND_BLOCK:%\w+]] = OpLabel +CHECK-NEXT: OpSLessThan +CHECK-NEXT: [[TMP:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT]] {{%\w+}} +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[TMP]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[BEFORE_LOOP_MERGE]] +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]] +CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranch [[BEFORE_LOOP]] + +CHECK: [[IF_MERGE]] = OpLabel +CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I]] [[BEFORE_LOOP_MERGE]] +CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]] +CHECK-NEXT: OpLoopMerge + +)"; + + Match(check, context.get()); + } +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +void main() { + int a[10]; + int n = a[0]; + for(int i = 0; i < n; ++i) {} +} +*/ +TEST_F(PeelingTest, PeelingUncountable) { + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 330 + OpName %main "main" + OpName %a "a" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %uint = OpTypeInt 32 0 + %uint_10 = OpConstant %uint 10 +%_arr_int_uint_10 = OpTypeArray %int %uint_10 +%_ptr_Function__arr_int_uint_10 = OpTypePointer Function %_arr_int_uint_10 + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function__arr_int_uint_10 Function + %15 = OpAccessChain %_ptr_Function_int %a %int_0 + %16 = OpLoad %int %15 + OpBranch %18 + %18 = OpLabel + %30 = OpPhi %int %int_0 %5 %29 %21 + OpLoopMerge %20 %21 None + OpBranch %22 + %22 = OpLabel + %26 = OpSLessThan %bool %30 %16 + OpBranchConditional %26 %19 %20 + %19 = OpLabel + OpBranch %21 + %21 = OpLabel + %29 = OpIAdd %int %30 %int_1 + OpBranch %18 + %20 = OpLabel + OpReturn + OpFunctionEnd + )"; + + // Peel before. + { + SCOPED_TRACE("Peel before"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(16); + EXPECT_EQ(loop_count->opcode(), SpvOpLoad); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelBefore(1); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[LOOP_COUNT:%\w+]] = OpLoad +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} {{%\w+}} [[LOOP_COUNT]] +CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] {{%\w+}} [[LOOP_COUNT]] +CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None +CHECK: [[COND_BLOCK:%\w+]] = OpLabel +CHECK-NEXT: OpSLessThan +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[AFTER_LOOP_PREHEADER]] +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]] +CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranch [[BEFORE_LOOP]] + +CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[i]] [[AFTER_LOOP_PREHEADER]] +CHECK-NEXT: OpLoopMerge +)"; + + Match(check, context.get()); + } + + // Peel after. + { + SCOPED_TRACE("Peel after"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(16); + EXPECT_EQ(loop_count->opcode(), SpvOpLoad); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelAfter(1); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]] +CHECK: [[BEFORE_LOOP]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None +CHECK: [[COND_BLOCK:%\w+]] = OpLabel +CHECK-NEXT: OpSLessThan +CHECK-NEXT: [[TMP:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT]] {{%\w+}} +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[TMP]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[BEFORE_LOOP_MERGE]] +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]] +CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranch [[BEFORE_LOOP]] + +CHECK: [[IF_MERGE]] = OpLabel +CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I]] [[BEFORE_LOOP_MERGE]] +CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]] +CHECK-NEXT: OpLoopMerge + +)"; + + Match(check, context.get()); + } +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +void main() { + int i = 0; + do { + i++; + } while (i < 10); +} +*/ +TEST_F(PeelingTest, DoWhilePeeling) { + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 330 + OpName %main "main" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %10 + %10 = OpLabel + %21 = OpPhi %int %int_0 %5 %16 %13 + OpLoopMerge %12 %13 None + OpBranch %11 + %11 = OpLabel + %16 = OpIAdd %int %21 %int_1 + OpBranch %13 + %13 = OpLabel + %20 = OpSLessThan %bool %16 %int_10 + OpBranchConditional %20 %10 %12 + %12 = OpLabel + OpReturn + OpFunctionEnd + )"; + + // Peel before. + { + SCOPED_TRACE("Peel before"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + opt::InstructionBuilder builder(context.get(), &*f.begin()); + // Exit condition. + ir::Instruction* ten_cst = builder.Add32BitUnsignedIntegerConstant(10); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelBefore(2); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpULessThan {{%\w+}} +CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] +CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]] +CHECK: [[BE]] = OpLabel +CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpULessThan {{%\w+}} [[DUMMY_IT_1]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] [[BEFORE_LOOP]] [[AFTER_LOOP_PREHEADER]] + +CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[I_1]] [[AFTER_LOOP_PREHEADER]] +CHECK-NEXT: OpLoopMerge +)"; + + Match(check, context.get()); + } + + // Peel after. + { + SCOPED_TRACE("Peel after"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + opt::InstructionBuilder builder(context.get(), &*f.begin()); + // Exit condition. + ir::Instruction* ten_cst = builder.Add32BitUnsignedIntegerConstant(10); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelAfter(2); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpULessThan {{%\w+}} +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]] +CHECK: [[BEFORE_LOOP]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]] +CHECK: [[BE]] = OpLabel +CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: [[EXIT_VAL:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT_1]] +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpULessThan {{%\w+}} [[EXIT_VAL]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] [[BEFORE_LOOP]] [[BEFORE_LOOP_MERGE]] + +CHECK: [[IF_MERGE]] = OpLabel +CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I_1]] [[BEFORE_LOOP_MERGE]] +CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]] +CHECK-NEXT: OpLoopMerge +)"; + + Match(check, context.get()); + } +} + +/* +Generated from the following GLSL + --eliminate-local-multi-store + +#version 330 core +void main() { + int a[10]; + int n = a[0]; + for(int i = 0; i < n; ++i) {} +} +*/ +TEST_F(PeelingTest, PeelingLoopWithStore) { + const std::string text = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %o %n + OpExecutionMode %main OriginLowerLeft + OpSource GLSL 450 + OpName %main "main" + OpName %o "o" + OpName %end "end" + OpName %n "n" + OpName %i "i" + OpDecorate %o Location 0 + OpDecorate %n Flat + OpDecorate %n Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %o = OpVariable %_ptr_Output_float Output + %float_0 = OpConstant %float 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %n = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_1 = OpConstant %float 1 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %end = OpVariable %_ptr_Function_int Function + %i = OpVariable %_ptr_Function_int Function + OpStore %o %float_0 + %15 = OpLoad %int %n + OpStore %end %15 + OpStore %i %int_0 + OpBranch %18 + %18 = OpLabel + %33 = OpPhi %int %int_0 %5 %32 %21 + OpLoopMerge %20 %21 None + OpBranch %22 + %22 = OpLabel + %26 = OpSLessThan %bool %33 %15 + OpBranchConditional %26 %19 %20 + %19 = OpLabel + %28 = OpLoad %float %o + %29 = OpFAdd %float %28 %float_1 + OpStore %o %29 + OpBranch %21 + %21 = OpLabel + %32 = OpIAdd %int %33 %int_1 + OpStore %i %32 + OpBranch %18 + %20 = OpLabel + OpReturn + OpFunctionEnd + )"; + + // Peel before. + { + SCOPED_TRACE("Peel before"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(15); + EXPECT_EQ(loop_count->opcode(), SpvOpLoad); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelBefore(1); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[LOOP_COUNT:%\w+]] = OpLoad +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} {{%\w+}} [[LOOP_COUNT]] +CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] {{%\w+}} [[LOOP_COUNT]] +CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None +CHECK: [[COND_BLOCK:%\w+]] = OpLabel +CHECK-NEXT: OpSLessThan +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[AFTER_LOOP_PREHEADER]] +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]] +CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranch [[BEFORE_LOOP]] + +CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[i]] [[AFTER_LOOP_PREHEADER]] +CHECK-NEXT: OpLoopMerge +)"; + + Match(check, context.get()); + } + + // Peel after. + { + SCOPED_TRACE("Peel after"); + + std::unique_ptr context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << text << std::endl; + ir::Function& f = *module->begin(); + ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f); + + EXPECT_EQ(ld.NumLoops(), 1u); + + ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(15); + EXPECT_EQ(loop_count->opcode(), SpvOpLoad); + + opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count); + EXPECT_TRUE(peel.CanPeelLoop()); + peel.PeelAfter(1); + + const std::string check = R"( +CHECK: OpFunction +CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel +CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} +CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]] +CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]] +CHECK: [[BEFORE_LOOP]] = OpLabel +CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]] +CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]] +CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None +CHECK: [[COND_BLOCK:%\w+]] = OpLabel +CHECK-NEXT: OpSLessThan +CHECK-NEXT: [[TMP:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT]] {{%\w+}} +CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[TMP]] +CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[BEFORE_LOOP_MERGE]] +CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]] +CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]] +CHECK-NEXT: OpBranch [[BEFORE_LOOP]] + +CHECK: [[IF_MERGE]] = OpLabel +CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I]] [[BEFORE_LOOP_MERGE]] +CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]] + +CHECK: [[AFTER_LOOP]] = OpLabel +CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]] +CHECK-NEXT: OpLoopMerge + +)"; + + Match(check, context.get()); + } +} + +} // namespace