diff --git a/source/opt/inline_pass.cpp b/source/opt/inline_pass.cpp index 3c874a7ef..bc07ff0f2 100644 --- a/source/opt/inline_pass.cpp +++ b/source/opt/inline_pass.cpp @@ -20,6 +20,7 @@ #include #include "source/cfa.h" +#include "source/opt/reflect.h" #include "source/util/make_unique.h" // Indices of operands in SPIR-V instructions @@ -232,6 +233,220 @@ bool InlinePass::CloneSameBlockOps( }); } +void InlinePass::MoveInstsBeforeEntryBlock( + std::unordered_map* preCallSB, + BasicBlock* new_blk_ptr, BasicBlock::iterator call_inst_itr, + UptrVectorIterator call_block_itr) { + for (auto cii = call_block_itr->begin(); cii != call_inst_itr; + cii = call_block_itr->begin()) { + Instruction* inst = &*cii; + inst->RemoveFromList(); + std::unique_ptr cp_inst(inst); + // Remember same-block ops for possible regeneration. + if (IsSameBlockOp(&*cp_inst)) { + auto* sb_inst_ptr = cp_inst.get(); + (*preCallSB)[cp_inst->result_id()] = sb_inst_ptr; + } + new_blk_ptr->AddInstruction(std::move(cp_inst)); + } +} + +std::unique_ptr InlinePass::AddGuardBlock( + std::vector>* new_blocks, + std::unordered_map* callee2caller, + std::unique_ptr new_blk_ptr, uint32_t entry_blk_label_id) { + const auto guard_block_id = context()->TakeNextId(); + if (guard_block_id == 0) { + return nullptr; + } + AddBranch(guard_block_id, &new_blk_ptr); + new_blocks->push_back(std::move(new_blk_ptr)); + // Start the next block. + new_blk_ptr = MakeUnique(NewLabel(guard_block_id)); + // Reset the mapping of the callee's entry block to point to + // the guard block. Do this so we can fix up phis later on to + // satisfy dominance. 
+ (*callee2caller)[entry_blk_label_id] = guard_block_id; + return new_blk_ptr; +} + +InstructionList::iterator InlinePass::AddStoresForVariableInitializers( + const std::unordered_map& callee2caller, + std::unique_ptr* new_blk_ptr, + UptrVectorIterator callee_first_block_itr) { + auto callee_var_itr = callee_first_block_itr->begin(); + while (callee_var_itr->opcode() == SpvOp::SpvOpVariable) { + if (callee_var_itr->NumInOperands() == 2) { + assert(callee2caller.count(callee_var_itr->result_id()) && + "Expected the variable to have already been mapped."); + uint32_t new_var_id = callee2caller.at(callee_var_itr->result_id()); + + // The initializer must be a constant or global value, so its id is + // used as-is, without remapping through |callee2caller|. + uint32_t val_id = callee_var_itr->GetSingleWordInOperand(1); + AddStore(new_var_id, val_id, new_blk_ptr); + } + ++callee_var_itr; + } + return callee_var_itr; +} + +bool InlinePass::InlineInstructionInBB( + const std::unordered_map& callee2caller, + BasicBlock* new_blk_ptr, const Instruction* inst) { + // If we have return, it must be at the end of the callee. We will handle + // it at the end. + if (inst->opcode() == SpvOpReturnValue || inst->opcode() == SpvOpReturn) + return true; + + // Copy callee instruction and remap all input Ids. + std::unique_ptr cp_inst(inst->Clone(context())); + cp_inst->ForEachInId([&callee2caller](uint32_t* iid) { + const auto mapItr = callee2caller.find(*iid); + if (mapItr != callee2caller.end()) { + *iid = mapItr->second; + } + }); + // If result id is non-zero, remap it. 
+ const uint32_t rid = cp_inst->result_id(); + if (rid != 0) { + const auto mapItr = callee2caller.find(rid); + if (mapItr == callee2caller.end()) return false; + uint32_t nid = mapItr->second; + cp_inst->SetResultId(nid); + get_decoration_mgr()->CloneDecorations(rid, nid); + } + new_blk_ptr->AddInstruction(std::move(cp_inst)); + return true; +} + +std::unique_ptr InlinePass::InlineReturn( + const std::unordered_map& callee2caller, + std::vector>* new_blocks, + std::unique_ptr new_blk_ptr, Function* calleeFn, + const Instruction* inst, uint32_t returnVarId) { + // Store return value to return variable. + if (inst->opcode() == SpvOpReturnValue) { + assert(returnVarId != 0); + uint32_t valId = inst->GetInOperand(kSpvReturnValueId).words[0]; + const auto mapItr = callee2caller.find(valId); + if (mapItr != callee2caller.end()) { + valId = mapItr->second; + } + AddStore(returnVarId, valId, &new_blk_ptr); + } + + uint32_t returnLabelId = 0; + for (auto callee_block_itr = calleeFn->begin(); + callee_block_itr != calleeFn->end(); ++callee_block_itr) { + if (callee_block_itr->tail()->opcode() == SpvOpUnreachable || + callee_block_itr->tail()->opcode() == SpvOpKill) { + returnLabelId = context()->TakeNextId(); + break; + } + } + if (returnLabelId == 0) return new_blk_ptr; + + if (inst->opcode() == SpvOpReturn || inst->opcode() == SpvOpReturnValue) + AddBranch(returnLabelId, &new_blk_ptr); + new_blocks->push_back(std::move(new_blk_ptr)); + return MakeUnique(NewLabel(returnLabelId)); +} + +bool InlinePass::InlineEntryBlock( + const std::unordered_map& callee2caller, + std::unique_ptr* new_blk_ptr, + UptrVectorIterator callee_first_block) { + auto callee_inst_itr = AddStoresForVariableInitializers( + callee2caller, new_blk_ptr, callee_first_block); + + while (callee_inst_itr != callee_first_block->end()) { + if (!InlineInstructionInBB(callee2caller, new_blk_ptr->get(), + &*callee_inst_itr)) { + return false; + } + ++callee_inst_itr; + } + return true; +} + +std::unique_ptr 
InlinePass::InlineBasicBlocks( + std::vector>* new_blocks, + const std::unordered_map& callee2caller, + std::unique_ptr new_blk_ptr, Function* calleeFn) { + auto callee_block_itr = calleeFn->begin(); + ++callee_block_itr; + + while (callee_block_itr != calleeFn->end()) { + new_blocks->push_back(std::move(new_blk_ptr)); + const auto mapItr = + callee2caller.find(callee_block_itr->GetLabelInst()->result_id()); + if (mapItr == callee2caller.end()) return nullptr; + new_blk_ptr = MakeUnique(NewLabel(mapItr->second)); + + auto tail_inst_itr = callee_block_itr->end(); + for (auto inst_itr = callee_block_itr->begin(); inst_itr != tail_inst_itr; + ++inst_itr) { + if (!InlineInstructionInBB(callee2caller, new_blk_ptr.get(), + &*inst_itr)) { + return nullptr; + } + } + + ++callee_block_itr; + } + return new_blk_ptr; +} + +bool InlinePass::MoveCallerInstsAfterFunctionCall( + std::unordered_map* preCallSB, + std::unordered_map* postCallSB, + std::unique_ptr* new_blk_ptr, + BasicBlock::iterator call_inst_itr, bool multiBlocks) { + // Copy remaining instructions from caller block. + for (Instruction* inst = call_inst_itr->NextNode(); inst; + inst = call_inst_itr->NextNode()) { + inst->RemoveFromList(); + std::unique_ptr cp_inst(inst); + // If multiple blocks generated, regenerate any same-block + // instruction that has not been seen in this last block. + if (multiBlocks) { + if (!CloneSameBlockOps(&cp_inst, postCallSB, preCallSB, new_blk_ptr)) { + return false; + } + + // Remember same-block ops in this block. + if (IsSameBlockOp(&*cp_inst)) { + const uint32_t rid = cp_inst->result_id(); + (*postCallSB)[rid] = rid; + } + } + new_blk_ptr->get()->AddInstruction(std::move(cp_inst)); + } + + return true; +} + +void InlinePass::MoveLoopMergeInstToFirstBlock( + std::vector>* new_blocks) { + // Move the OpLoopMerge from the last block back to the first, where + // it belongs. 
+ auto& first = new_blocks->front(); + auto& last = new_blocks->back(); + assert(first != last); + + // Insert a copy of the loop merge before the first block's terminator. + auto loop_merge_itr = last->tail(); + --loop_merge_itr; + assert(loop_merge_itr->opcode() == SpvOpLoopMerge); + std::unique_ptr cp_inst(loop_merge_itr->Clone(context())); + first->tail().InsertBefore(std::move(cp_inst)); + + // Remove the loop merge from the last block. + loop_merge_itr->RemoveFromList(); + delete &*loop_merge_itr; +} + bool InlinePass::GenInlineCode( std::vector>* new_blocks, std::vector>* new_vars, @@ -250,13 +465,19 @@ bool InlinePass::GenInlineCode( // valid. These operations can fail. context()->InvalidateAnalyses(IRContext::kAnalysisDefUse); + // If the caller is a loop header and the callee has multiple blocks, then the + // normal inlining logic will place the OpLoopMerge in the last of several + // blocks in the loop. Instead, it should be placed at the end of the first + // block. We'll wait to move the OpLoopMerge until the end of the regular + // inlining logic, and only if necessary. + bool caller_is_loop_header = call_block_itr->GetLoopMergeInst() != nullptr; + + // Single-trip loop continue block. NOTE(review): no visible use; confirm. + std::unique_ptr single_trip_loop_cont_blk; + Function* calleeFn = id2function_[call_inst_itr->GetSingleWordOperand( kSpvFunctionCallFunctionId)]; - // Check for multiple returns in the callee. - auto fi = early_return_funcs_.find(calleeFn->result_id()); - const bool earlyReturn = fi != early_return_funcs_.end(); - // Map parameters to actual arguments. MapParams(calleeFn, call_inst_itr, &callee2caller); @@ -266,6 +487,31 @@ bool InlinePass::GenInlineCode( return false; } + // First block needs to use label of original block + // but map callee label in case of phi reference. 
+ uint32_t entry_blk_label_id = calleeFn->begin()->GetLabelInst()->result_id(); + callee2caller[entry_blk_label_id] = call_block_itr->id(); + std::unique_ptr new_blk_ptr = + MakeUnique(NewLabel(call_block_itr->id())); + + // Move instructions of original caller block up to call instruction. + MoveInstsBeforeEntryBlock(&preCallSB, new_blk_ptr.get(), call_inst_itr, + call_block_itr); + + if (caller_is_loop_header && + (*(calleeFn->begin())).GetMergeInst() != nullptr) { + // We can't place both the caller's merge instruction and + // another merge instruction in the same block. So split the + // calling block. Insert an unconditional branch to a new guard + // block. Later, once we know the ID of the last block, we + // will move the caller's OpLoopMerge from the last generated + // block into the first block. We also wait to avoid + // invalidating various iterators. + new_blk_ptr = AddGuardBlock(new_blocks, &callee2caller, + std::move(new_blk_ptr), entry_blk_label_id); + if (new_blk_ptr == nullptr) return false; + } + // Create return var if needed. const uint32_t calleeTypeId = calleeFn->type_id(); uint32_t returnVarId = 0; @@ -277,341 +523,50 @@ bool InlinePass::GenInlineCode( } } - // Create set of callee result ids. Used to detect forward references - std::unordered_set callee_result_ids; - calleeFn->ForEachInst([&callee_result_ids](const Instruction* cpi) { + calleeFn->WhileEachInst([&callee2caller, this](const Instruction* cpi) { + // Pre-map every callee result id to a new caller id (forward refs too). const uint32_t rid = cpi->result_id(); - if (rid != 0) callee_result_ids.insert(rid); + if (rid != 0 && callee2caller.find(rid) == callee2caller.end()) { + const uint32_t nid = context()->TakeNextId(); + if (nid == 0) return false; + callee2caller[rid] = nid; + } + return true; }); - // If the caller is a loop header and the callee has multiple blocks, then the - // normal inlining logic will place the OpLoopMerge in the last of several - // blocks in the loop. 
Instead, it should be placed at the end of the first - // block. We'll wait to move the OpLoopMerge until the end of the regular - // inlining logic, and only if necessary. - bool caller_is_loop_header = false; - if (call_block_itr->GetLoopMergeInst()) { - caller_is_loop_header = true; - } - - bool callee_begins_with_structured_header = - (*(calleeFn->begin())).GetMergeInst() != nullptr; - - // Clone and map callee code. Copy caller block code to beginning of - // first block and end of last block. - bool prevInstWasReturn = false; - uint32_t singleTripLoopHeaderId = 0; - uint32_t singleTripLoopContinueId = 0; - uint32_t returnLabelId = 0; - bool multiBlocks = false; - // new_blk_ptr is a new basic block in the caller. New instructions are - // written to it. It is created when we encounter the OpLabel - // of the first callee block. It is appended to new_blocks only when - // it is complete. - std::unique_ptr new_blk_ptr; - bool successful = calleeFn->WhileEachInst( - [&new_blocks, &callee2caller, &call_block_itr, &call_inst_itr, - &new_blk_ptr, &prevInstWasReturn, &returnLabelId, &returnVarId, - caller_is_loop_header, callee_begins_with_structured_header, - &calleeTypeId, &multiBlocks, &postCallSB, &preCallSB, earlyReturn, - &singleTripLoopHeaderId, &singleTripLoopContinueId, &callee_result_ids, - this](const Instruction* cpi) { - switch (cpi->opcode()) { - case SpvOpFunction: - case SpvOpFunctionParameter: - // Already processed - break; - case SpvOpVariable: - if (cpi->NumInOperands() == 2) { - assert(callee2caller.count(cpi->result_id()) && - "Expected the variable to have already been mapped."); - uint32_t new_var_id = callee2caller.at(cpi->result_id()); - - // The initializer must be a constant or global value. No mapped - // should be used. 
- uint32_t val_id = cpi->GetSingleWordInOperand(1); - AddStore(new_var_id, val_id, &new_blk_ptr); - } - break; - case SpvOpUnreachable: - case SpvOpKill: { - // Generate a return label so that we split the block with the - // function call. Copy the terminator into the new block. - if (returnLabelId == 0) { - returnLabelId = context()->TakeNextId(); - if (returnLabelId == 0) { - return false; - } - } - std::unique_ptr terminator( - new Instruction(context(), cpi->opcode(), 0, 0, {})); - new_blk_ptr->AddInstruction(std::move(terminator)); - break; - } - case SpvOpLabel: { - // If previous instruction was early return, insert branch - // instruction to return block. - if (prevInstWasReturn) { - if (returnLabelId == 0) { - returnLabelId = context()->TakeNextId(); - if (returnLabelId == 0) { - return false; - } - } - AddBranch(returnLabelId, &new_blk_ptr); - prevInstWasReturn = false; - } - // Finish current block (if it exists) and get label for next block. - uint32_t labelId; - bool firstBlock = false; - if (new_blk_ptr != nullptr) { - new_blocks->push_back(std::move(new_blk_ptr)); - // If result id is already mapped, use it, otherwise get a new - // one. - const uint32_t rid = cpi->result_id(); - const auto mapItr = callee2caller.find(rid); - labelId = (mapItr != callee2caller.end()) - ? mapItr->second - : context()->TakeNextId(); - if (labelId == 0) { - return false; - } - } else { - // First block needs to use label of original block - // but map callee label in case of phi reference. - labelId = call_block_itr->id(); - callee2caller[cpi->result_id()] = labelId; - firstBlock = true; - } - // Create first/next block. - new_blk_ptr = MakeUnique(NewLabel(labelId)); - if (firstBlock) { - // Copy contents of original caller block up to call instruction. 
- for (auto cii = call_block_itr->begin(); cii != call_inst_itr; - cii = call_block_itr->begin()) { - Instruction* inst = &*cii; - inst->RemoveFromList(); - std::unique_ptr cp_inst(inst); - // Remember same-block ops for possible regeneration. - if (IsSameBlockOp(&*cp_inst)) { - auto* sb_inst_ptr = cp_inst.get(); - preCallSB[cp_inst->result_id()] = sb_inst_ptr; - } - new_blk_ptr->AddInstruction(std::move(cp_inst)); - } - if (caller_is_loop_header && - callee_begins_with_structured_header) { - // We can't place both the caller's merge instruction and - // another merge instruction in the same block. So split the - // calling block. Insert an unconditional branch to a new guard - // block. Later, once we know the ID of the last block, we - // will move the caller's OpLoopMerge from the last generated - // block into the first block. We also wait to avoid - // invalidating various iterators. - const auto guard_block_id = context()->TakeNextId(); - if (guard_block_id == 0) { - return false; - } - AddBranch(guard_block_id, &new_blk_ptr); - new_blocks->push_back(std::move(new_blk_ptr)); - // Start the next block. - new_blk_ptr = MakeUnique(NewLabel(guard_block_id)); - // Reset the mapping of the callee's entry block to point to - // the guard block. Do this so we can fix up phis later on to - // satisfy dominance. - callee2caller[cpi->result_id()] = guard_block_id; - } - // If callee has early return, insert a header block for - // single-trip loop that will encompass callee code. Start - // postheader block. - // - // Note: Consider the following combination: - // - the caller is a single block loop - // - the callee does not begin with a structure header - // - the callee has multiple returns. - // We still need to split the caller block and insert a guard - // block. But we only need to do it once. We haven't done it yet, - // but the single-trip loop header will serve the same purpose. 
- if (earlyReturn) { - singleTripLoopHeaderId = context()->TakeNextId(); - if (singleTripLoopHeaderId == 0) { - return false; - } - AddBranch(singleTripLoopHeaderId, &new_blk_ptr); - new_blocks->push_back(std::move(new_blk_ptr)); - new_blk_ptr = - MakeUnique(NewLabel(singleTripLoopHeaderId)); - returnLabelId = context()->TakeNextId(); - singleTripLoopContinueId = context()->TakeNextId(); - if (returnLabelId == 0 || singleTripLoopContinueId == 0) { - return false; - } - AddLoopMerge(returnLabelId, singleTripLoopContinueId, - &new_blk_ptr); - uint32_t postHeaderId = context()->TakeNextId(); - if (postHeaderId == 0) { - return false; - } - AddBranch(postHeaderId, &new_blk_ptr); - new_blocks->push_back(std::move(new_blk_ptr)); - new_blk_ptr = MakeUnique(NewLabel(postHeaderId)); - multiBlocks = true; - // Reset the mapping of the callee's entry block to point to - // the post-header block. Do this so we can fix up phis later - // on to satisfy dominance. - callee2caller[cpi->result_id()] = postHeaderId; - } - } else { - multiBlocks = true; - } - } break; - case SpvOpReturnValue: { - // Store return value to return variable. - assert(returnVarId != 0); - uint32_t valId = cpi->GetInOperand(kSpvReturnValueId).words[0]; - const auto mapItr = callee2caller.find(valId); - if (mapItr != callee2caller.end()) { - valId = mapItr->second; - } - AddStore(returnVarId, valId, &new_blk_ptr); - - // Remember we saw a return; if followed by a label, will need to - // insert branch. - prevInstWasReturn = true; - } break; - case SpvOpReturn: { - // Remember we saw a return; if followed by a label, will need to - // insert branch. - prevInstWasReturn = true; - } break; - case SpvOpFunctionEnd: { - // If there was an early return, we generated a return label id - // for it. Now we have to generate the return block with that Id. - if (returnLabelId != 0) { - // If previous instruction was return, insert branch instruction - // to return block. 
- if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr); - if (earlyReturn) { - // If we generated a loop header for the single-trip loop - // to accommodate early returns, insert the continue - // target block now, with a false branch back to the loop - // header. - new_blocks->push_back(std::move(new_blk_ptr)); - new_blk_ptr = - MakeUnique(NewLabel(singleTripLoopContinueId)); - uint32_t false_id = GetFalseId(); - if (false_id == 0) { - return false; - } - AddBranchCond(false_id, singleTripLoopHeaderId, returnLabelId, - &new_blk_ptr); - } - // Generate the return block. - new_blocks->push_back(std::move(new_blk_ptr)); - new_blk_ptr = MakeUnique(NewLabel(returnLabelId)); - multiBlocks = true; - } - // Load return value into result id of call, if it exists. - if (returnVarId != 0) { - const uint32_t resId = call_inst_itr->result_id(); - assert(resId != 0); - AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr); - } - // Copy remaining instructions from caller block. - for (Instruction* inst = call_inst_itr->NextNode(); inst; - inst = call_inst_itr->NextNode()) { - inst->RemoveFromList(); - std::unique_ptr cp_inst(inst); - // If multiple blocks generated, regenerate any same-block - // instruction that has not been seen in this last block. - if (multiBlocks) { - if (!CloneSameBlockOps(&cp_inst, &postCallSB, &preCallSB, - &new_blk_ptr)) { - return false; - } - - // Remember same-block ops in this block. - if (IsSameBlockOp(&*cp_inst)) { - const uint32_t rid = cp_inst->result_id(); - postCallSB[rid] = rid; - } - } - new_blk_ptr->AddInstruction(std::move(cp_inst)); - } - // Finalize inline code. - new_blocks->push_back(std::move(new_blk_ptr)); - } break; - default: { - // Copy callee instruction and remap all input Ids. 
- std::unique_ptr cp_inst(cpi->Clone(context())); - bool succeeded = cp_inst->WhileEachInId( - [&callee2caller, &callee_result_ids, this](uint32_t* iid) { - const auto mapItr = callee2caller.find(*iid); - if (mapItr != callee2caller.end()) { - *iid = mapItr->second; - } else if (callee_result_ids.find(*iid) != - callee_result_ids.end()) { - // Forward reference. Allocate a new id, map it, - // use it and check for it when remapping result ids - const uint32_t nid = context()->TakeNextId(); - if (nid == 0) { - return false; - } - callee2caller[*iid] = nid; - *iid = nid; - } - return true; - }); - if (!succeeded) { - return false; - } - // If result id is non-zero, remap it. If already mapped, use mapped - // value, else use next id. - const uint32_t rid = cp_inst->result_id(); - if (rid != 0) { - const auto mapItr = callee2caller.find(rid); - uint32_t nid; - if (mapItr != callee2caller.end()) { - nid = mapItr->second; - } else { - nid = context()->TakeNextId(); - if (nid == 0) { - return false; - } - callee2caller[rid] = nid; - } - cp_inst->SetResultId(nid); - get_decoration_mgr()->CloneDecorations(rid, nid); - } - new_blk_ptr->AddInstruction(std::move(cp_inst)); - } break; - } - return true; - }); - - if (!successful) { + // Inline the entry block of the callee function. + if (!InlineEntryBlock(callee2caller, &new_blk_ptr, calleeFn->begin())) { return false; } - if (caller_is_loop_header && (new_blocks->size() > 1)) { - // Move the OpLoopMerge from the last block back to the first, where - // it belongs. - auto& first = new_blocks->front(); - auto& last = new_blocks->back(); - assert(first != last); + // Inline blocks of the callee function other than the entry block. + new_blk_ptr = InlineBasicBlocks(new_blocks, callee2caller, + std::move(new_blk_ptr), calleeFn); + if (new_blk_ptr == nullptr) return false; - // Insert a modified copy of the loop merge into the first block. 
- auto loop_merge_itr = last->tail(); - --loop_merge_itr; - assert(loop_merge_itr->opcode() == SpvOpLoopMerge); - std::unique_ptr cp_inst(loop_merge_itr->Clone(context())); - first->tail().InsertBefore(std::move(cp_inst)); + new_blk_ptr = + InlineReturn(callee2caller, new_blocks, std::move(new_blk_ptr), calleeFn, + &*(calleeFn->tail()->tail()), returnVarId); - // Remove the loop merge from the last block. - loop_merge_itr->RemoveFromList(); - delete &*loop_merge_itr; + // Load return value into result id of call, if it exists. + if (returnVarId != 0) { + const uint32_t resId = call_inst_itr->result_id(); + assert(resId != 0); + AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr); } + // Move caller insts after the call. NOTE(review): last arg looks always true. + if (!MoveCallerInstsAfterFunctionCall(&preCallSB, &postCallSB, &new_blk_ptr, + call_inst_itr, + calleeFn->begin() != calleeFn->end())) + return false; + + // Finalize inline code. + new_blocks->push_back(std::move(new_blk_ptr)); + + if (caller_is_loop_header && (new_blocks->size() > 1)) + MoveLoopMergeInstToFirstBlock(new_blocks); + // Update block map given replacement blocks. for (auto& blk : *new_blocks) { id2block_[blk->id()] = &*blk; @@ -624,7 +579,21 @@ bool InlinePass::IsInlinableFunctionCall(const Instruction* inst) { const uint32_t calleeFnId = inst->GetSingleWordOperand(kSpvFunctionCallFunctionId); const auto ci = inlinable_.find(calleeFnId); - return ci != inlinable_.cend(); + if (ci == inlinable_.cend()) return false; + + if (early_return_funcs_.find(calleeFnId) != early_return_funcs_.end()) { + // We rely on the merge-return pass to handle the early return case + // in advance. + std::string message = + "The function '" + id2function_[calleeFnId]->DefInst().PrettyPrint() + + "' could not be inlined because the return instruction " + "is not at the end of the function. 
This could be fixed by " + "running merge-return before inlining."; + consumer()(SPV_MSG_WARNING, "", {0, 0, 0}, message.c_str()); + return false; + } + + return true; } void InlinePass::UpdateSucceedingPhis( @@ -645,26 +614,6 @@ void InlinePass::UpdateSucceedingPhis( }); } -bool InlinePass::HasNoReturnInStructuredConstruct(Function* func) { - // If control not structured, do not do loop/return analysis - // TODO: Analyze returns in non-structured control flow - if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader)) - return false; - const auto structured_analysis = context()->GetStructuredCFGAnalysis(); - // Search for returns in structured construct. - bool return_in_construct = false; - for (auto& blk : *func) { - auto terminal_ii = blk.cend(); - --terminal_ii; - if (spvOpcodeIsReturn(terminal_ii->opcode()) && - structured_analysis->ContainingConstruct(blk.id()) != 0) { - return_in_construct = true; - break; - } - } - return !return_in_construct; -} - bool InlinePass::HasNoReturnInLoop(Function* func) { // If control not structured, do not do loop/return analysis // TODO: Analyze returns in non-structured control flow @@ -686,10 +635,18 @@ bool InlinePass::HasNoReturnInLoop(Function* func) { } void InlinePass::AnalyzeReturns(Function* func) { + // Analyze functions without a return in loop. if (HasNoReturnInLoop(func)) { no_return_in_loop_.insert(func->result_id()); - if (!HasNoReturnInStructuredConstruct(func)) + } + // Analyze functions with a return before its tail basic block. 
+ for (auto& blk : *func) { + auto terminal_ii = blk.cend(); + --terminal_ii; + if (spvOpcodeIsReturn(terminal_ii->opcode()) && &blk != func->tail()) { early_return_funcs_.insert(func->result_id()); + break; + } } } diff --git a/source/opt/inline_pass.h b/source/opt/inline_pass.h index bc5f78127..19fb26e17 100644 --- a/source/opt/inline_pass.h +++ b/source/opt/inline_pass.h @@ -124,10 +124,6 @@ class InlinePass : public Pass { // Return true if |inst| is a function call that can be inlined. bool IsInlinableFunctionCall(const Instruction* inst); - // Return true if |func| does not have a return that is - // nested in a structured if, switch or loop. - bool HasNoReturnInStructuredConstruct(Function* func); - // Return true if |func| has no return in a loop. The current analysis // requires structured control flow, so return false if control flow not // structured ie. module is not a shader. @@ -171,6 +167,64 @@ class InlinePass : public Pass { // Set of functions that are originally called directly or indirectly from a // continue construct. std::unordered_set funcs_called_from_continue_; + + private: + // Moves instructions of the caller function up to the call instruction + // to |new_blk_ptr|. + void MoveInstsBeforeEntryBlock( + std::unordered_map* preCallSB, + BasicBlock* new_blk_ptr, BasicBlock::iterator call_inst_itr, + UptrVectorIterator call_block_itr); + + // Ends |new_blk_ptr| with a branch to a new guard block, moves it into + // |new_blocks| and returns the guard block (nullptr if TakeNextId() is 0). + std::unique_ptr AddGuardBlock( + std::vector>* new_blocks, + std::unordered_map* callee2caller, + std::unique_ptr new_blk_ptr, uint32_t entry_blk_label_id); + + // Adds stores for initializers of the entry block's leading variables. + InstructionList::iterator AddStoresForVariableInitializers( + const std::unordered_map& callee2caller, + std::unique_ptr* new_blk_ptr, + UptrVectorIterator callee_block_itr); + + // Inlines a single instruction of the callee function. 
+ bool InlineInstructionInBB( + const std::unordered_map& callee2caller, + BasicBlock* new_blk_ptr, const Instruction* inst); + + // Inlines the return instruction of the callee function. + std::unique_ptr InlineReturn( + const std::unordered_map& callee2caller, + std::vector>* new_blocks, + std::unique_ptr new_blk_ptr, Function* calleeFn, + const Instruction* inst, uint32_t returnVarId); + + // Inlines the entry block of the callee function. + bool InlineEntryBlock( + const std::unordered_map& callee2caller, + std::unique_ptr* new_blk_ptr, + UptrVectorIterator callee_first_block); + + // Inlines basic blocks of the callee function other than the entry basic + // block. + std::unique_ptr InlineBasicBlocks( + std::vector>* new_blocks, + const std::unordered_map& callee2caller, + std::unique_ptr new_blk_ptr, Function* calleeFn); + + // Moves instructions of the caller function after the call instruction + // to |new_blk_ptr|. + bool MoveCallerInstsAfterFunctionCall( + std::unordered_map* preCallSB, + std::unordered_map* postCallSB, + std::unique_ptr* new_blk_ptr, + BasicBlock::iterator call_inst_itr, bool multiBlocks); + + // Move the OpLoopMerge from the last block back to the first. 
+ void MoveLoopMergeInstToFirstBlock( + std::vector>* new_blocks); }; } // namespace opt diff --git a/test/opt/inline_opaque_test.cpp b/test/opt/inline_opaque_test.cpp index d10913aec..b8d2dfada 100644 --- a/test/opt/inline_opaque_test.cpp +++ b/test/opt/inline_opaque_test.cpp @@ -102,12 +102,12 @@ OpStore %30 %29 OpStore %32 %31 %33 = OpLoad %S_t %s0 OpStore %param %33 -%41 = OpAccessChain %_ptr_Function_18 %param %int_2 -%42 = OpLoad %18 %41 -%43 = OpAccessChain %_ptr_Function_v2float %param %int_0 -%44 = OpLoad %v2float %43 -%45 = OpImageSampleImplicitLod %v4float %42 %44 -OpStore %outColor %45 +%42 = OpAccessChain %_ptr_Function_18 %param %int_2 +%43 = OpLoad %18 %42 +%44 = OpAccessChain %_ptr_Function_v2float %param %int_0 +%45 = OpLoad %v2float %44 +%46 = OpImageSampleImplicitLod %v4float %43 %45 +OpStore %outColor %46 OpReturn OpFunctionEnd )"; @@ -191,10 +191,10 @@ OpFunctionEnd %34 = OpVariable %_ptr_Function_20 Function %35 = OpVariable %_ptr_Function_20 Function %25 = OpVariable %_ptr_Function_20 Function -%36 = OpLoad %20 %sampler16 -OpStore %34 %36 -%37 = OpLoad %20 %34 -OpStore %35 %37 +%37 = OpLoad %20 %sampler16 +OpStore %34 %37 +%38 = OpLoad %20 %34 +OpStore %35 %38 %26 = OpLoad %20 %35 OpStore %25 %26 %27 = OpLoad %20 %25 @@ -301,12 +301,12 @@ OpStore %31 %30 OpStore %33 %32 %34 = OpLoad %S_t %s0 OpStore %param %34 -%44 = OpAccessChain %_ptr_Function_19 %param %int_2 -%45 = OpLoad %19 %44 -%46 = OpAccessChain %_ptr_Function_v2float %param %int_0 -%47 = OpLoad %v2float %46 -%48 = OpImageSampleImplicitLod %v4float %45 %47 -OpStore %outColor %48 +%45 = OpAccessChain %_ptr_Function_19 %param %int_2 +%46 = OpLoad %19 %45 +%47 = OpAccessChain %_ptr_Function_v2float %param %int_0 +%48 = OpLoad %v2float %47 +%49 = OpImageSampleImplicitLod %v4float %46 %48 +OpStore %outColor %49 OpReturn OpFunctionEnd )"; diff --git a/test/opt/inline_test.cpp b/test/opt/inline_test.cpp index f44c04a05..76573a651 100644 --- a/test/opt/inline_test.cpp +++ 
b/test/opt/inline_test.cpp @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -115,12 +116,12 @@ TEST_F(InlineTest, Simple) { "%param = OpVariable %_ptr_Function_v4float Function", "%22 = OpLoad %v4float %BaseColor", "OpStore %param %22", - "%33 = OpAccessChain %_ptr_Function_float %param %uint_0", - "%34 = OpLoad %float %33", - "%35 = OpAccessChain %_ptr_Function_float %param %uint_1", - "%36 = OpLoad %float %35", - "%37 = OpFAdd %float %34 %36", - "OpStore %32 %37", + "%34 = OpAccessChain %_ptr_Function_float %param %uint_0", + "%35 = OpLoad %float %34", + "%36 = OpAccessChain %_ptr_Function_float %param %uint_1", + "%37 = OpLoad %float %36", + "%38 = OpFAdd %float %35 %37", + "OpStore %32 %38", "%23 = OpLoad %float %32", "%24 = OpCompositeConstruct %v4float %23 %23 %23 %23", "OpStore %color %24", @@ -248,7 +249,7 @@ TEST_F(InlineTest, Nested) { // clang-format off "%main = OpFunction %void None %15", "%28 = OpLabel", - "%57 = OpVariable %_ptr_Function_float Function", + "%58 = OpVariable %_ptr_Function_float Function", "%46 = OpVariable %_ptr_Function_float Function", "%47 = OpVariable %_ptr_Function_float Function", "%48 = OpVariable %_ptr_Function_float Function", @@ -256,21 +257,21 @@ TEST_F(InlineTest, Nested) { "%param_1 = OpVariable %_ptr_Function_v4float Function", "%29 = OpLoad %v4float %BaseColor", "OpStore %param_1 %29", - "%49 = OpAccessChain %_ptr_Function_float %param_1 %uint_0", - "%50 = OpLoad %float %49", - "%51 = OpAccessChain %_ptr_Function_float %param_1 %uint_1", - "%52 = OpLoad %float %51", - "%53 = OpFAdd %float %50 %52", - "OpStore %46 %53", - "%54 = OpAccessChain %_ptr_Function_float %param_1 %uint_2", - "%55 = OpLoad %float %54", - "OpStore %47 %55", - "%58 = OpLoad %float %46", - "%59 = OpLoad %float %47", - "%60 = OpFMul %float %58 %59", - "OpStore %57 %60", - "%56 = OpLoad %float %57", - "OpStore %48 %56", + "%50 = 
OpAccessChain %_ptr_Function_float %param_1 %uint_0", + "%51 = OpLoad %float %50", + "%52 = OpAccessChain %_ptr_Function_float %param_1 %uint_1", + "%53 = OpLoad %float %52", + "%54 = OpFAdd %float %51 %53", + "OpStore %46 %54", + "%55 = OpAccessChain %_ptr_Function_float %param_1 %uint_2", + "%56 = OpLoad %float %55", + "OpStore %47 %56", + "%60 = OpLoad %float %46", + "%61 = OpLoad %float %47", + "%62 = OpFMul %float %60 %61", + "OpStore %58 %62", + "%57 = OpLoad %float %58", + "OpStore %48 %57", "%30 = OpLoad %float %48", "%31 = OpCompositeConstruct %v4float %30 %30 %30 %30", "OpStore %color %31", @@ -390,13 +391,13 @@ TEST_F(InlineTest, InOutParameter) { "OpStore %b %24", "%25 = OpLoad %v4float %b", "OpStore %param %25", - "%39 = OpAccessChain %_ptr_Function_float %param %uint_0", - "%40 = OpLoad %float %39", - "%41 = OpAccessChain %_ptr_Function_float %param %uint_1", - "%42 = OpLoad %float %41", - "%43 = OpFAdd %float %40 %42", - "%44 = OpAccessChain %_ptr_Function_float %param %uint_2", - "OpStore %44 %43", + "%40 = OpAccessChain %_ptr_Function_float %param %uint_0", + "%41 = OpLoad %float %40", + "%42 = OpAccessChain %_ptr_Function_float %param %uint_1", + "%43 = OpLoad %float %42", + "%44 = OpFAdd %float %41 %43", + "%45 = OpAccessChain %_ptr_Function_float %param %uint_2", + "OpStore %45 %44", "%27 = OpLoad %v4float %param", "OpStore %b %27", "%28 = OpAccessChain %_ptr_Function_float %b %uint_2", @@ -521,21 +522,21 @@ TEST_F(InlineTest, BranchInCallee) { "%param = OpVariable %_ptr_Function_v4float Function", "%24 = OpLoad %v4float %BaseColor", "OpStore %param %24", - "%40 = OpAccessChain %_ptr_Function_float %param %uint_0", - "%41 = OpLoad %float %40", - "OpStore %38 %41", - "%42 = OpLoad %float %38", - "%43 = OpFOrdLessThan %bool %42 %float_0", - "OpSelectionMerge %44 None", - "OpBranchConditional %43 %45 %44", + "%41 = OpAccessChain %_ptr_Function_float %param %uint_0", + "%42 = OpLoad %float %41", + "OpStore %38 %42", + "%43 = OpLoad %float %38", + 
"%44 = OpFOrdLessThan %bool %43 %float_0", + "OpSelectionMerge %48 None", + "OpBranchConditional %44 %45 %48", "%45 = OpLabel", "%46 = OpLoad %float %38", "%47 = OpFNegate %float %46", "OpStore %38 %47", - "OpBranch %44", - "%44 = OpLabel", - "%48 = OpLoad %float %38", - "OpStore %39 %48", + "OpBranch %48", + "%48 = OpLabel", + "%49 = OpLoad %float %38", + "OpStore %39 %49", "%25 = OpLoad %float %39", "%26 = OpCompositeConstruct %v4float %25 %25 %25 %25", "OpStore %color %26", @@ -675,8 +676,8 @@ TEST_F(InlineTest, PhiAfterCall) { // clang-format off "%main = OpFunction %void None %12", "%27 = OpLabel", - "%62 = OpVariable %_ptr_Function_float Function", "%63 = OpVariable %_ptr_Function_float Function", + "%64 = OpVariable %_ptr_Function_float Function", "%52 = OpVariable %_ptr_Function_float Function", "%53 = OpVariable %_ptr_Function_float Function", "%color = OpVariable %_ptr_Function_v4float Function", @@ -687,20 +688,20 @@ TEST_F(InlineTest, PhiAfterCall) { "%29 = OpAccessChain %_ptr_Function_float %color %uint_0", "%30 = OpLoad %float %29", "OpStore %param %30", - "%54 = OpLoad %float %param", - "OpStore %52 %54", - "%55 = OpLoad %float %52", - "%56 = OpFOrdLessThan %bool %55 %float_0", - "OpSelectionMerge %57 None", - "OpBranchConditional %56 %58 %57", + "%55 = OpLoad %float %param", + "OpStore %52 %55", + "%56 = OpLoad %float %52", + "%57 = OpFOrdLessThan %bool %56 %float_0", + "OpSelectionMerge %61 None", + "OpBranchConditional %57 %58 %61", "%58 = OpLabel", "%59 = OpLoad %float %52", "%60 = OpFNegate %float %59", "OpStore %52 %60", - "OpBranch %57", - "%57 = OpLabel", - "%61 = OpLoad %float %52", - "OpStore %53 %61", + "OpBranch %61", + "%61 = OpLabel", + "%62 = OpLoad %float %52", + "OpStore %53 %62", "%31 = OpLoad %float %53", "%32 = OpFOrdGreaterThan %bool %31 %float_2", "OpSelectionMerge %33 None", @@ -709,25 +710,25 @@ TEST_F(InlineTest, PhiAfterCall) { "%35 = OpAccessChain %_ptr_Function_float %color %uint_1", "%36 = OpLoad %float %35", "OpStore 
%param_0 %36", - "%64 = OpLoad %float %param_0", - "OpStore %62 %64", - "%65 = OpLoad %float %62", - "%66 = OpFOrdLessThan %bool %65 %float_0", - "OpSelectionMerge %67 None", - "OpBranchConditional %66 %68 %67", - "%68 = OpLabel", - "%69 = OpLoad %float %62", - "%70 = OpFNegate %float %69", - "OpStore %62 %70", - "OpBranch %67", - "%67 = OpLabel", - "%71 = OpLoad %float %62", + "%66 = OpLoad %float %param_0", + "OpStore %63 %66", + "%67 = OpLoad %float %63", + "%68 = OpFOrdLessThan %bool %67 %float_0", + "OpSelectionMerge %72 None", + "OpBranchConditional %68 %69 %72", + "%69 = OpLabel", + "%70 = OpLoad %float %63", + "%71 = OpFNegate %float %70", "OpStore %63 %71", - "%37 = OpLoad %float %63", + "OpBranch %72", + "%72 = OpLabel", + "%73 = OpLoad %float %63", + "OpStore %64 %73", + "%37 = OpLoad %float %64", "%38 = OpFOrdGreaterThan %bool %37 %float_2", "OpBranch %33", "%33 = OpLabel", - "%39 = OpPhi %bool %32 %57 %38 %67", + "%39 = OpPhi %bool %32 %61 %38 %72", "OpSelectionMerge %40 None", "OpBranchConditional %39 %41 %40", "%41 = OpLabel", @@ -902,28 +903,28 @@ TEST_F(InlineTest, OpSampledImageOutOfBlock) { "OpStore %color1 %42", "%43 = OpLoad %v4float %BaseColor", "OpStore %param %43", - "%68 = OpAccessChain %_ptr_Function_float %param %uint_0", - "%69 = OpLoad %float %68", - "OpStore %66 %69", - "%70 = OpLoad %float %66", - "%71 = OpFOrdLessThan %bool %70 %float_0", - "OpSelectionMerge %72 None", - "OpBranchConditional %71 %73 %72", + "%69 = OpAccessChain %_ptr_Function_float %param %uint_0", + "%70 = OpLoad %float %69", + "OpStore %66 %70", + "%71 = OpLoad %float %66", + "%72 = OpFOrdLessThan %bool %71 %float_0", + "OpSelectionMerge %76 None", + "OpBranchConditional %72 %73 %76", "%73 = OpLabel", "%74 = OpLoad %float %66", "%75 = OpFNegate %float %74", "OpStore %66 %75", - "OpBranch %72", - "%72 = OpLabel", - "%76 = OpLoad %float %66", - "OpStore %67 %76", + "OpBranch %76", + "%76 = OpLabel", + "%77 = OpLoad %float %66", + "OpStore %67 %77", "%44 = OpLoad 
%float %67", "%45 = OpCompositeConstruct %v4float %44 %44 %44 %44", "OpStore %color2 %45", "%46 = OpLoad %25 %t2D", "%47 = OpLoad %27 %samp", - "%77 = OpSampledImage %29 %39 %40", - "%48 = OpImageSampleImplicitLod %v4float %77 %35", + "%78 = OpSampledImage %29 %39 %40", + "%48 = OpImageSampleImplicitLod %v4float %78 %35", "OpStore %color3 %48", "%49 = OpLoad %v4float %color1", "%50 = OpLoad %v4float %color2", @@ -1108,27 +1109,27 @@ TEST_F(InlineTest, OpImageOutOfBlock) { "OpStore %color1 %43", "%46 = OpLoad %v4float %BaseColor", "OpStore %param %46", - "%70 = OpAccessChain %_ptr_Function_float %param %uint_0", - "%71 = OpLoad %float %70", - "OpStore %68 %71", - "%72 = OpLoad %float %68", - "%73 = OpFOrdLessThan %bool %72 %float_0", - "OpSelectionMerge %74 None", - "OpBranchConditional %73 %75 %74", + "%71 = OpAccessChain %_ptr_Function_float %param %uint_0", + "%72 = OpLoad %float %71", + "OpStore %68 %72", + "%73 = OpLoad %float %68", + "%74 = OpFOrdLessThan %bool %73 %float_0", + "OpSelectionMerge %78 None", + "OpBranchConditional %74 %75 %78", "%75 = OpLabel", "%76 = OpLoad %float %68", "%77 = OpFNegate %float %76", "OpStore %68 %77", - "OpBranch %74", - "%74 = OpLabel", - "%78 = OpLoad %float %68", - "OpStore %69 %78", + "OpBranch %78", + "%78 = OpLabel", + "%79 = OpLoad %float %68", + "OpStore %69 %79", "%47 = OpLoad %float %69", "%48 = OpCompositeConstruct %v4float %47 %47 %47 %47", "OpStore %color2 %48", - "%79 = OpSampledImage %30 %40 %41", - "%80 = OpImage %26 %79", - "%49 = OpSampledImage %30 %80 %45", + "%80 = OpSampledImage %30 %40 %41", + "%81 = OpImage %26 %80", + "%49 = OpSampledImage %30 %81 %45", "%50 = OpImageSampleImplicitLod %v4float %49 %36", "OpStore %color3 %50", "%51 = OpLoad %v4float %color1", @@ -1314,28 +1315,28 @@ TEST_F(InlineTest, OpImageAndOpSampledImageOutOfBlock) { "OpStore %color1 %43", "%47 = OpLoad %v4float %BaseColor", "OpStore %param %47", - "%70 = OpAccessChain %_ptr_Function_float %param %uint_0", - "%71 = OpLoad %float 
%70", - "OpStore %68 %71", - "%72 = OpLoad %float %68", - "%73 = OpFOrdLessThan %bool %72 %float_0", - "OpSelectionMerge %74 None", - "OpBranchConditional %73 %75 %74", + "%71 = OpAccessChain %_ptr_Function_float %param %uint_0", + "%72 = OpLoad %float %71", + "OpStore %68 %72", + "%73 = OpLoad %float %68", + "%74 = OpFOrdLessThan %bool %73 %float_0", + "OpSelectionMerge %78 None", + "OpBranchConditional %74 %75 %78", "%75 = OpLabel", "%76 = OpLoad %float %68", "%77 = OpFNegate %float %76", "OpStore %68 %77", - "OpBranch %74", - "%74 = OpLabel", - "%78 = OpLoad %float %68", - "OpStore %69 %78", + "OpBranch %78", + "%78 = OpLabel", + "%79 = OpLoad %float %68", + "OpStore %69 %79", "%48 = OpLoad %float %69", "%49 = OpCompositeConstruct %v4float %48 %48 %48 %48", "OpStore %color2 %49", - "%79 = OpSampledImage %30 %40 %41", - "%80 = OpImage %26 %79", - "%81 = OpSampledImage %30 %80 %45", - "%50 = OpImageSampleImplicitLod %v4float %81 %36", + "%80 = OpSampledImage %30 %40 %41", + "%81 = OpImage %26 %80", + "%82 = OpSampledImage %30 %81 %45", + "%50 = OpImageSampleImplicitLod %v4float %82 %36", "OpStore %color3 %50", "%51 = OpLoad %v4float %color1", "%52 = OpLoad %v4float %color2", @@ -1355,292 +1356,6 @@ TEST_F(InlineTest, OpImageAndOpSampledImageOutOfBlock) { /* skip_nop = */ false, /* do_validate = */ true); } -TEST_F(InlineTest, EarlyReturnFunctionInlined) { - // #version 140 - // - // in vec4 BaseColor; - // - // float foo(vec4 bar) - // { - // if (bar.x < 0.0) - // return 0.0; - // return bar.x; - // } - // - // void main() - // { - // vec4 color = vec4(foo(BaseColor)); - // gl_FragColor = color; - // } - - const std::string predefs = - R"(OpCapability Shader -%1 = OpExtInstImport "GLSL.std.450" -OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor -OpExecutionMode %main OriginUpperLeft -OpSource GLSL 140 -OpName %main "main" -OpName %foo_vf4_ "foo(vf4;" -OpName %bar "bar" -OpName %color "color" -OpName %BaseColor "BaseColor" 
-OpName %param "param" -OpName %gl_FragColor "gl_FragColor" -%void = OpTypeVoid -%10 = OpTypeFunction %void -%float = OpTypeFloat 32 -%v4float = OpTypeVector %float 4 -%_ptr_Function_v4float = OpTypePointer Function %v4float -%14 = OpTypeFunction %float %_ptr_Function_v4float -%uint = OpTypeInt 32 0 -%uint_0 = OpConstant %uint 0 -%_ptr_Function_float = OpTypePointer Function %float -%float_0 = OpConstant %float 0 -%bool = OpTypeBool -%_ptr_Input_v4float = OpTypePointer Input %v4float -%BaseColor = OpVariable %_ptr_Input_v4float Input -%_ptr_Output_v4float = OpTypePointer Output %v4float -%gl_FragColor = OpVariable %_ptr_Output_v4float Output -)"; - - const std::string nonEntryFuncs = - R"(%foo_vf4_ = OpFunction %float None %14 -%bar = OpFunctionParameter %_ptr_Function_v4float -%27 = OpLabel -%28 = OpAccessChain %_ptr_Function_float %bar %uint_0 -%29 = OpLoad %float %28 -%30 = OpFOrdLessThan %bool %29 %float_0 -OpSelectionMerge %31 None -OpBranchConditional %30 %32 %31 -%32 = OpLabel -OpReturnValue %float_0 -%31 = OpLabel -%33 = OpAccessChain %_ptr_Function_float %bar %uint_0 -%34 = OpLoad %float %33 -OpReturnValue %34 -OpFunctionEnd -)"; - - const std::string before = - R"(%main = OpFunction %void None %10 -%22 = OpLabel -%color = OpVariable %_ptr_Function_v4float Function -%param = OpVariable %_ptr_Function_v4float Function -%23 = OpLoad %v4float %BaseColor -OpStore %param %23 -%24 = OpFunctionCall %float %foo_vf4_ %param -%25 = OpCompositeConstruct %v4float %24 %24 %24 %24 -OpStore %color %25 -%26 = OpLoad %v4float %color -OpStore %gl_FragColor %26 -OpReturn -OpFunctionEnd -)"; - - const std::string after = - R"(%false = OpConstantFalse %bool -%main = OpFunction %void None %10 -%22 = OpLabel -%35 = OpVariable %_ptr_Function_float Function -%color = OpVariable %_ptr_Function_v4float Function -%param = OpVariable %_ptr_Function_v4float Function -%23 = OpLoad %v4float %BaseColor -OpStore %param %23 -OpBranch %36 -%36 = OpLabel -OpLoopMerge %37 %38 None -OpBranch 
%39 -%39 = OpLabel -%40 = OpAccessChain %_ptr_Function_float %param %uint_0 -%41 = OpLoad %float %40 -%42 = OpFOrdLessThan %bool %41 %float_0 -OpSelectionMerge %43 None -OpBranchConditional %42 %44 %43 -%44 = OpLabel -OpStore %35 %float_0 -OpBranch %37 -%43 = OpLabel -%45 = OpAccessChain %_ptr_Function_float %param %uint_0 -%46 = OpLoad %float %45 -OpStore %35 %46 -OpBranch %37 -%38 = OpLabel -OpBranchConditional %false %36 %37 -%37 = OpLabel -%24 = OpLoad %float %35 -%25 = OpCompositeConstruct %v4float %24 %24 %24 %24 -OpStore %color %25 -%26 = OpLoad %v4float %color -OpStore %gl_FragColor %26 -OpReturn -OpFunctionEnd -)"; - - SinglePassRunAndCheck(predefs + before + nonEntryFuncs, - predefs + after + nonEntryFuncs, - false, true); -} - -TEST_F(InlineTest, EarlyReturnNotAppearingLastInFunctionInlined) { - // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/755 - // - // Original example is derived from: - // - // #version 450 - // - // float foo() { - // if (true) { - // } - // } - // - // void main() { foo(); } - // - // But the order of basic blocks in foo is changed so that the return - // block is listed second-last. There is only one return in the callee - // but it does not appear last. 
- - const std::string predefs = - R"(OpCapability Shader -OpMemoryModel Logical GLSL450 -OpEntryPoint Vertex %main "main" -OpSource GLSL 450 -OpName %main "main" -OpName %foo_ "foo(" -%void = OpTypeVoid -%4 = OpTypeFunction %void -%bool = OpTypeBool -%true = OpConstantTrue %bool -)"; - - const std::string nonEntryFuncs = - R"(%foo_ = OpFunction %void None %4 -%7 = OpLabel -OpSelectionMerge %8 None -OpBranchConditional %true %9 %8 -%8 = OpLabel -OpReturn -%9 = OpLabel -OpBranch %8 -OpFunctionEnd -)"; - - const std::string before = - R"(%main = OpFunction %void None %4 -%10 = OpLabel -%11 = OpFunctionCall %void %foo_ -OpReturn -OpFunctionEnd -)"; - - const std::string after = - R"(%main = OpFunction %void None %4 -%10 = OpLabel -OpSelectionMerge %12 None -OpBranchConditional %true %13 %12 -%12 = OpLabel -OpBranch %14 -%13 = OpLabel -OpBranch %12 -%14 = OpLabel -OpReturn -OpFunctionEnd -)"; - - SinglePassRunAndCheck(predefs + nonEntryFuncs + before, - predefs + nonEntryFuncs + after, - false, true); -} - -TEST_F(InlineTest, ForwardReferencesInPhiInlined) { - // The basic structure of the test case is like this: - // - // int foo() { - // int result = 1; - // if (true) { - // result = 1; - // } - // return result; - // } - // - // void main() { - // int x = foo(); - // } - // - // but with modifications: Using Phi instead of load/store, and the - // return block in foo appears before the "then" block. 
- - const std::string predefs = - R"(OpCapability Shader -%1 = OpExtInstImport "GLSL.std.450" -OpMemoryModel Logical GLSL450 -OpEntryPoint Vertex %main "main" -OpSource GLSL 450 -OpName %main "main" -OpName %foo_ "foo(" -OpName %x "x" -%void = OpTypeVoid -%6 = OpTypeFunction %void -%int = OpTypeInt 32 1 -%8 = OpTypeFunction %int -%bool = OpTypeBool -%true = OpConstantTrue %bool -%int_0 = OpConstant %int 0 -%_ptr_Function_int = OpTypePointer Function %int -)"; - - const std::string nonEntryFuncs = - R"(%foo_ = OpFunction %int None %8 -%13 = OpLabel -%14 = OpCopyObject %int %int_0 -OpSelectionMerge %15 None -OpBranchConditional %true %16 %15 -%15 = OpLabel -%17 = OpPhi %int %14 %13 %18 %16 -OpReturnValue %17 -%16 = OpLabel -%18 = OpCopyObject %int %int_0 -OpBranch %15 -OpFunctionEnd -)"; - - const std::string before = - R"(%main = OpFunction %void None %6 -%19 = OpLabel -%x = OpVariable %_ptr_Function_int Function -%20 = OpFunctionCall %int %foo_ -OpStore %x %20 -OpReturn -OpFunctionEnd -)"; - - const std::string after = - R"(%main = OpFunction %void None %6 -%19 = OpLabel -%21 = OpVariable %_ptr_Function_int Function -%x = OpVariable %_ptr_Function_int Function -%22 = OpCopyObject %int %int_0 -OpSelectionMerge %23 None -OpBranchConditional %true %24 %23 -%23 = OpLabel -%26 = OpPhi %int %22 %19 %25 %24 -OpStore %21 %26 -OpBranch %27 -%24 = OpLabel -%25 = OpCopyObject %int %int_0 -OpBranch %23 -%27 = OpLabel -%20 = OpLoad %int %21 -OpStore %x %20 -OpReturn -OpFunctionEnd -)"; - - SinglePassRunAndCheck(predefs + nonEntryFuncs + before, - predefs + nonEntryFuncs + after, - false, true); -} - TEST_F(InlineTest, EarlyReturnInLoopIsNotInlined) { // #version 140 // @@ -1820,8 +1535,8 @@ OpFunctionEnd OpBranch %10 %10 = OpLabel OpLoopMerge %12 %10 None -OpBranch %13 -%13 = OpLabel +OpBranch %14 +%14 = OpLabel OpBranchConditional %true %10 %12 %12 = OpLabel OpReturn @@ -1890,11 +1605,11 @@ OpFunctionEnd OpBranch %18 %18 = OpLabel %19 = OpCopyObject %int %int_3 -%25 = 
OpCopyObject %int %int_1 +%26 = OpCopyObject %int %int_1 OpLoopMerge %22 %23 None -OpBranch %26 -%26 = OpLabel -%27 = OpCopyObject %int %int_2 +OpBranch %27 +%27 = OpLabel +%28 = OpCopyObject %int %int_2 %21 = OpCopyObject %int %int_4 OpBranchConditional %true %23 %22 %23 = OpLabel @@ -1983,11 +1698,11 @@ OpBranch %13 OpLoopMerge %16 %13 None OpBranch %17 %17 = OpLabel -%18 = OpCopyObject %bool %true -OpSelectionMerge %19 None -OpBranchConditional %true %19 %19 -%19 = OpLabel -%20 = OpPhi %bool %18 %17 +%19 = OpCopyObject %bool %true +OpSelectionMerge %20 None +OpBranchConditional %true %20 %20 +%20 = OpLabel +%21 = OpPhi %bool %19 %17 OpBranchConditional %true %13 %16 %16 = OpLabel OpReturn @@ -2060,11 +1775,11 @@ OpBranch %18 OpLoopMerge %22 %23 None OpBranch %25 %25 = OpLabel -%26 = OpCopyObject %int %int_1 -OpSelectionMerge %27 None -OpBranchConditional %true %27 %27 -%27 = OpLabel -%28 = OpCopyObject %int %int_2 +%27 = OpCopyObject %int %int_1 +OpSelectionMerge %28 None +OpBranchConditional %true %28 %28 +%28 = OpLabel +%29 = OpCopyObject %int %int_2 %21 = OpCopyObject %int %int_4 OpBranchConditional %true %23 %22 %23 = OpLabel @@ -2080,165 +1795,6 @@ OpFunctionEnd false, true); } -TEST_F( - InlineTest, - SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMergeAndMultiReturns) { - // This is similar to SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMerge - // except that in addition to starting with a selection header, the - // callee also has multi returns. - // - // So now we have to accommodate: - // - The caller's OpLoopMerge (which must move to the first block) - // - The single-trip loop to wrap the multi returns, and - // - The callee's selection merge in its first block. - // Each of these must go into their own blocks. 
- - const std::string predefs = - R"(OpCapability Shader -OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %1 "main" -OpSource OpenCL_C 120 -%bool = OpTypeBool -%int = OpTypeInt 32 1 -%true = OpConstantTrue %bool -%false = OpConstantFalse %bool -%int_0 = OpConstant %int 0 -%int_1 = OpConstant %int 1 -%int_2 = OpConstant %int 2 -%int_3 = OpConstant %int 3 -%int_4 = OpConstant %int 4 -%void = OpTypeVoid -%12 = OpTypeFunction %void -)"; - - const std::string nonEntryFuncs = - R"(%13 = OpFunction %void None %12 -%14 = OpLabel -%15 = OpCopyObject %int %int_0 -OpReturn -%16 = OpLabel -%17 = OpCopyObject %int %int_1 -OpReturn -OpFunctionEnd -)"; - - const std::string before = - R"(%1 = OpFunction %void None %12 -%18 = OpLabel -OpBranch %19 -%19 = OpLabel -%20 = OpCopyObject %int %int_2 -%21 = OpFunctionCall %void %13 -%22 = OpCopyObject %int %int_3 -OpLoopMerge %23 %19 None -OpBranchConditional %true %19 %23 -%23 = OpLabel -%24 = OpCopyObject %int %int_4 -OpReturn -OpFunctionEnd -)"; - - const std::string after = - R"(%1 = OpFunction %void None %12 -%18 = OpLabel -OpBranch %19 -%19 = OpLabel -%20 = OpCopyObject %int %int_2 -%25 = OpCopyObject %int %int_0 -OpLoopMerge %23 %19 None -OpBranch %26 -%27 = OpLabel -%28 = OpCopyObject %int %int_1 -OpBranch %26 -%26 = OpLabel -%22 = OpCopyObject %int %int_3 -OpBranchConditional %true %19 %23 -%23 = OpLabel -%24 = OpCopyObject %int %int_4 -OpReturn -OpFunctionEnd -)"; - - SinglePassRunAndCheck(predefs + nonEntryFuncs + before, - predefs + nonEntryFuncs + after, - false, true); -} - -TEST_F(InlineTest, CalleeWithMultiReturnAndPhiRequiresEntryBlockRemapping) { - // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/790 - // - // The callee has multiple returns, and so must be wrapped with a single-trip - // loop. That code must remap the callee entry block ID to the introduced - // loop body's ID. Otherwise you can get a dominance error in a cloned OpPhi. 
- - const std::string predefs = - R"(OpCapability Shader -OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %1 "main" -OpSource OpenCL_C 120 -%int = OpTypeInt 32 1 -%int_0 = OpConstant %int 0 -%int_1 = OpConstant %int 1 -%int_2 = OpConstant %int 2 -%int_3 = OpConstant %int 3 -%int_4 = OpConstant %int 4 -%void = OpTypeVoid -%9 = OpTypeFunction %void -%bool = OpTypeBool -%false = OpConstantFalse %bool -)"; - - // This callee has multiple returns, and a Phi in the second block referencing - // a value generated in the entry block. - const std::string nonEntryFuncs = - R"(%12 = OpFunction %void None %9 -%13 = OpLabel -%14 = OpCopyObject %int %int_0 -OpBranch %15 -%15 = OpLabel -%16 = OpPhi %int %14 %13 -%17 = OpCopyObject %int %int_1 -OpReturn -%18 = OpLabel -%19 = OpCopyObject %int %int_2 -OpReturn -OpFunctionEnd -)"; - - const std::string before = - R"(%1 = OpFunction %void None %9 -%20 = OpLabel -%21 = OpCopyObject %int %int_3 -%22 = OpFunctionCall %void %12 -%23 = OpCopyObject %int %int_4 -OpReturn -OpFunctionEnd -)"; - - const std::string after = - R"(%1 = OpFunction %void None %9 -%20 = OpLabel -%21 = OpCopyObject %int %int_3 -%24 = OpCopyObject %int %int_0 -OpBranch %25 -%25 = OpLabel -%26 = OpPhi %int %24 %20 -%27 = OpCopyObject %int %int_1 -OpBranch %28 -%29 = OpLabel -%30 = OpCopyObject %int %int_2 -OpBranch %28 -%28 = OpLabel -%23 = OpCopyObject %int %int_4 -OpReturn -OpFunctionEnd -)"; - - SinglePassRunAndCheck(predefs + nonEntryFuncs + before, - predefs + nonEntryFuncs + after, - false, true); -} - TEST_F(InlineTest, NonInlinableCalleeWithSingleReturn) { // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018 // @@ -2324,138 +1880,6 @@ OpFunctionEnd predefs + caller + callee, predefs + caller + callee, false, true); } -TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) { - // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018 - // - // The callee has a single return, but needs single-trip loop 
wrapper - // to be inlined because the return is in a selection structure. - - const std::string predefs = - R"(OpCapability Shader -%1 = OpExtInstImport "GLSL.std.450" -OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %main "main" %_GLF_color -OpExecutionMode %main OriginUpperLeft -OpSource ESSL 310 -OpName %main "main" -OpName %f_ "f(" -OpName %i "i" -OpName %_GLF_color "_GLF_color" -OpDecorate %_GLF_color Location 0 -%void = OpTypeVoid -%7 = OpTypeFunction %void -%float = OpTypeFloat 32 -%9 = OpTypeFunction %float -%float_1 = OpConstant %float 1 -%bool = OpTypeBool -%false = OpConstantFalse %bool -%true = OpConstantTrue %bool -%int = OpTypeInt 32 1 -%_ptr_Function_int = OpTypePointer Function %int -%int_0 = OpConstant %int 0 -%int_1 = OpConstant %int 1 -%v4float = OpTypeVector %float 4 -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_GLF_color = OpVariable %_ptr_Output_v4float Output -%float_0 = OpConstant %float 0 -%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 -%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 -)"; - - const std::string new_predefs = - R"(%_ptr_Function_float = OpTypePointer Function %float -)"; - - const std::string main_before = - R"(%main = OpFunction %void None %7 -%23 = OpLabel -%i = OpVariable %_ptr_Function_int Function -OpStore %i %int_0 -OpBranch %24 -%24 = OpLabel -OpLoopMerge %25 %26 None -OpBranch %27 -%27 = OpLabel -%28 = OpLoad %int %i -%29 = OpSLessThan %bool %28 %int_1 -OpBranchConditional %29 %30 %25 -%30 = OpLabel -OpStore %_GLF_color %21 -%31 = OpFunctionCall %float %f_ -OpBranch %26 -%26 = OpLabel -%32 = OpLoad %int %i -%33 = OpIAdd %int %32 %int_1 -OpStore %i %33 -OpBranch %24 -%25 = OpLabel -OpStore %_GLF_color %22 -OpReturn -OpFunctionEnd -)"; - - const std::string main_after = - R"(%main = OpFunction %void None %7 -%23 = OpLabel -%38 = OpVariable %_ptr_Function_float Function -%i = OpVariable %_ptr_Function_int Function -OpStore %i %int_0 -OpBranch %24 -%24 = 
OpLabel -OpLoopMerge %25 %26 None -OpBranch %27 -%27 = OpLabel -%28 = OpLoad %int %i -%29 = OpSLessThan %bool %28 %int_1 -OpBranchConditional %29 %30 %25 -%30 = OpLabel -OpStore %_GLF_color %21 -OpBranch %39 -%39 = OpLabel -OpLoopMerge %40 %41 None -OpBranch %42 -%42 = OpLabel -OpSelectionMerge %43 None -OpBranchConditional %true %44 %43 -%44 = OpLabel -OpStore %38 %float_1 -OpBranch %40 -%43 = OpLabel -OpStore %38 %float_1 -OpBranch %40 -%41 = OpLabel -OpBranchConditional %false %39 %40 -%40 = OpLabel -%31 = OpLoad %float %38 -OpBranch %26 -%26 = OpLabel -%32 = OpLoad %int %i -%33 = OpIAdd %int %32 %int_1 -OpStore %i %33 -OpBranch %24 -%25 = OpLabel -OpStore %_GLF_color %22 -OpReturn -OpFunctionEnd -)"; - - const std::string callee = - R"(%f_ = OpFunction %float None %9 -%34 = OpLabel -OpSelectionMerge %35 None -OpBranchConditional %true %36 %35 -%36 = OpLabel -OpReturnValue %float_1 -%35 = OpLabel -OpReturnValue %float_1 -OpFunctionEnd -)"; - - SinglePassRunAndCheck( - predefs + main_before + callee, - predefs + new_predefs + main_after + callee, false, true); -} - TEST_F(InlineTest, Decorated1) { // Same test as Simple with the difference // that OpFAdd in the outlined function is @@ -2526,7 +1950,7 @@ OpFunctionEnd )"; const std::string after = - R"(OpDecorate %37 RelaxedPrecision + R"(OpDecorate %38 RelaxedPrecision %void = OpTypeVoid %11 = OpTypeFunction %void %float = OpTypeFloat 32 @@ -2548,12 +1972,12 @@ OpFunctionEnd %param = OpVariable %_ptr_Function_v4float Function %23 = OpLoad %v4float %BaseColor OpStore %param %23 -%33 = OpAccessChain %_ptr_Function_float %param %uint_0 -%34 = OpLoad %float %33 -%35 = OpAccessChain %_ptr_Function_float %param %uint_1 -%36 = OpLoad %float %35 -%37 = OpFAdd %float %34 %36 -OpStore %32 %37 +%34 = OpAccessChain %_ptr_Function_float %param %uint_0 +%35 = OpLoad %float %34 +%36 = OpAccessChain %_ptr_Function_float %param %uint_1 +%37 = OpLoad %float %36 +%38 = OpFAdd %float %35 %37 +OpStore %32 %38 %24 = OpLoad %float %32 
%25 = OpCompositeConstruct %v4float %24 %24 %24 %24 OpStore %color %25 @@ -2672,12 +2096,12 @@ OpFunctionEnd %param = OpVariable %_ptr_Function_v4float Function %22 = OpLoad %v4float %BaseColor OpStore %param %22 -%33 = OpAccessChain %_ptr_Function_float %param %uint_0 -%34 = OpLoad %float %33 -%35 = OpAccessChain %_ptr_Function_float %param %uint_1 -%36 = OpLoad %float %35 -%37 = OpFAdd %float %34 %36 -OpStore %32 %37 +%34 = OpAccessChain %_ptr_Function_float %param %uint_0 +%35 = OpLoad %float %34 +%36 = OpAccessChain %_ptr_Function_float %param %uint_1 +%37 = OpLoad %float %36 +%38 = OpFAdd %float %35 %37 +OpStore %32 %38 %23 = OpLoad %float %32 %24 = OpCompositeConstruct %v4float %23 %23 %23 %23 OpStore %color %24 @@ -3017,7 +2441,7 @@ OpName %kill_ "kill(" %main = OpFunction %void None %3 %5 = OpLabel OpKill -%17 = OpLabel +%18 = OpLabel OpReturn OpFunctionEnd %kill_ = OpFunction %void None %3 @@ -3030,6 +2454,560 @@ OpFunctionEnd SinglePassRunAndCheck(before, after, false, true); } +TEST_F(InlineTest, EarlyReturnFunctionInlined) { + // #version 140 + // + // in vec4 BaseColor; + // + // float foo(vec4 bar) + // { + // if (bar.x < 0.0) + // return 0.0; + // return bar.x; + // } + // + // void main() + // { + // vec4 color = vec4(foo(BaseColor)); + // gl_FragColor = color; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %foo_vf4_ "foo(vf4;" +OpName %bar "bar" +OpName %color "color" +OpName %BaseColor "BaseColor" +OpName %param "param" +OpName %gl_FragColor "gl_FragColor" +%void = OpTypeVoid +%10 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%14 = OpTypeFunction %float %_ptr_Function_v4float +%uint = OpTypeInt 32 0 +%uint_0 = OpConstant 
%uint 0 +%_ptr_Function_float = OpTypePointer Function %float +%float_0 = OpConstant %float 0 +%bool = OpTypeBool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string foo = + R"(%foo_vf4_ = OpFunction %float None %14 +%bar = OpFunctionParameter %_ptr_Function_v4float +%27 = OpLabel +%28 = OpAccessChain %_ptr_Function_float %bar %uint_0 +%29 = OpLoad %float %28 +%30 = OpFOrdLessThan %bool %29 %float_0 +OpSelectionMerge %31 None +OpBranchConditional %30 %32 %31 +%32 = OpLabel +OpReturnValue %float_0 +%31 = OpLabel +%33 = OpAccessChain %_ptr_Function_float %bar %uint_0 +%34 = OpLoad %float %33 +OpReturnValue %34 +OpFunctionEnd +)"; + + const std::string fooMergeReturn = + R"(%foo_vf4_ = OpFunction %float None %14 +%bar = OpFunctionParameter %_ptr_Function_v4float +%27 = OpLabel +%41 = OpVariable %_ptr_Function_bool Function %false +%36 = OpVariable %_ptr_Function_float Function +OpSelectionMerge %35 None +OpSwitch %uint_0 %38 +%38 = OpLabel +%28 = OpAccessChain %_ptr_Function_float %bar %uint_0 +%29 = OpLoad %float %28 +%30 = OpFOrdLessThan %bool %29 %float_0 +OpSelectionMerge %31 None +OpBranchConditional %30 %32 %31 +%32 = OpLabel +OpStore %41 %true +OpStore %36 %float_0 +OpBranch %35 +%31 = OpLabel +%33 = OpAccessChain %_ptr_Function_float %bar %uint_0 +%34 = OpLoad %float %33 +OpStore %41 %true +OpStore %36 %34 +OpBranch %35 +%35 = OpLabel +%37 = OpLoad %float %36 +OpReturnValue %37 +OpFunctionEnd +)"; + + const std::string before = + R"(%main = OpFunction %void None %10 +%22 = OpLabel +%color = OpVariable %_ptr_Function_v4float Function +%param = OpVariable %_ptr_Function_v4float Function +%23 = OpLoad %v4float %BaseColor +OpStore %param %23 +%24 = OpFunctionCall %float %foo_vf4_ %param +%25 = OpCompositeConstruct %v4float %24 %24 %24 %24 +OpStore %color %25 +%26 = OpLoad 
%v4float %color +OpStore %gl_FragColor %26 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%false = OpConstantFalse %bool +%_ptr_Function_bool = OpTypePointer Function %bool +%true = OpConstantTrue %bool +%main = OpFunction %void None %10 +%22 = OpLabel +%43 = OpVariable %_ptr_Function_bool Function %false +%44 = OpVariable %_ptr_Function_float Function +%45 = OpVariable %_ptr_Function_float Function +%color = OpVariable %_ptr_Function_v4float Function +%param = OpVariable %_ptr_Function_v4float Function +%23 = OpLoad %v4float %BaseColor +OpStore %param %23 +OpStore %43 %false +OpSelectionMerge %55 None +OpSwitch %uint_0 %47 +%47 = OpLabel +%48 = OpAccessChain %_ptr_Function_float %param %uint_0 +%49 = OpLoad %float %48 +%50 = OpFOrdLessThan %bool %49 %float_0 +OpSelectionMerge %52 None +OpBranchConditional %50 %51 %52 +%51 = OpLabel +OpStore %43 %true +OpStore %44 %float_0 +OpBranch %55 +%52 = OpLabel +%53 = OpAccessChain %_ptr_Function_float %param %uint_0 +%54 = OpLoad %float %53 +OpStore %43 %true +OpStore %44 %54 +OpBranch %55 +%55 = OpLabel +%56 = OpLoad %float %44 +OpStore %45 %56 +%24 = OpLoad %float %45 +%25 = OpCompositeConstruct %v4float %24 %24 %24 %24 +OpStore %color %25 +%26 = OpLoad %v4float %color +OpStore %gl_FragColor %26 +OpReturn +OpFunctionEnd +)"; + + // The early return case must be handled by merge-return first. + AddPass(); + AddPass(); + RunAndCheck(predefs + before + foo, predefs + after + fooMergeReturn); +} + +TEST_F(InlineTest, EarlyReturnNotAppearingLastInFunctionInlined) { + // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/755 + // + // Original example is derived from: + // + // #version 450 + // + // float foo() { + // if (true) { + // } + // } + // + // void main() { foo(); } + // + // But the order of basic blocks in foo is changed so that the return + // block is listed second-last. There is only one return in the callee + // but it does not appear last. 
+ + const std::string predefs = + R"(OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint Vertex %main "main" +OpSource GLSL 450 +OpName %main "main" +OpName %foo_ "foo(" +%void = OpTypeVoid +%4 = OpTypeFunction %void +%bool = OpTypeBool +%true = OpConstantTrue %bool +)"; + + const std::string foo = + R"(%foo_ = OpFunction %void None %4 +%7 = OpLabel +OpSelectionMerge %8 None +OpBranchConditional %true %9 %8 +%8 = OpLabel +OpReturn +%9 = OpLabel +OpBranch %8 +OpFunctionEnd +)"; + + const std::string fooMergeReturn = + R"(%uint = OpTypeInt 32 0 +%uint_0 = OpConstant %uint 0 +%false = OpConstantFalse %bool +%_ptr_Function_bool = OpTypePointer Function %bool +%foo_ = OpFunction %void None %4 +%7 = OpLabel +%18 = OpVariable %_ptr_Function_bool Function %false +OpSelectionMerge %12 None +OpSwitch %uint_0 %13 +%13 = OpLabel +OpSelectionMerge %8 None +OpBranchConditional %true %9 %8 +%8 = OpLabel +OpStore %18 %true +OpBranch %12 +%9 = OpLabel +OpBranch %8 +%12 = OpLabel +OpReturn +OpFunctionEnd +)"; + + const std::string before = + R"(%main = OpFunction %void None %4 +%10 = OpLabel +%11 = OpFunctionCall %void %foo_ +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %4 +%10 = OpLabel +%19 = OpVariable %_ptr_Function_bool Function %false +OpStore %19 %false +OpSelectionMerge %24 None +OpSwitch %uint_0 %21 +%21 = OpLabel +OpSelectionMerge %22 None +OpBranchConditional %true %23 %22 +%22 = OpLabel +OpStore %19 %true +OpBranch %24 +%23 = OpLabel +OpBranch %22 +%24 = OpLabel +OpReturn +OpFunctionEnd +)"; + + // The early return case must be handled by merge-return first. 
+ AddPass(); + AddPass(); + RunAndCheck(predefs + foo + before, predefs + fooMergeReturn + after); +} + +TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) { + // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018 + // + // The callee has a single return, but needs single-trip loop wrapper + // to be inlined because the return is in a selection structure. + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_GLF_color +OpExecutionMode %main OriginUpperLeft +OpSource ESSL 310 +OpName %main "main" +OpName %f_ "f(" +OpName %i "i" +OpName %_GLF_color "_GLF_color" +OpDecorate %_GLF_color Location 0 +%void = OpTypeVoid +%7 = OpTypeFunction %void +%float = OpTypeFloat 32 +%9 = OpTypeFunction %float +%float_1 = OpConstant %float 1 +%bool = OpTypeBool +%false = OpConstantFalse %bool +%true = OpConstantTrue %bool +%int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%int_0 = OpConstant %int 0 +%int_1 = OpConstant %int 1 +%v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_GLF_color = OpVariable %_ptr_Output_v4float Output +%float_0 = OpConstant %float 0 +%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 +)"; + + const std::string new_predefs = + R"(%_ptr_Function_float = OpTypePointer Function %float +%uint = OpTypeInt 32 0 +%uint_0 = OpConstant %uint 0 +%_ptr_Function_bool = OpTypePointer Function %bool +)"; + + const std::string main_before = + R"(%main = OpFunction %void None %7 +%23 = OpLabel +%i = OpVariable %_ptr_Function_int Function +OpStore %i %int_0 +OpBranch %24 +%24 = OpLabel +OpLoopMerge %25 %26 None +OpBranch %27 +%27 = OpLabel +%28 = OpLoad %int %i +%29 = OpSLessThan %bool %28 %int_1 +OpBranchConditional %29 %30 %25 +%30 = OpLabel +OpStore %_GLF_color %21 +%31 = 
OpFunctionCall %float %f_ +OpBranch %26 +%26 = OpLabel +%32 = OpLoad %int %i +%33 = OpIAdd %int %32 %int_1 +OpStore %i %33 +OpBranch %24 +%25 = OpLabel +OpStore %_GLF_color %22 +OpReturn +OpFunctionEnd +)"; + + const std::string main_after = + R"(%main = OpFunction %void None %7 +%23 = OpLabel +%46 = OpVariable %_ptr_Function_bool Function %false +%47 = OpVariable %_ptr_Function_float Function +%48 = OpVariable %_ptr_Function_float Function +%i = OpVariable %_ptr_Function_int Function +OpStore %i %int_0 +OpBranch %24 +%24 = OpLabel +OpLoopMerge %25 %26 None +OpBranch %27 +%27 = OpLabel +%28 = OpLoad %int %i +%29 = OpSLessThan %bool %28 %int_1 +OpBranchConditional %29 %30 %25 +%30 = OpLabel +OpStore %_GLF_color %21 +OpStore %46 %false +OpSelectionMerge %53 None +OpSwitch %uint_0 %50 +%50 = OpLabel +OpSelectionMerge %52 None +OpBranchConditional %true %51 %52 +%51 = OpLabel +OpStore %46 %true +OpStore %47 %float_1 +OpBranch %53 +%52 = OpLabel +OpStore %46 %true +OpStore %47 %float_1 +OpBranch %53 +%53 = OpLabel +%54 = OpLoad %float %47 +OpStore %48 %54 +%31 = OpLoad %float %48 +OpBranch %26 +%26 = OpLabel +%32 = OpLoad %int %i +%33 = OpIAdd %int %32 %int_1 +OpStore %i %33 +OpBranch %24 +%25 = OpLabel +OpStore %_GLF_color %22 +OpReturn +OpFunctionEnd +)"; + + const std::string callee = + R"(%f_ = OpFunction %float None %9 +%34 = OpLabel +OpSelectionMerge %35 None +OpBranchConditional %true %36 %35 +%36 = OpLabel +OpReturnValue %float_1 +%35 = OpLabel +OpReturnValue %float_1 +OpFunctionEnd +)"; + + const std::string calleeMergeReturn = + R"(%f_ = OpFunction %float None %9 +%34 = OpLabel +%45 = OpVariable %_ptr_Function_bool Function %false +%39 = OpVariable %_ptr_Function_float Function +OpSelectionMerge %37 None +OpSwitch %uint_0 %41 +%41 = OpLabel +OpSelectionMerge %35 None +OpBranchConditional %true %36 %35 +%36 = OpLabel +OpStore %45 %true +OpStore %39 %float_1 +OpBranch %37 +%35 = OpLabel +OpStore %45 %true +OpStore %39 %float_1 +OpBranch %37 +%37 = OpLabel +%40 = 
OpLoad %float %39 +OpReturnValue %40 +OpFunctionEnd +)"; + + // The early return case must be handled by merge-return first. + AddPass(); + AddPass(); + RunAndCheck(predefs + main_before + callee, + predefs + new_predefs + main_after + calleeMergeReturn); +} + +TEST_F(InlineTest, ForwardReferencesInPhiInlined) { + // The basic structure of the test case is like this: + // + // int foo() { + // int result = 1; + // if (true) { + // result = 1; + // } + // return result; + // } + // + // void main() { + // int x = foo(); + // } + // + // but with modifications: Using Phi instead of load/store, and the + // return block in foo appears before the "then" block. + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Vertex %main "main" +OpSource GLSL 450 +OpName %main "main" +OpName %foo_ "foo(" +OpName %x "x" +%void = OpTypeVoid +%6 = OpTypeFunction %void +%int = OpTypeInt 32 1 +%8 = OpTypeFunction %int +%bool = OpTypeBool +%true = OpConstantTrue %bool +%int_0 = OpConstant %int 0 +%_ptr_Function_int = OpTypePointer Function %int +)"; + + const std::string callee = + R"(%foo_ = OpFunction %int None %8 +%13 = OpLabel +%14 = OpCopyObject %int %int_0 +OpSelectionMerge %15 None +OpBranchConditional %true %16 %15 +%15 = OpLabel +%17 = OpPhi %int %14 %13 %18 %16 +OpReturnValue %17 +%16 = OpLabel +%18 = OpCopyObject %int %int_0 +OpBranch %15 +OpFunctionEnd +)"; + + const std::string calleeMergeReturn = + R"(%uint = OpTypeInt 32 0 +%uint_0 = OpConstant %uint 0 +%false = OpConstantFalse %bool +%_ptr_Function_bool = OpTypePointer Function %bool +%foo_ = OpFunction %int None %8 +%13 = OpLabel +%29 = OpVariable %_ptr_Function_bool Function %false +%22 = OpVariable %_ptr_Function_int Function +OpSelectionMerge %21 None +OpSwitch %uint_0 %24 +%24 = OpLabel +%14 = OpCopyObject %int %int_0 +OpSelectionMerge %15 None +OpBranchConditional %true %16 %15 +%15 = OpLabel +%17 = OpPhi %int %14 %24 %18 %16 
+OpStore %29 %true +OpStore %22 %17 +OpBranch %21 +%16 = OpLabel +%18 = OpCopyObject %int %int_0 +OpBranch %15 +%21 = OpLabel +%23 = OpLoad %int %22 +OpReturnValue %23 +OpFunctionEnd +)"; + + const std::string before = + R"(%main = OpFunction %void None %6 +%19 = OpLabel +%x = OpVariable %_ptr_Function_int Function +%20 = OpFunctionCall %int %foo_ +OpStore %x %20 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %6 +%19 = OpLabel +%30 = OpVariable %_ptr_Function_bool Function %false +%31 = OpVariable %_ptr_Function_int Function +%32 = OpVariable %_ptr_Function_int Function +%x = OpVariable %_ptr_Function_int Function +OpStore %30 %false +OpSelectionMerge %40 None +OpSwitch %uint_0 %34 +%34 = OpLabel +%35 = OpCopyObject %int %int_0 +OpSelectionMerge %36 None +OpBranchConditional %true %38 %36 +%36 = OpLabel +%37 = OpPhi %int %35 %34 %39 %38 +OpStore %30 %true +OpStore %31 %37 +OpBranch %40 +%38 = OpLabel +%39 = OpCopyObject %int %int_0 +OpBranch %36 +%40 = OpLabel +%41 = OpLoad %int %31 +OpStore %32 %41 +%20 = OpLoad %int %32 +OpStore %x %20 +OpReturn +OpFunctionEnd +)"; + + AddPass(); + AddPass(); + RunAndCheck(predefs + callee + before, predefs + calleeMergeReturn + after); +} + // TODO(greg-lunarg): Add tests to verify handling of these cases: // // Empty modules