diff --git a/source/opt/dead_branch_elim_pass.cpp b/source/opt/dead_branch_elim_pass.cpp index 7f3e737c4..c65e55104 100644 --- a/source/opt/dead_branch_elim_pass.cpp +++ b/source/opt/dead_branch_elim_pass.cpp @@ -93,7 +93,7 @@ BasicBlock* DeadBranchElimPass::GetParentBlock(uint32_t id) { bool DeadBranchElimPass::MarkLiveBlocks( Function* func, std::unordered_set* live_blocks) { - StructuredCFGAnalysis* cfgAnalysis = context()->GetStructuedCFGAnalaysis(); + StructuredCFGAnalysis* cfgAnalysis = context()->GetStructuredCFGAnalysis(); std::unordered_set continues; std::vector stack; diff --git a/source/opt/inline_pass.cpp b/source/opt/inline_pass.cpp index 5a88ef5d3..0543f4487 100644 --- a/source/opt/inline_pass.cpp +++ b/source/opt/inline_pass.cpp @@ -27,7 +27,6 @@ static const int kSpvFunctionCallFunctionId = 2; static const int kSpvFunctionCallArgumentId = 3; static const int kSpvReturnValueId = 0; -static const int kSpvLoopMergeMergeBlockId = 0; static const int kSpvLoopMergeContinueTargetIdInIdx = 1; namespace spvtools { @@ -225,8 +224,8 @@ void InlinePass::GenInlineCode( kSpvFunctionCallFunctionId)]; // Check for multiple returns in the callee. - auto fi = multi_return_funcs_.find(calleeFn->result_id()); - const bool multiReturn = fi != multi_return_funcs_.end(); + auto fi = early_return_funcs_.find(calleeFn->result_id()); + const bool earlyReturn = fi != early_return_funcs_.end(); // Map parameters to actual arguments. MapParams(calleeFn, call_inst_itr, &callee2caller); @@ -280,7 +279,7 @@ void InlinePass::GenInlineCode( &call_inst_itr, &new_blk_ptr, &prevInstWasReturn, &returnLabelId, &returnVarId, caller_is_loop_header, callee_begins_with_structured_header, &calleeTypeId, - &multiBlocks, &postCallSB, &preCallSB, multiReturn, + &multiBlocks, &postCallSB, &preCallSB, earlyReturn, &singleTripLoopHeaderId, &singleTripLoopContinueId, &callee_result_ids, this](const Instruction* cpi) { switch (cpi->opcode()) { @@ -369,7 +368,7 @@ void InlinePass::GenInlineCode( // satisfy dominance. callee2caller[cpi->result_id()] = guard_block_id; } - // If callee has multiple returns, insert a header block for + // If callee has early return, insert a header block for // single-trip loop that will encompass callee code. Start postheader // block. // @@ -380,7 +379,7 @@ void InlinePass::GenInlineCode( // We still need to split the caller block and insert a guard block. // But we only need to do it once. We haven't done it yet, but the // single-trip loop header will serve the same purpose. - if (multiReturn) { + if (earlyReturn) { singleTripLoopHeaderId = this->TakeNextId(); AddBranch(singleTripLoopHeaderId, &new_blk_ptr); new_blocks->push_back(std::move(new_blk_ptr)); @@ -429,9 +428,9 @@ void InlinePass::GenInlineCode( // If previous instruction was return, insert branch instruction // to return block. if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr); - if (multiReturn) { - // If we generated a loop header to for the single-trip loop - // to accommodate multiple returns, insert the continue + if (earlyReturn) { + // If we generated a loop header for the single-trip loop + // to accommodate early returns, insert the continue // target block now, with a false branch back to the loop header. new_blocks->push_back(std::move(new_blk_ptr)); new_blk_ptr = @@ -561,44 +560,24 @@ void InlinePass::UpdateSucceedingPhis( }); } -bool InlinePass::HasMultipleReturns(Function* func) { - bool seenReturn = false; - bool multipleReturns = false; +bool InlinePass::HasNoReturnInStructuredConstruct(Function* func) { + // If control not structured, do not do loop/return analysis + // TODO: Analyze returns in non-structured control flow + if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader)) + return false; + const auto structured_analysis = context()->GetStructuredCFGAnalysis(); + // Search for returns in structured construct. + bool return_in_construct = false; for (auto& blk : *func) { auto terminal_ii = blk.cend(); --terminal_ii; - if (terminal_ii->opcode() == SpvOpReturn || - terminal_ii->opcode() == SpvOpReturnValue) { - if (seenReturn) { - multipleReturns = true; - break; - } - seenReturn = true; + if (spvOpcodeIsReturn(terminal_ii->opcode()) && + structured_analysis->ContainingConstruct(blk.id()) != 0) { + return_in_construct = true; + break; } } - return multipleReturns; -} - -void InlinePass::ComputeStructuredSuccessors(Function* func) { - // If header, make merge block first successor. - for (auto& blk : *func) { - uint32_t mbid = blk.MergeBlockIdIfAny(); - if (mbid != 0) { - block2structured_succs_[&blk].push_back(id2block_[mbid]); - } - - // Add true successors. - const auto& const_blk = blk; - const_blk.ForEachSuccessorLabel([&blk, this](const uint32_t sbid) { - block2structured_succs_[&blk].push_back(id2block_[sbid]); - }); - } -} - -InlinePass::GetBlocksFunction InlinePass::StructuredSuccessorsFunction() { - return [this](const BasicBlock* block) { - return &(block2structured_succs_[block]); - }; + return !return_in_construct; } bool InlinePass::HasNoReturnInLoop(Function* func) { @@ -606,52 +585,27 @@ bool InlinePass::HasNoReturnInLoop(Function* func) { // TODO: Analyze returns in non-structured control flow if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader)) return false; - // Compute structured block order. This order has the property - // that dominators are before all blocks they dominate and merge blocks - // are after all blocks that are in the control constructs of their header. - ComputeStructuredSuccessors(func); - auto ignore_block = [](cbb_ptr) {}; - auto ignore_edge = [](cbb_ptr, cbb_ptr) {}; - std::list structuredOrder; - CFA::DepthFirstTraversal( - &*func->begin(), StructuredSuccessorsFunction(), ignore_block, - [&](cbb_ptr b) { structuredOrder.push_front(b); }, ignore_edge); - // Search for returns in loops. Only need to track outermost loop + const auto structured_analysis = context()->GetStructuredCFGAnalysis(); + // Search for returns in structured construct. bool return_in_loop = false; - uint32_t outerLoopMergeId = 0; - for (auto& blk : structuredOrder) { - // Exiting current outer loop - if (blk->id() == outerLoopMergeId) outerLoopMergeId = 0; - // Return block - auto terminal_ii = blk->cend(); + for (auto& blk : *func) { + auto terminal_ii = blk.cend(); --terminal_ii; - if (terminal_ii->opcode() == SpvOpReturn || - terminal_ii->opcode() == SpvOpReturnValue) { - if (outerLoopMergeId != 0) { - return_in_loop = true; - break; - } - } else if (terminal_ii != blk->cbegin()) { - auto merge_ii = terminal_ii; - --merge_ii; - // Entering outermost loop - if (merge_ii->opcode() == SpvOpLoopMerge && outerLoopMergeId == 0) - outerLoopMergeId = - merge_ii->GetSingleWordOperand(kSpvLoopMergeMergeBlockId); + if (spvOpcodeIsReturn(terminal_ii->opcode()) && + structured_analysis->ContainingLoop(blk.id()) != 0) { + return_in_loop = true; + break; } } return !return_in_loop; } void InlinePass::AnalyzeReturns(Function* func) { - // Look for multiple returns - if (!HasMultipleReturns(func)) { + if (HasNoReturnInLoop(func)) { no_return_in_loop_.insert(func->result_id()); - return; + if (!HasNoReturnInStructuredConstruct(func)) + early_return_funcs_.insert(func->result_id()); } - multi_return_funcs_.insert(func->result_id()); - // If multiple returns, see if any are in a loop - if (HasNoReturnInLoop(func)) no_return_in_loop_.insert(func->result_id()); } bool InlinePass::IsInlinableFunction(Function* func) { @@ -673,10 +627,9 @@ void InlinePass::InitializeInline() { // clear collections id2function_.clear(); id2block_.clear(); - block2structured_succs_.clear(); inlinable_.clear(); no_return_in_loop_.clear(); - multi_return_funcs_.clear(); + early_return_funcs_.clear(); for (auto& fn : *get_module()) { // Initialize function and block maps. diff --git a/source/opt/inline_pass.h b/source/opt/inline_pass.h index 55369c98c..e23e7f0f8 100644 --- a/source/opt/inline_pass.h +++ b/source/opt/inline_pass.h @@ -36,9 +36,6 @@ class InlinePass : public Pass { using cbb_ptr = const BasicBlock*; public: - using GetBlocksFunction = - std::function*(const BasicBlock*)>; - virtual ~InlinePass() = default; protected: @@ -122,21 +119,9 @@ class InlinePass : public Pass { // Return true if |inst| is a function call that can be inlined. bool IsInlinableFunctionCall(const Instruction* inst); - // Compute structured successors for function |func|. - // A block's structured successors are the blocks it branches to - // together with its declared merge block if it has one. - // When order matters, the merge block always appears first. - // This assures correct depth first search in the presence of early - // returns and kills. If the successor vector contain duplicates - // if the merge block, they are safely ignored by DFS. - void ComputeStructuredSuccessors(Function* func); - - // Return function to return ordered structure successors for a given block - // Assumes ComputeStructuredSuccessors() has been called. - GetBlocksFunction StructuredSuccessorsFunction(); - - // Return true if |func| has multiple returns - bool HasMultipleReturns(Function* func); + // Return true if |func| does not have a return that is + // nested in a structured if, switch or loop. + bool HasNoReturnInStructuredConstruct(Function* func); // Return true if |func| has no return in a loop. The current analysis // requires structured control flow, so return false if control flow not @@ -163,8 +148,8 @@ class InlinePass : public Pass { // CFG. It has functionality not present in CFG. Consolidate. std::unordered_map id2block_; - // Set of ids of functions with multiple returns. - std::set multi_return_funcs_; + // Set of ids of functions with early return. + std::set early_return_funcs_; // Set of ids of functions with no returns in loop std::set no_return_in_loop_; @@ -174,13 +159,6 @@ class InlinePass : public Pass { // result id for OpConstantFalse uint32_t false_id_; - - // Map from block to its structured successor blocks. See - // ComputeStructuredSuccessors() for definition. TODO(dnovillo): This is - // superfluous wrt CFG, but it seems to be computed in a slightly - // different way in the inliner. Can these be consolidated? - std::unordered_map> - block2structured_succs_; }; } // namespace opt diff --git a/source/opt/ir_context.h b/source/opt/ir_context.h index 3d323e6a2..742c868aa 100644 --- a/source/opt/ir_context.h +++ b/source/opt/ir_context.h @@ -231,7 +231,7 @@ class IRContext { // Returns a pointer to a StructuredCFGAnalysis. If the analysis is invalid, // it is rebuilt first. - StructuredCFGAnalysis* GetStructuedCFGAnalaysis() { + StructuredCFGAnalysis* GetStructuredCFGAnalysis() { if (!AreAnalysesValid(kAnalysisStructuredCFG)) { BuildStructuredCFGAnalysis(); } diff --git a/source/opt/merge_return_pass.cpp b/source/opt/merge_return_pass.cpp index b5df490c0..10c7d2f51 100644 --- a/source/opt/merge_return_pass.cpp +++ b/source/opt/merge_return_pass.cpp @@ -743,7 +743,7 @@ bool MergeReturnPass::HasNontrivialUnreachableBlocks(Function* function) { } StructuredCFGAnalysis* struct_cfg_analysis = - context()->GetStructuedCFGAnalaysis(); + context()->GetStructuredCFGAnalysis(); if (struct_cfg_analysis->IsMergeBlock(bb.id())) { // |bb| must be an empty block ending with OpUnreachable. if (bb.begin()->opcode() != SpvOpUnreachable) { diff --git a/test/opt/inline_test.cpp b/test/opt/inline_test.cpp index f0a376e13..2ece9f4f8 100644 --- a/test/opt/inline_test.cpp +++ b/test/opt/inline_test.cpp @@ -2144,19 +2144,12 @@ OpFunctionEnd OpBranch %19 %19 = OpLabel %20 = OpCopyObject %int %int_2 +%25 = OpCopyObject %int %int_0 OpLoopMerge %23 %26 None -OpBranch %25 -%25 = OpLabel -OpLoopMerge %26 %27 None -OpBranch %28 -%28 = OpLabel -%29 = OpCopyObject %int %int_0 -OpBranch %26 -%30 = OpLabel -%31 = OpCopyObject %int %int_1 OpBranch %26 %27 = OpLabel -OpBranchConditional %false %25 %26 +%28 = OpCopyObject %int %int_1 +OpBranch %26 %26 = OpLabel %22 = OpCopyObject %int %int_3 OpBranchConditional %true %19 %23 @@ -2226,23 +2219,16 @@ OpFunctionEnd R"(%1 = OpFunction %void None %9 %20 = OpLabel %21 = OpCopyObject %int %int_3 -OpBranch %24 -%24 = OpLabel -OpLoopMerge %25 %26 None -OpBranch %27 -%27 = OpLabel -%28 = OpCopyObject %int %int_0 -OpBranch %29 -%29 = OpLabel -%30 = OpPhi %int %28 %27 -%31 = OpCopyObject %int %int_1 +%24 = OpCopyObject %int %int_0 OpBranch %25 -%32 = OpLabel -%33 = OpCopyObject %int %int_2 -OpBranch %25 -%26 = OpLabel -OpBranchConditional %false %24 %25 %25 = OpLabel +%26 = OpPhi %int %24 %20 +%27 = OpCopyObject %int %int_1 +OpBranch %28 +%29 = OpLabel +%30 = OpCopyObject %int %int_2 +OpBranch %28 +%28 = OpLabel %23 = OpCopyObject %int %int_4 OpReturn OpFunctionEnd @@ -2253,6 +2239,222 @@ OpFunctionEnd false, true); } +TEST_F(InlineTest, NonInlinableCalleeWithSingleReturn) { + // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018 + // + // The callee has a single return, but cannot be inlined because the + // return is inside a loop. + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_GLF_color +OpExecutionMode %main OriginUpperLeft +OpSource ESSL 310 +OpName %main "main" +OpName %f_ "f(" +OpName %i "i" +OpName %_GLF_color "_GLF_color" +OpDecorate %_GLF_color Location 0 +%void = OpTypeVoid +%7 = OpTypeFunction %void +%float = OpTypeFloat 32 +%9 = OpTypeFunction %float +%float_1 = OpConstant %float 1 +%bool = OpTypeBool +%false = OpConstantFalse %bool +%int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%int_0 = OpConstant %int 0 +%int_1 = OpConstant %int 1 +%v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_GLF_color = OpVariable %_ptr_Output_v4float Output +%float_0 = OpConstant %float 0 +%20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%21 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 +)"; + + const std::string caller = + R"(%main = OpFunction %void None %7 +%22 = OpLabel +%i = OpVariable %_ptr_Function_int Function +OpStore %i %int_0 +OpBranch %23 +%23 = OpLabel +OpLoopMerge %24 %25 None +OpBranch %26 +%26 = OpLabel +%27 = OpLoad %int %i +%28 = OpSLessThan %bool %27 %int_1 +OpBranchConditional %28 %29 %24 +%29 = OpLabel +OpStore %_GLF_color %20 +%30 = OpFunctionCall %float %f_ +OpBranch %25 +%25 = OpLabel +%31 = OpLoad %int %i +%32 = OpIAdd %int %31 %int_1 +OpStore %i %32 +OpBranch %23 +%24 = OpLabel +OpStore %_GLF_color %21 +OpReturn +OpFunctionEnd +)"; + + const std::string callee = + R"(%f_ = OpFunction %float None %9 +%33 = OpLabel +OpBranch %34 +%34 = OpLabel +OpLoopMerge %35 %36 None +OpBranch %37 +%37 = OpLabel +OpReturnValue %float_1 +%36 = OpLabel +OpBranch %34 +%35 = OpLabel +OpUnreachable +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + caller + callee, predefs + caller + callee, false, true); +} + +TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) { + // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018 + // + // The callee has a single return, but needs single-trip loop wrapper + // to be inlined because the return is in a selection structure. + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_GLF_color +OpExecutionMode %main OriginUpperLeft +OpSource ESSL 310 +OpName %main "main" +OpName %f_ "f(" +OpName %i "i" +OpName %_GLF_color "_GLF_color" +OpDecorate %_GLF_color Location 0 +%void = OpTypeVoid +%7 = OpTypeFunction %void +%float = OpTypeFloat 32 +%9 = OpTypeFunction %float +%float_1 = OpConstant %float 1 +%bool = OpTypeBool +%false = OpConstantFalse %bool +%true = OpConstantTrue %bool +%int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%int_0 = OpConstant %int 0 +%int_1 = OpConstant %int 1 +%v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_GLF_color = OpVariable %_ptr_Output_v4float Output +%float_0 = OpConstant %float 0 +%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 +)"; + + const std::string new_predefs = + R"(%_ptr_Function_float = OpTypePointer Function %float +)"; + + const std::string main_before = + R"(%main = OpFunction %void None %7 +%23 = OpLabel +%i = OpVariable %_ptr_Function_int Function +OpStore %i %int_0 +OpBranch %24 +%24 = OpLabel +OpLoopMerge %25 %26 None +OpBranch %27 +%27 = OpLabel +%28 = OpLoad %int %i +%29 = OpSLessThan %bool %28 %int_1 +OpBranchConditional %29 %30 %25 +%30 = OpLabel +OpStore %_GLF_color %21 +%31 = OpFunctionCall %float %f_ +OpBranch %26 +%26 = OpLabel +%32 = OpLoad %int %i +%33 = OpIAdd %int %32 %int_1 +OpStore %i %33 +OpBranch %24 +%25 = OpLabel +OpStore %_GLF_color %22 +OpReturn +OpFunctionEnd +)"; + + const std::string main_after = + R"(%main = OpFunction %void None %7 +%23 = OpLabel +%38 = OpVariable %_ptr_Function_float Function +%i = OpVariable %_ptr_Function_int Function +OpStore %i %int_0 +OpBranch %24 +%24 = OpLabel +OpLoopMerge %25 %26 None +OpBranch %27 +%27 = OpLabel +%28 = OpLoad %int %i +%29 = OpSLessThan %bool %28 %int_1 +OpBranchConditional %29 %30 %25 +%30 = OpLabel +OpStore %_GLF_color %21 +OpBranch %39 +%39 = OpLabel +OpLoopMerge %40 %41 None +OpBranch %42 +%42 = OpLabel +OpSelectionMerge %43 None +OpBranchConditional %true %44 %43 +%44 = OpLabel +OpStore %38 %float_1 +OpBranch %40 +%43 = OpLabel +OpUnreachable +%41 = OpLabel +OpBranchConditional %false %39 %40 +%40 = OpLabel +%31 = OpLoad %float %38 +OpBranch %26 +%26 = OpLabel +%32 = OpLoad %int %i +%33 = OpIAdd %int %32 %int_1 +OpStore %i %33 +OpBranch %24 +%25 = OpLabel +OpStore %_GLF_color %22 +OpReturn +OpFunctionEnd +)"; + + const std::string callee = + R"(%f_ = OpFunction %float None %9 +%34 = OpLabel +OpSelectionMerge %35 None +OpBranchConditional %true %36 %35 +%36 = OpLabel +OpReturnValue %float_1 +%35 = OpLabel +OpUnreachable +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + main_before + callee, + predefs + new_predefs + main_after + callee, false, true); +} + TEST_F(InlineTest, Decorated1) { // Same test as Simple with the difference // that OpFAdd in the outlined function is