Don't assume one return means function can be inlined. (#2018) (#2025)

If there is only 1 return and it is in a loop, then the function cannot be inlined.

Fix condition when inlined code needs one-trip loop wrapper.  The dummy loop is needed when there is a return inside a selection construct.  Even if there is only 1 return.
This commit is contained in:
greg-lunarg 2018-11-08 07:11:20 -07:00 committed by Steven Perron
parent 4e6c54b602
commit 6721478ef1
6 changed files with 268 additions and 135 deletions

View File

@ -93,7 +93,7 @@ BasicBlock* DeadBranchElimPass::GetParentBlock(uint32_t id) {
bool DeadBranchElimPass::MarkLiveBlocks(
Function* func, std::unordered_set<BasicBlock*>* live_blocks) {
StructuredCFGAnalysis* cfgAnalysis = context()->GetStructuedCFGAnalaysis();
StructuredCFGAnalysis* cfgAnalysis = context()->GetStructuredCFGAnalysis();
std::unordered_set<BasicBlock*> continues;
std::vector<BasicBlock*> stack;

View File

@ -27,7 +27,6 @@
static const int kSpvFunctionCallFunctionId = 2;
static const int kSpvFunctionCallArgumentId = 3;
static const int kSpvReturnValueId = 0;
static const int kSpvLoopMergeMergeBlockId = 0;
static const int kSpvLoopMergeContinueTargetIdInIdx = 1;
namespace spvtools {
@ -225,8 +224,8 @@ void InlinePass::GenInlineCode(
kSpvFunctionCallFunctionId)];
// Check for multiple returns in the callee.
auto fi = multi_return_funcs_.find(calleeFn->result_id());
const bool multiReturn = fi != multi_return_funcs_.end();
auto fi = early_return_funcs_.find(calleeFn->result_id());
const bool earlyReturn = fi != early_return_funcs_.end();
// Map parameters to actual arguments.
MapParams(calleeFn, call_inst_itr, &callee2caller);
@ -280,7 +279,7 @@ void InlinePass::GenInlineCode(
&call_inst_itr, &new_blk_ptr, &prevInstWasReturn,
&returnLabelId, &returnVarId, caller_is_loop_header,
callee_begins_with_structured_header, &calleeTypeId,
&multiBlocks, &postCallSB, &preCallSB, multiReturn,
&multiBlocks, &postCallSB, &preCallSB, earlyReturn,
&singleTripLoopHeaderId, &singleTripLoopContinueId,
&callee_result_ids, this](const Instruction* cpi) {
switch (cpi->opcode()) {
@ -369,7 +368,7 @@ void InlinePass::GenInlineCode(
// satisfy dominance.
callee2caller[cpi->result_id()] = guard_block_id;
}
// If callee has multiple returns, insert a header block for
// If callee has early return, insert a header block for
// single-trip loop that will encompass callee code. Start postheader
// block.
//
@ -380,7 +379,7 @@ void InlinePass::GenInlineCode(
// We still need to split the caller block and insert a guard block.
// But we only need to do it once. We haven't done it yet, but the
// single-trip loop header will serve the same purpose.
if (multiReturn) {
if (earlyReturn) {
singleTripLoopHeaderId = this->TakeNextId();
AddBranch(singleTripLoopHeaderId, &new_blk_ptr);
new_blocks->push_back(std::move(new_blk_ptr));
@ -429,9 +428,9 @@ void InlinePass::GenInlineCode(
// If previous instruction was return, insert branch instruction
// to return block.
if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr);
if (multiReturn) {
// If we generated a loop header to for the single-trip loop
// to accommodate multiple returns, insert the continue
if (earlyReturn) {
// If we generated a loop header for the single-trip loop
// to accommodate early returns, insert the continue
// target block now, with a false branch back to the loop header.
new_blocks->push_back(std::move(new_blk_ptr));
new_blk_ptr =
@ -561,44 +560,24 @@ void InlinePass::UpdateSucceedingPhis(
});
}
bool InlinePass::HasMultipleReturns(Function* func) {
bool seenReturn = false;
bool multipleReturns = false;
bool InlinePass::HasNoReturnInStructuredConstruct(Function* func) {
// If control not structured, do not do loop/return analysis
// TODO: Analyze returns in non-structured control flow
if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
return false;
const auto structured_analysis = context()->GetStructuredCFGAnalysis();
// Search for returns in structured construct.
bool return_in_construct = false;
for (auto& blk : *func) {
auto terminal_ii = blk.cend();
--terminal_ii;
if (terminal_ii->opcode() == SpvOpReturn ||
terminal_ii->opcode() == SpvOpReturnValue) {
if (seenReturn) {
multipleReturns = true;
break;
}
seenReturn = true;
if (spvOpcodeIsReturn(terminal_ii->opcode()) &&
structured_analysis->ContainingConstruct(blk.id()) != 0) {
return_in_construct = true;
break;
}
}
return multipleReturns;
}
void InlinePass::ComputeStructuredSuccessors(Function* func) {
// If header, make merge block first successor.
for (auto& blk : *func) {
uint32_t mbid = blk.MergeBlockIdIfAny();
if (mbid != 0) {
block2structured_succs_[&blk].push_back(id2block_[mbid]);
}
// Add true successors.
const auto& const_blk = blk;
const_blk.ForEachSuccessorLabel([&blk, this](const uint32_t sbid) {
block2structured_succs_[&blk].push_back(id2block_[sbid]);
});
}
}
InlinePass::GetBlocksFunction InlinePass::StructuredSuccessorsFunction() {
return [this](const BasicBlock* block) {
return &(block2structured_succs_[block]);
};
return !return_in_construct;
}
bool InlinePass::HasNoReturnInLoop(Function* func) {
@ -606,52 +585,27 @@ bool InlinePass::HasNoReturnInLoop(Function* func) {
// TODO: Analyze returns in non-structured control flow
if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
return false;
// Compute structured block order. This order has the property
// that dominators are before all blocks they dominate and merge blocks
// are after all blocks that are in the control constructs of their header.
ComputeStructuredSuccessors(func);
auto ignore_block = [](cbb_ptr) {};
auto ignore_edge = [](cbb_ptr, cbb_ptr) {};
std::list<const BasicBlock*> structuredOrder;
CFA<BasicBlock>::DepthFirstTraversal(
&*func->begin(), StructuredSuccessorsFunction(), ignore_block,
[&](cbb_ptr b) { structuredOrder.push_front(b); }, ignore_edge);
// Search for returns in loops. Only need to track outermost loop
const auto structured_analysis = context()->GetStructuredCFGAnalysis();
// Search for returns in structured construct.
bool return_in_loop = false;
uint32_t outerLoopMergeId = 0;
for (auto& blk : structuredOrder) {
// Exiting current outer loop
if (blk->id() == outerLoopMergeId) outerLoopMergeId = 0;
// Return block
auto terminal_ii = blk->cend();
for (auto& blk : *func) {
auto terminal_ii = blk.cend();
--terminal_ii;
if (terminal_ii->opcode() == SpvOpReturn ||
terminal_ii->opcode() == SpvOpReturnValue) {
if (outerLoopMergeId != 0) {
return_in_loop = true;
break;
}
} else if (terminal_ii != blk->cbegin()) {
auto merge_ii = terminal_ii;
--merge_ii;
// Entering outermost loop
if (merge_ii->opcode() == SpvOpLoopMerge && outerLoopMergeId == 0)
outerLoopMergeId =
merge_ii->GetSingleWordOperand(kSpvLoopMergeMergeBlockId);
if (spvOpcodeIsReturn(terminal_ii->opcode()) &&
structured_analysis->ContainingLoop(blk.id()) != 0) {
return_in_loop = true;
break;
}
}
return !return_in_loop;
}
void InlinePass::AnalyzeReturns(Function* func) {
// Look for multiple returns
if (!HasMultipleReturns(func)) {
if (HasNoReturnInLoop(func)) {
no_return_in_loop_.insert(func->result_id());
return;
if (!HasNoReturnInStructuredConstruct(func))
early_return_funcs_.insert(func->result_id());
}
multi_return_funcs_.insert(func->result_id());
// If multiple returns, see if any are in a loop
if (HasNoReturnInLoop(func)) no_return_in_loop_.insert(func->result_id());
}
bool InlinePass::IsInlinableFunction(Function* func) {
@ -673,10 +627,9 @@ void InlinePass::InitializeInline() {
// clear collections
id2function_.clear();
id2block_.clear();
block2structured_succs_.clear();
inlinable_.clear();
no_return_in_loop_.clear();
multi_return_funcs_.clear();
early_return_funcs_.clear();
for (auto& fn : *get_module()) {
// Initialize function and block maps.

View File

@ -36,9 +36,6 @@ class InlinePass : public Pass {
using cbb_ptr = const BasicBlock*;
public:
using GetBlocksFunction =
std::function<std::vector<BasicBlock*>*(const BasicBlock*)>;
virtual ~InlinePass() = default;
protected:
@ -122,21 +119,9 @@ class InlinePass : public Pass {
// Return true if |inst| is a function call that can be inlined.
bool IsInlinableFunctionCall(const Instruction* inst);
// Compute structured successors for function |func|.
// A block's structured successors are the blocks it branches to
// together with its declared merge block if it has one.
// When order matters, the merge block always appears first.
// This assures correct depth first search in the presence of early
// returns and kills. If the successor vector contain duplicates
// if the merge block, they are safely ignored by DFS.
void ComputeStructuredSuccessors(Function* func);
// Return function to return ordered structure successors for a given block
// Assumes ComputeStructuredSuccessors() has been called.
GetBlocksFunction StructuredSuccessorsFunction();
// Return true if |func| has multiple returns
bool HasMultipleReturns(Function* func);
// Return true if |func| does not have a return that is
// nested in a structured if, switch or loop.
bool HasNoReturnInStructuredConstruct(Function* func);
// Return true if |func| has no return in a loop. The current analysis
// requires structured control flow, so return false if control flow not
@ -163,8 +148,8 @@ class InlinePass : public Pass {
// CFG. It has functionality not present in CFG. Consolidate.
std::unordered_map<uint32_t, BasicBlock*> id2block_;
// Set of ids of functions with multiple returns.
std::set<uint32_t> multi_return_funcs_;
// Set of ids of functions with early return.
std::set<uint32_t> early_return_funcs_;
// Set of ids of functions with no returns in loop
std::set<uint32_t> no_return_in_loop_;
@ -174,13 +159,6 @@ class InlinePass : public Pass {
// result id for OpConstantFalse
uint32_t false_id_;
// Map from block to its structured successor blocks. See
// ComputeStructuredSuccessors() for definition. TODO(dnovillo): This is
// superfluous wrt CFG, but it seems to be computed in a slightly
// different way in the inliner. Can these be consolidated?
std::unordered_map<const BasicBlock*, std::vector<BasicBlock*>>
block2structured_succs_;
};
} // namespace opt

View File

@ -231,7 +231,7 @@ class IRContext {
// Returns a pointer to a StructuredCFGAnalysis. If the analysis is invalid,
// it is rebuilt first.
StructuredCFGAnalysis* GetStructuedCFGAnalaysis() {
StructuredCFGAnalysis* GetStructuredCFGAnalysis() {
if (!AreAnalysesValid(kAnalysisStructuredCFG)) {
BuildStructuredCFGAnalysis();
}

View File

@ -743,7 +743,7 @@ bool MergeReturnPass::HasNontrivialUnreachableBlocks(Function* function) {
}
StructuredCFGAnalysis* struct_cfg_analysis =
context()->GetStructuedCFGAnalaysis();
context()->GetStructuredCFGAnalysis();
if (struct_cfg_analysis->IsMergeBlock(bb.id())) {
// |bb| must be an empty block ending with OpUnreachable.
if (bb.begin()->opcode() != SpvOpUnreachable) {

View File

@ -2144,19 +2144,12 @@ OpFunctionEnd
OpBranch %19
%19 = OpLabel
%20 = OpCopyObject %int %int_2
%25 = OpCopyObject %int %int_0
OpLoopMerge %23 %26 None
OpBranch %25
%25 = OpLabel
OpLoopMerge %26 %27 None
OpBranch %28
%28 = OpLabel
%29 = OpCopyObject %int %int_0
OpBranch %26
%30 = OpLabel
%31 = OpCopyObject %int %int_1
OpBranch %26
%27 = OpLabel
OpBranchConditional %false %25 %26
%28 = OpCopyObject %int %int_1
OpBranch %26
%26 = OpLabel
%22 = OpCopyObject %int %int_3
OpBranchConditional %true %19 %23
@ -2226,23 +2219,16 @@ OpFunctionEnd
R"(%1 = OpFunction %void None %9
%20 = OpLabel
%21 = OpCopyObject %int %int_3
OpBranch %24
%24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%28 = OpCopyObject %int %int_0
OpBranch %29
%29 = OpLabel
%30 = OpPhi %int %28 %27
%31 = OpCopyObject %int %int_1
%24 = OpCopyObject %int %int_0
OpBranch %25
%32 = OpLabel
%33 = OpCopyObject %int %int_2
OpBranch %25
%26 = OpLabel
OpBranchConditional %false %24 %25
%25 = OpLabel
%26 = OpPhi %int %24 %20
%27 = OpCopyObject %int %int_1
OpBranch %28
%29 = OpLabel
%30 = OpCopyObject %int %int_2
OpBranch %28
%28 = OpLabel
%23 = OpCopyObject %int %int_4
OpReturn
OpFunctionEnd
@ -2253,6 +2239,222 @@ OpFunctionEnd
false, true);
}
TEST_F(InlineTest, NonInlinableCalleeWithSingleReturn) {
// The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
//
// The callee has a single return, but cannot be inlined because the
// return is inside a loop.
const std::string predefs =
R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %_GLF_color
OpExecutionMode %main OriginUpperLeft
OpSource ESSL 310
OpName %main "main"
OpName %f_ "f("
OpName %i "i"
OpName %_GLF_color "_GLF_color"
OpDecorate %_GLF_color Location 0
%void = OpTypeVoid
%7 = OpTypeFunction %void
%float = OpTypeFloat 32
%9 = OpTypeFunction %float
%float_1 = OpConstant %float 1
%bool = OpTypeBool
%false = OpConstantFalse %bool
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_GLF_color = OpVariable %_ptr_Output_v4float Output
%float_0 = OpConstant %float 0
%20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
%21 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
)";
const std::string caller =
R"(%main = OpFunction %void None %7
%22 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %23
%23 = OpLabel
OpLoopMerge %24 %25 None
OpBranch %26
%26 = OpLabel
%27 = OpLoad %int %i
%28 = OpSLessThan %bool %27 %int_1
OpBranchConditional %28 %29 %24
%29 = OpLabel
OpStore %_GLF_color %20
%30 = OpFunctionCall %float %f_
OpBranch %25
%25 = OpLabel
%31 = OpLoad %int %i
%32 = OpIAdd %int %31 %int_1
OpStore %i %32
OpBranch %23
%24 = OpLabel
OpStore %_GLF_color %21
OpReturn
OpFunctionEnd
)";
const std::string callee =
R"(%f_ = OpFunction %float None %9
%33 = OpLabel
OpBranch %34
%34 = OpLabel
OpLoopMerge %35 %36 None
OpBranch %37
%37 = OpLabel
OpReturnValue %float_1
%36 = OpLabel
OpBranch %34
%35 = OpLabel
OpUnreachable
OpFunctionEnd
)";
SinglePassRunAndCheck<InlineExhaustivePass>(
predefs + caller + callee, predefs + caller + callee, false, true);
}
TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) {
// The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
//
// The callee has a single return, but needs single-trip loop wrapper
// to be inlined because the return is in a selection structure.
const std::string predefs =
R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %_GLF_color
OpExecutionMode %main OriginUpperLeft
OpSource ESSL 310
OpName %main "main"
OpName %f_ "f("
OpName %i "i"
OpName %_GLF_color "_GLF_color"
OpDecorate %_GLF_color Location 0
%void = OpTypeVoid
%7 = OpTypeFunction %void
%float = OpTypeFloat 32
%9 = OpTypeFunction %float
%float_1 = OpConstant %float 1
%bool = OpTypeBool
%false = OpConstantFalse %bool
%true = OpConstantTrue %bool
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_GLF_color = OpVariable %_ptr_Output_v4float Output
%float_0 = OpConstant %float 0
%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
)";
const std::string new_predefs =
R"(%_ptr_Function_float = OpTypePointer Function %float
)";
const std::string main_before =
R"(%main = OpFunction %void None %7
%23 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %24
%24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%28 = OpLoad %int %i
%29 = OpSLessThan %bool %28 %int_1
OpBranchConditional %29 %30 %25
%30 = OpLabel
OpStore %_GLF_color %21
%31 = OpFunctionCall %float %f_
OpBranch %26
%26 = OpLabel
%32 = OpLoad %int %i
%33 = OpIAdd %int %32 %int_1
OpStore %i %33
OpBranch %24
%25 = OpLabel
OpStore %_GLF_color %22
OpReturn
OpFunctionEnd
)";
const std::string main_after =
R"(%main = OpFunction %void None %7
%23 = OpLabel
%38 = OpVariable %_ptr_Function_float Function
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %24
%24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%28 = OpLoad %int %i
%29 = OpSLessThan %bool %28 %int_1
OpBranchConditional %29 %30 %25
%30 = OpLabel
OpStore %_GLF_color %21
OpBranch %39
%39 = OpLabel
OpLoopMerge %40 %41 None
OpBranch %42
%42 = OpLabel
OpSelectionMerge %43 None
OpBranchConditional %true %44 %43
%44 = OpLabel
OpStore %38 %float_1
OpBranch %40
%43 = OpLabel
OpUnreachable
%41 = OpLabel
OpBranchConditional %false %39 %40
%40 = OpLabel
%31 = OpLoad %float %38
OpBranch %26
%26 = OpLabel
%32 = OpLoad %int %i
%33 = OpIAdd %int %32 %int_1
OpStore %i %33
OpBranch %24
%25 = OpLabel
OpStore %_GLF_color %22
OpReturn
OpFunctionEnd
)";
const std::string callee =
R"(%f_ = OpFunction %float None %9
%34 = OpLabel
OpSelectionMerge %35 None
OpBranchConditional %true %36 %35
%36 = OpLabel
OpReturnValue %float_1
%35 = OpLabel
OpUnreachable
OpFunctionEnd
)";
SinglePassRunAndCheck<InlineExhaustivePass>(
predefs + main_before + callee,
predefs + new_predefs + main_after + callee, false, true);
}
TEST_F(InlineTest, Decorated1) {
// Same test as Simple with the difference
// that OpFAdd in the outlined function is