From 55c2ca90ae290a3cd312fd711ac692e986061dfa Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 26 Aug 2019 14:21:35 +0200 Subject: [PATCH 1/2] Elide branches to continue block when continue block is also a merge. --- .../frag/selection-merge-to-continue.asm.frag | 2 -- ...temporary-use-continue-block-as-value.frag | 2 -- .../frag/selection-merge-to-continue.asm.frag | 2 -- spirv_glsl.cpp | 21 ++++++++++++++++--- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag index 05c17c7a..45cf8523 100644 --- a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag @@ -11,12 +11,10 @@ void main() if (v0.x == 20.0) { FragColor += vec4(v0[_54 & 3]); - continue; } else { FragColor += vec4(v0[_54 & 1]); - continue; } continue; } diff --git a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag index 91d7e37c..6c07749a 100644 --- a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag +++ b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag @@ -15,12 +15,10 @@ void main() if ((vA + _57) == 20) { _58 = 50; - continue; } else { _58 = ((vB + _57) == 40) ? 60 : _60; - continue; } continue; } diff --git a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag index 82b5973f..edbce0cc 100644 --- a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ b/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag @@ -11,12 +11,10 @@ void main() if (v0.x == 20.0) { FragColor += vec4(v0[i & 3]); - continue; } else { FragColor += vec4(v0[i & 1]); - continue; } } } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 51803a66..01b93227 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -11285,7 +11285,7 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) // Some simplification for for-loops. We always end up with a useless continue; // statement since we branch to a loop block. - // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, + // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, // we can avoid writing out an explicit continue statement. // Similar optimization to return statements if we know we're outside flow control. if (!outside_control_flow) @@ -11298,6 +11298,8 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) flush_phi(from, to); flush_control_dependent_expressions(from); + bool to_is_continue = is_continue(to); + // This is only a continue if we branch to our loop dominator. if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) { @@ -11326,12 +11328,25 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) } statement("break;"); } - else if (is_continue(to) || (from == to)) + else if (to_is_continue || from == to) { // For from == to case can happen for a do-while loop which branches into itself. // We don't mark these cases as continue blocks, but the only possible way to branch into // ourselves is through means of continue blocks. - branch_to_continue(from, to); + + // If we are merging to a continue block, there is no need to emit the block chain for continue here. + // We can branch to the continue block after we merge execution. + + // Here we make use of structured control flow rules from spec: + // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block + // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG + // If we are branching to a merge block, we must be inside a construct which dominates the merge block. + auto &block_meta = ir.block_meta[to]; + bool branching_to_merge = + (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | + ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; + if (!to_is_continue || !branching_to_merge) + branch_to_continue(from, to); } else if (!is_conditional(to)) emit_block_chain(get(to)); From 5d97dae1ebd33308e91656bc5cb3a7b440993d18 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 26 Aug 2019 15:36:23 +0200 Subject: [PATCH 2/2] Move branchless analysis to CFG. Traverse backwards instead, far more robust. Should elide basically all redundant continue; statements now. --- .../phi-temporary-copy-loop-variable.asm.comp | 1 - .../frag/selection-merge-to-continue.asm.frag | 1 - .../frag/switch-merge-to-continue.asm.frag | 1 - ...temporary-use-continue-block-as-value.frag | 1 - .../loop-dominator-and-switch-default.frag | 1 - .../phi-temporary-copy-loop-variable.asm.comp | 1 - .../frag/for-loop-phi-only-continue.asm.frag | 1 - .../frag/op-phi-swap-continue-block.asm.frag | 1 - spirv_cfg.cpp | 79 ++++++++++++++++--- spirv_cfg.hpp | 10 ++- spirv_cross.cpp | 44 +++-------- spirv_cross.hpp | 3 +- spirv_glsl.cpp | 20 +++-- 13 files changed, 102 insertions(+), 62 deletions(-) diff --git a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp index 9266982b..4f8d2ffa 100644 --- a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp +++ b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp @@ -14,7 +14,6 @@ void main() { break; } - continue; } imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); } diff --git a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag index 45cf8523..370e5005 100644 --- a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag @@ -16,7 +16,6 @@ void main() { FragColor += vec4(v0[_54 & 1]); } - continue; } } diff --git a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag index ea4a2599..d40a3f80 100644 --- a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag @@ -25,7 +25,6 @@ void main() break; } } - continue; } } diff --git a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag index 6c07749a..89be3d5a 100644 --- a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag +++ b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag @@ -20,7 +20,6 @@ void main() { _58 = ((vB + _57) == 40) ? 60 : _60; } - continue; } } diff --git a/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag b/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag index 00f4b6be..2c193483 100644 --- a/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag +++ b/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag @@ -50,7 +50,6 @@ void main() vec4 _79 = _83; _79.y = _83.y + 0.5; _89 = _79; - continue; } fragColor = _82; } diff --git a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp index 9266982b..4f8d2ffa 100644 --- a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp +++ b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp @@ -14,7 +14,6 @@ void main() { break; } - continue; } imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); } diff --git a/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag b/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag index feb45db4..31011429 100644 --- a/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag +++ b/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag @@ -12,7 +12,6 @@ void main() { _20 = _19 + 1.0; _23 = _22 + 1; - continue; } FragColor = vec4(_19); } diff --git a/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag b/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag index 3dae3e16..d62b63a0 100644 --- a/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag +++ b/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag @@ -18,7 +18,6 @@ void main() _24 = _5.uJ; for (int _26 = 0; _26 < _5.uCount; _23_copy = _23, _23 = _24, _24 = _23_copy, _26++) { - continue; } FragColor = float(_24 - _23) * float(_5.uJ * _5.uK); } diff --git a/spirv_cfg.cpp b/spirv_cfg.cpp index 8f6f024c..de790669 100644 --- a/spirv_cfg.cpp +++ b/spirv_cfg.cpp @@ -97,8 +97,22 @@ bool CFG::post_order_visit(uint32_t block_id) // Block back-edges from recursively revisiting ourselves. visit_order[block_id].get() = 0; - // First visit our branch targets. auto &block = compiler.get(block_id); + + // If this is a loop header, add an implied branch to the merge target. + // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. + // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. + // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. + // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. + + // Make a point out of visiting merge target first. This is to make sure that post visit order outside the loop + // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis. + // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine, + // but for loops, only the header might end up actually branching to merge block. + if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + add_branch(block_id, block.merge_block); + + // First visit our branch targets. switch (block.terminator) { case SPIRBlock::Direct: @@ -127,14 +141,6 @@ bool CFG::post_order_visit(uint32_t block_id) break; } - // If this is a loop header, add an implied branch to the merge target. - // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. - // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. - // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. - // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. - if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) - add_branch(block_id, block.merge_block); - // If this is a selection merge, add an implied branch to the merge target. // This is needed to avoid cases where an inner branch dominates the outer branch. // This can happen if one of the branches exit early, e.g.: @@ -262,6 +268,61 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const return block_id; } +bool CFG::node_terminates_control_flow_in_sub_graph(uint32_t from, uint32_t to) const +{ + // Walk backwards, starting from "to" block. + // Only follow pred edges if they have a 1:1 relationship, or a merge relationship. + // If we cannot find a path to "from", we must assume that to is inside control flow in some way. + + auto &from_block = compiler.get(from); + uint32_t ignore_block_id = 0; + if (from_block.merge == SPIRBlock::MergeLoop) + ignore_block_id = from_block.merge_block; + + while (to != from) + { + auto pred_itr = preceding_edges.find(to); + if (pred_itr == end(preceding_edges)) + return false; + + DominatorBuilder builder(*this); + for (auto &edge : pred_itr->second) + builder.add_block(edge); + + uint32_t dominator = builder.get_dominator(); + if (dominator == 0) + return false; + + auto &dom = compiler.get(dominator); + + bool true_path_ignore = false; + bool false_path_ignore = false; + if (ignore_block_id && dom.terminator == SPIRBlock::Select) + { + auto &true_block = compiler.get(dom.true_block); + auto &false_block = compiler.get(dom.false_block); + auto &ignore_block = compiler.get(ignore_block_id); + true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); + false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + } + + if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) || + (dom.terminator == SPIRBlock::Direct && dom.next_block == to) || + (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) || + (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore)) + { + // Allow walking selection constructs if the other branch reaches out of a loop construct. + // It cannot be in-scope anymore. + to = dominator; + } + else + return false; + } + + return true; +} + DominatorBuilder::DominatorBuilder(const CFG &cfg_) : cfg(cfg_) { diff --git a/spirv_cfg.hpp b/spirv_cfg.hpp index 7d07d484..fd3c0e6e 100644 --- a/spirv_cfg.hpp +++ b/spirv_cfg.hpp @@ -88,13 +88,17 @@ public: return; seen_blocks.insert(block); - op(block); - for (auto b : get_succeeding_edges(block)) - walk_from(seen_blocks, b, op); + if (op(block)) + { + for (auto b : get_succeeding_edges(block)) + walk_from(seen_blocks, b, op); + } } uint32_t find_loop_dominator(uint32_t block) const; + bool node_terminates_control_flow_in_sub_graph(uint32_t from, uint32_t to) const; + private: struct VisitOrder { diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 3a9670ed..a04a9469 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -1396,38 +1396,6 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method return false; } -bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) -{ - auto *start = &from; - - if (start->self == to.self) - return true; - - // Break cycles. - if (is_continue(start->self)) - return false; - - // If our select block doesn't merge, we must break or continue in these blocks, - // so if continues occur branchless within these blocks, consider them branchless as well. - // This is typically used for loop control. - if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone && - (block_is_outside_flow_control_from_block(get(start->true_block), to) || - block_is_outside_flow_control_from_block(get(start->false_block), to))) - { - return true; - } - else if (start->merge_block && block_is_outside_flow_control_from_block(get(start->merge_block), to)) - { - return true; - } - else if (start->next_block && block_is_outside_flow_control_from_block(get(start->next_block), to)) - { - return true; - } - else - return false; -} - bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const { if (!execution_is_branchless(from, to)) @@ -3436,10 +3404,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA // merge can occur. Walk the CFG to see if we find anything. seen_blocks.clear(); - cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) { + cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool { // We found a block which accesses the variable outside the loop. if (blocks.find(walk_block) != end(blocks)) static_loop_init = false; + return true; }); if (!static_loop_init) @@ -3803,6 +3772,15 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin return true; } +const CFG &Compiler::get_cfg_for_current_function() const +{ + assert(current_function); + auto cfg_itr = function_cfgs.find(current_function->self); + assert(cfg_itr != end(function_cfgs)); + assert(cfg_itr->second); + return *cfg_itr->second; +} + void Compiler::build_function_control_flow_graphs_and_analyze() { CFGBuilder handler(*this); diff --git a/spirv_cross.hpp b/spirv_cross.hpp index ca75dc66..9701d4c2 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -657,7 +657,6 @@ protected: bool function_is_pure(const SPIRFunction &func); bool block_is_pure(const SPIRBlock &block); - bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to); bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const; @@ -883,6 +882,8 @@ protected: void build_function_control_flow_graphs_and_analyze(); std::unordered_map> function_cfgs; + const CFG &get_cfg_for_current_function() const; + struct CFGBuilder : OpcodeHandler { CFGBuilder(Compiler &compiler_); diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 01b93227..9aa45065 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -11276,11 +11276,11 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) if (loop_dominator != 0) { - auto &dominator = get(loop_dominator); + auto &cfg = get_cfg_for_current_function(); // For non-complex continue blocks, we implicitly branch to the continue block // by having the continue block be part of the loop header in for (; ; continue-block). - outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); + outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from); } // Some simplification for for-loops. We always end up with a useless continue; @@ -12195,12 +12195,15 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } case SPIRBlock::Return: + { for (auto &line : current_function->fixup_hooks_out) line(); if (processing_entry_point) emit_fixup(); + auto &cfg = get_cfg_for_current_function(); + if (block.return_value) { auto &type = expression_type(block.return_value); @@ -12211,7 +12214,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (ir.ids[block.return_value].get_type() != TypeUndef) emit_array_copy("SPIRV_Cross_return_value", block.return_value); - if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || + if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || block.loop_dominator != SPIRBlock::NoDominator) { statement("return;"); @@ -12224,16 +12227,17 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) statement("return ", to_expression(block.return_value), ";"); } } - // If this block is the very final block and not called from control flow, - // we do not need an explicit return which looks out of place. Just end the function here. - // In the very weird case of for(;;) { return; } executing return is unconditional, - // but we actually need a return here ... - else if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || + else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || block.loop_dominator != SPIRBlock::NoDominator) { + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... statement("return;"); } break; + } case SPIRBlock::Kill: statement(backend.discard_literal, ";");