diff --git a/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp b/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp index 90c1429e..d2e564c9 100644 --- a/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp @@ -46,10 +46,11 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(3)]], threadgroup int* cur = stgsm; device int* _73; _73 = &buf.a[0u]; + threadgroup int* _76; int _77; for (;;) { - threadgroup int* _76 = cur; + _76 = cur; _77 = *_73; if (_77 != 0) { diff --git a/reference/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body-2.asm.comp b/reference/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body-2.asm.comp new file mode 100644 index 00000000..c27bef6a --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body-2.asm.comp @@ -0,0 +1,30 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + int values[]; +} _4; + +void main() +{ + int _17 = 0; + for (;;) + { + if (_17 < 100) + { + int _24 = _4.values[_17]; + _4.values[_24] = _17; + int _26 = _24 + 1; + int _18 = _4.values[_26]; + _4.values[_17] = _18; + _17 = _18; + continue; + } + else + { + break; + } + } +} + diff --git a/reference/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body.asm.comp b/reference/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body.asm.comp new file mode 100644 index 00000000..0517ec4d --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body.asm.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + int values[]; +} _4; + +void main() +{ + int _17 = 0; + for (;;) + { + if (_17 < 100) + { + int _24 = _4.values[_17]; + _4.values[_24] = _17; + _17 = _4.values[_24 + 1]; + continue; + } + else + { + break; + } + } +} + diff --git a/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body-2.asm.comp b/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body-2.asm.comp new file mode 100644 index 00000000..8f4c9578 --- /dev/null +++ b/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body-2.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 52 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_100 = OpConstant %int 100 + %bool = OpTypeBool +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %32 + %32 = OpLabel + %51 = OpPhi %int %int_0 %5 %49 %loop_continue + %38 = OpSLessThan %bool %51 %int_100 + OpLoopMerge %loop_merge %loop_continue None + OpBranchConditional %38 %loop_body %loop_merge + %loop_body = OpLabel + %40 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %51 + OpBranch %loop_continue + %loop_continue = OpLabel + %41 = OpLoad %int %40 + %44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %41 + OpStore %44 %51 + %47 = OpIAdd %int %41 %int_1 + %48 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %47 + %49 = OpLoad %int %48 + OpStore %40 %49 + OpBranch %32 + %loop_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body.asm.comp b/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body.asm.comp new file mode 100644 index 00000000..b1ddd7cc --- /dev/null +++ b/shaders-no-opt/asm/comp/access-chain-dominator-in-loop-body.asm.comp @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 52 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_100 = OpConstant %int 100 + %bool = OpTypeBool +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %32 + %32 = OpLabel + %51 = OpPhi %int %int_0 %5 %49 %loop_continue + %38 = OpSLessThan %bool %51 %int_100 + OpLoopMerge %loop_merge %loop_continue None + OpBranchConditional %38 %loop_body %loop_merge + %loop_body = OpLabel + %40 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %51 + OpBranch %loop_continue + %loop_continue = OpLabel + %41 = OpLoad %int %40 + %44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %41 + OpStore %44 %51 + %47 = OpIAdd %int %41 %int_1 + %48 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %47 + %49 = OpLoad %int %48 + OpBranch %32 + %loop_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/spirv_cross.cpp b/spirv_cross.cpp index b5ce3ab8..cbe9b26a 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -2874,6 +2874,9 @@ void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBl void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) { + if (id == 0) + return; + if (id_is_phi_variable(id)) accessed_variables_to_block[id].insert(block); else if (id_is_potential_temporary(id)) @@ -2924,6 +2927,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 partial_write_variables_to_block[var->self].insert(current_block->self); } + // args[0] might be an access chain we have to track use of. + notify_variable_access(args[0], current_block->self); // Might try to store a Phi variable here. notify_variable_access(args[1], current_block->self); break; @@ -2941,9 +2946,16 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (var) accessed_variables_to_block[var->self].insert(current_block->self); - for (uint32_t i = 3; i < length; i++) + // args[2] might be another access chain we have to track use of. + for (uint32_t i = 2; i < length; i++) notify_variable_access(args[i], current_block->self); + // Also keep track of the access chain pointer itself. + // In exceptionally rare cases, we can end up with a case where + // the access chain is generated in the loop body, but is consumed in continue block. + // This means we need complex loop workarounds, and we must detect this via CFG analysis. + notify_variable_access(args[1], current_block->self); + // The result of an access chain is a fixed expression and is not really considered a temporary. auto &e = compiler.set(args[1], "", args[0], true); auto *backing_variable = compiler.maybe_get_backing_variable(ptr); @@ -2951,6 +2963,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Other backends might use SPIRAccessChain for this later. compiler.ir.ids[args[1]].set_allow_type_rewrite(); + access_chain_expressions.insert(args[1]); break; } @@ -2973,6 +2986,10 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 partial_write_variables_to_block[var->self].insert(current_block->self); } + // args[0:1] might be access chains we have to track use of. + for (uint32_t i = 0; i < 2; i++) + notify_variable_access(args[i], current_block->self); + var = compiler.maybe_get_backing_variable(rhs); if (var) accessed_variables_to_block[var->self].insert(current_block->self); @@ -2988,6 +3005,11 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (var) accessed_variables_to_block[var->self].insert(current_block->self); + // Might be an access chain which we have to keep track of. + notify_variable_access(args[1], current_block->self); + if (access_chain_expressions.count(args[2])) + access_chain_expressions.insert(args[1]); + // Might try to copy a Phi variable here. notify_variable_access(args[2], current_block->self); break; @@ -3004,6 +3026,9 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Loaded value is a temporary. notify_variable_access(args[1], current_block->self); + + // Might be an access chain we have to track use of. + notify_variable_access(args[2], current_block->self); break; } @@ -3370,7 +3395,14 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA // If a temporary is used in more than one block, we might have to lift continue block // access up to loop header like we did for variables. if (blocks.size() != 1 && is_continue(block)) - builder.add_block(ir.continue_block_to_loop_header[block]); + { + auto &loop_header_block = get(ir.continue_block_to_loop_header[block]); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + + // Only relevant if the loop is not marked as complex. + if (!loop_header_block.complex_continue) + builder.add_block(loop_header_block.self); + } else if (blocks.size() != 1 && is_single_block_loop(block)) { // Awkward case, because the loop header is also the continue block. @@ -3387,14 +3419,27 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA if (!first_use_is_dominator || force_temporary) { - // This should be very rare, but if we try to declare a temporary inside a loop, - // and that temporary is used outside the loop as well (spirv-opt inliner likes this) - // we should actually emit the temporary outside the loop. - hoisted_temporaries.insert(var.first); - forced_temporaries.insert(var.first); + if (handler.access_chain_expressions.count(var.first)) + { + // Exceptionally rare case. + // We cannot declare temporaries of access chains (except on MSL perhaps with pointers). + // Rather than do that, we force a complex loop to make sure access chains are created and consumed + // in expected order. + auto &loop_header_block = get(dominating_block); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + loop_header_block.complex_continue = true; + } + else + { + // This should be very rare, but if we try to declare a temporary inside a loop, + // and that temporary is used outside the loop as well (spirv-opt inliner likes this) + // we should actually emit the temporary outside the loop. + hoisted_temporaries.insert(var.first); + forced_temporaries.insert(var.first); - auto &block_temporaries = get(dominating_block).declare_temporary; - block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); + auto &block_temporaries = get(dominating_block).declare_temporary; + block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); + } } else if (blocks.size() > 1) { diff --git a/spirv_cross.hpp b/spirv_cross.hpp index 35f0132a..4e0b171c 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -916,6 +916,7 @@ protected: std::unordered_map result_id_to_type; std::unordered_map> complete_write_variables_to_block; std::unordered_map> partial_write_variables_to_block; + std::unordered_set access_chain_expressions; const SPIRBlock *current_block = nullptr; }; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 218396e9..0671e880 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -11302,7 +11302,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) assert(block.merge == SPIRBlock::MergeSelection); branch_to_continue(block.self, block.next_block); } - else + else if (block.self != block.next_block) emit_block_chain(get(block.next_block)); }