Force complex loop in certain rare access chain scenarios.
If we generate an access chain in a loop body, and it is consumed in the loop continue block, we have a problem because we cannot emit a temporary here holding the access chain reference. Force a complex loop body to workaround this exceptionally rare case.
This commit is contained in:
parent
5325210953
commit
e23c9ea700
@ -46,10 +46,11 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(3)]],
|
||||
threadgroup int* cur = stgsm;
|
||||
device int* _73;
|
||||
_73 = &buf.a[0u];
|
||||
threadgroup int* _76;
|
||||
int _77;
|
||||
for (;;)
|
||||
{
|
||||
threadgroup int* _76 = cur;
|
||||
_76 = cur;
|
||||
_77 = *_73;
|
||||
if (_77 != 0)
|
||||
{
|
||||
|
@ -0,0 +1,30 @@
|
||||
#version 450
|
||||
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(binding = 0, std430) buffer SSBO
|
||||
{
|
||||
int values[];
|
||||
} _4;
|
||||
|
||||
void main()
|
||||
{
|
||||
int _17 = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (_17 < 100)
|
||||
{
|
||||
int _24 = _4.values[_17];
|
||||
_4.values[_24] = _17;
|
||||
int _26 = _24 + 1;
|
||||
int _18 = _4.values[_26];
|
||||
_4.values[_17] = _18;
|
||||
_17 = _18;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,27 @@
|
||||
#version 450
|
||||
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(binding = 0, std430) buffer SSBO
|
||||
{
|
||||
int values[];
|
||||
} _4;
|
||||
|
||||
void main()
|
||||
{
|
||||
int _17 = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (_17 < 100)
|
||||
{
|
||||
int _24 = _4.values[_17];
|
||||
_4.values[_24] = _17;
|
||||
_17 = _4.values[_24 + 1];
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,55 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 52
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main"
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %SSBO "SSBO"
|
||||
OpMemberName %SSBO 0 "values"
|
||||
OpName %_ ""
|
||||
OpDecorate %_runtimearr_int ArrayStride 4
|
||||
OpMemberDecorate %SSBO 0 Offset 0
|
||||
OpDecorate %SSBO BufferBlock
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %_ Binding 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_100 = OpConstant %int 100
|
||||
%bool = OpTypeBool
|
||||
%_runtimearr_int = OpTypeRuntimeArray %int
|
||||
%SSBO = OpTypeStruct %_runtimearr_int
|
||||
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
|
||||
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
|
||||
%_ptr_Uniform_int = OpTypePointer Uniform %int
|
||||
%int_1 = OpConstant %int 1
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
OpBranch %32
|
||||
%32 = OpLabel
|
||||
%51 = OpPhi %int %int_0 %5 %49 %loop_continue
|
||||
%38 = OpSLessThan %bool %51 %int_100
|
||||
OpLoopMerge %loop_merge %loop_continue None
|
||||
OpBranchConditional %38 %loop_body %loop_merge
|
||||
%loop_body = OpLabel
|
||||
%40 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %51
|
||||
OpBranch %loop_continue
|
||||
%loop_continue = OpLabel
|
||||
%41 = OpLoad %int %40
|
||||
%44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %41
|
||||
OpStore %44 %51
|
||||
%47 = OpIAdd %int %41 %int_1
|
||||
%48 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %47
|
||||
%49 = OpLoad %int %48
|
||||
OpStore %40 %49
|
||||
OpBranch %32
|
||||
%loop_merge = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -0,0 +1,54 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 52
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main"
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %SSBO "SSBO"
|
||||
OpMemberName %SSBO 0 "values"
|
||||
OpName %_ ""
|
||||
OpDecorate %_runtimearr_int ArrayStride 4
|
||||
OpMemberDecorate %SSBO 0 Offset 0
|
||||
OpDecorate %SSBO BufferBlock
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %_ Binding 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_100 = OpConstant %int 100
|
||||
%bool = OpTypeBool
|
||||
%_runtimearr_int = OpTypeRuntimeArray %int
|
||||
%SSBO = OpTypeStruct %_runtimearr_int
|
||||
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
|
||||
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
|
||||
%_ptr_Uniform_int = OpTypePointer Uniform %int
|
||||
%int_1 = OpConstant %int 1
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
OpBranch %32
|
||||
%32 = OpLabel
|
||||
%51 = OpPhi %int %int_0 %5 %49 %loop_continue
|
||||
%38 = OpSLessThan %bool %51 %int_100
|
||||
OpLoopMerge %loop_merge %loop_continue None
|
||||
OpBranchConditional %38 %loop_body %loop_merge
|
||||
%loop_body = OpLabel
|
||||
%40 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %51
|
||||
OpBranch %loop_continue
|
||||
%loop_continue = OpLabel
|
||||
%41 = OpLoad %int %40
|
||||
%44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %41
|
||||
OpStore %44 %51
|
||||
%47 = OpIAdd %int %41 %int_1
|
||||
%48 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %47
|
||||
%49 = OpLoad %int %48
|
||||
OpBranch %32
|
||||
%loop_merge = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -2874,6 +2874,9 @@ void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBl
|
||||
|
||||
void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
|
||||
{
|
||||
if (id == 0)
|
||||
return;
|
||||
|
||||
if (id_is_phi_variable(id))
|
||||
accessed_variables_to_block[id].insert(block);
|
||||
else if (id_is_potential_temporary(id))
|
||||
@ -2924,6 +2927,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
|
||||
partial_write_variables_to_block[var->self].insert(current_block->self);
|
||||
}
|
||||
|
||||
// args[0] might be an access chain we have to track use of.
|
||||
notify_variable_access(args[0], current_block->self);
|
||||
// Might try to store a Phi variable here.
|
||||
notify_variable_access(args[1], current_block->self);
|
||||
break;
|
||||
@ -2941,9 +2946,16 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
|
||||
if (var)
|
||||
accessed_variables_to_block[var->self].insert(current_block->self);
|
||||
|
||||
for (uint32_t i = 3; i < length; i++)
|
||||
// args[2] might be another access chain we have to track use of.
|
||||
for (uint32_t i = 2; i < length; i++)
|
||||
notify_variable_access(args[i], current_block->self);
|
||||
|
||||
// Also keep track of the access chain pointer itself.
|
||||
// In exceptionally rare cases, we can end up with a case where
|
||||
// the access chain is generated in the loop body, but is consumed in continue block.
|
||||
// This means we need complex loop workarounds, and we must detect this via CFG analysis.
|
||||
notify_variable_access(args[1], current_block->self);
|
||||
|
||||
// The result of an access chain is a fixed expression and is not really considered a temporary.
|
||||
auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
|
||||
auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
|
||||
@ -2951,6 +2963,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
|
||||
|
||||
// Other backends might use SPIRAccessChain for this later.
|
||||
compiler.ir.ids[args[1]].set_allow_type_rewrite();
|
||||
access_chain_expressions.insert(args[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -2973,6 +2986,10 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
|
||||
partial_write_variables_to_block[var->self].insert(current_block->self);
|
||||
}
|
||||
|
||||
// args[0:1] might be access chains we have to track use of.
|
||||
for (uint32_t i = 0; i < 2; i++)
|
||||
notify_variable_access(args[i], current_block->self);
|
||||
|
||||
var = compiler.maybe_get_backing_variable(rhs);
|
||||
if (var)
|
||||
accessed_variables_to_block[var->self].insert(current_block->self);
|
||||
@ -2988,6 +3005,11 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
|
||||
if (var)
|
||||
accessed_variables_to_block[var->self].insert(current_block->self);
|
||||
|
||||
// Might be an access chain which we have to keep track of.
|
||||
notify_variable_access(args[1], current_block->self);
|
||||
if (access_chain_expressions.count(args[2]))
|
||||
access_chain_expressions.insert(args[1]);
|
||||
|
||||
// Might try to copy a Phi variable here.
|
||||
notify_variable_access(args[2], current_block->self);
|
||||
break;
|
||||
@ -3004,6 +3026,9 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
|
||||
|
||||
// Loaded value is a temporary.
|
||||
notify_variable_access(args[1], current_block->self);
|
||||
|
||||
// Might be an access chain we have to track use of.
|
||||
notify_variable_access(args[2], current_block->self);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -3370,7 +3395,14 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
|
||||
// If a temporary is used in more than one block, we might have to lift continue block
|
||||
// access up to loop header like we did for variables.
|
||||
if (blocks.size() != 1 && is_continue(block))
|
||||
builder.add_block(ir.continue_block_to_loop_header[block]);
|
||||
{
|
||||
auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
|
||||
assert(loop_header_block.merge == SPIRBlock::MergeLoop);
|
||||
|
||||
// Only relevant if the loop is not marked as complex.
|
||||
if (!loop_header_block.complex_continue)
|
||||
builder.add_block(loop_header_block.self);
|
||||
}
|
||||
else if (blocks.size() != 1 && is_single_block_loop(block))
|
||||
{
|
||||
// Awkward case, because the loop header is also the continue block.
|
||||
@ -3387,14 +3419,27 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
|
||||
|
||||
if (!first_use_is_dominator || force_temporary)
|
||||
{
|
||||
// This should be very rare, but if we try to declare a temporary inside a loop,
|
||||
// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
|
||||
// we should actually emit the temporary outside the loop.
|
||||
hoisted_temporaries.insert(var.first);
|
||||
forced_temporaries.insert(var.first);
|
||||
if (handler.access_chain_expressions.count(var.first))
|
||||
{
|
||||
// Exceptionally rare case.
|
||||
// We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
|
||||
// Rather than do that, we force a complex loop to make sure access chains are created and consumed
|
||||
// in expected order.
|
||||
auto &loop_header_block = get<SPIRBlock>(dominating_block);
|
||||
assert(loop_header_block.merge == SPIRBlock::MergeLoop);
|
||||
loop_header_block.complex_continue = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// This should be very rare, but if we try to declare a temporary inside a loop,
|
||||
// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
|
||||
// we should actually emit the temporary outside the loop.
|
||||
hoisted_temporaries.insert(var.first);
|
||||
forced_temporaries.insert(var.first);
|
||||
|
||||
auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
|
||||
block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
|
||||
auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
|
||||
block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
|
||||
}
|
||||
}
|
||||
else if (blocks.size() > 1)
|
||||
{
|
||||
|
@ -916,6 +916,7 @@ protected:
|
||||
std::unordered_map<uint32_t, uint32_t> result_id_to_type;
|
||||
std::unordered_map<uint32_t, std::unordered_set<uint32_t>> complete_write_variables_to_block;
|
||||
std::unordered_map<uint32_t, std::unordered_set<uint32_t>> partial_write_variables_to_block;
|
||||
std::unordered_set<uint32_t> access_chain_expressions;
|
||||
const SPIRBlock *current_block = nullptr;
|
||||
};
|
||||
|
||||
|
@ -11302,7 +11302,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
assert(block.merge == SPIRBlock::MergeSelection);
|
||||
branch_to_continue(block.self, block.next_block);
|
||||
}
|
||||
else
|
||||
else if (block.self != block.next_block)
|
||||
emit_block_chain(get<SPIRBlock>(block.next_block));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user