Force complex loop in certain rare access chain scenarios.

If we generate an access chain in a loop body, and it is consumed in the
loop continue block, we have a problem because we cannot emit a
temporary here holding the access chain reference. Force a complex loop
body to workaround this exceptionally rare case.
This commit is contained in:
Hans-Kristian Arntzen 2019-04-10 15:46:48 +02:00
parent 5325210953
commit e23c9ea700
8 changed files with 224 additions and 11 deletions

View File

@ -46,10 +46,11 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(3)]],
threadgroup int* cur = stgsm;
device int* _73;
_73 = &buf.a[0u];
threadgroup int* _76;
int _77;
for (;;)
{
threadgroup int* _76 = cur;
_76 = cur;
_77 = *_73;
if (_77 != 0)
{

View File

@ -0,0 +1,30 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
{
int values[];
} _4;
void main()
{
int _17 = 0;
for (;;)
{
if (_17 < 100)
{
int _24 = _4.values[_17];
_4.values[_24] = _17;
int _26 = _24 + 1;
int _18 = _4.values[_26];
_4.values[_17] = _18;
_17 = _18;
continue;
}
else
{
break;
}
}
}

View File

@ -0,0 +1,27 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
{
int values[];
} _4;
void main()
{
int _17 = 0;
for (;;)
{
if (_17 < 100)
{
int _24 = _4.values[_17];
_4.values[_24] = _17;
_17 = _4.values[_24 + 1];
continue;
}
else
{
break;
}
}
}

View File

@ -0,0 +1,55 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 52
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %main "main"
OpName %SSBO "SSBO"
OpMemberName %SSBO 0 "values"
OpName %_ ""
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %SSBO 0 Offset 0
OpDecorate %SSBO BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%int_100 = OpConstant %int 100
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%SSBO = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
%_ptr_Uniform_int = OpTypePointer Uniform %int
%int_1 = OpConstant %int 1
%main = OpFunction %void None %3
%5 = OpLabel
OpBranch %32
%32 = OpLabel
%51 = OpPhi %int %int_0 %5 %49 %loop_continue
%38 = OpSLessThan %bool %51 %int_100
OpLoopMerge %loop_merge %loop_continue None
OpBranchConditional %38 %loop_body %loop_merge
%loop_body = OpLabel
%40 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %51
OpBranch %loop_continue
%loop_continue = OpLabel
%41 = OpLoad %int %40
%44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %41
OpStore %44 %51
%47 = OpIAdd %int %41 %int_1
%48 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %47
%49 = OpLoad %int %48
OpStore %40 %49
OpBranch %32
%loop_merge = OpLabel
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,54 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 52
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %main "main"
OpName %SSBO "SSBO"
OpMemberName %SSBO 0 "values"
OpName %_ ""
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %SSBO 0 Offset 0
OpDecorate %SSBO BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%int_100 = OpConstant %int 100
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%SSBO = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
%_ptr_Uniform_int = OpTypePointer Uniform %int
%int_1 = OpConstant %int 1
%main = OpFunction %void None %3
%5 = OpLabel
OpBranch %32
%32 = OpLabel
%51 = OpPhi %int %int_0 %5 %49 %loop_continue
%38 = OpSLessThan %bool %51 %int_100
OpLoopMerge %loop_merge %loop_continue None
OpBranchConditional %38 %loop_body %loop_merge
%loop_body = OpLabel
%40 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %51
OpBranch %loop_continue
%loop_continue = OpLabel
%41 = OpLoad %int %40
%44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %41
OpStore %44 %51
%47 = OpIAdd %int %41 %int_1
%48 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %47
%49 = OpLoad %int %48
OpBranch %32
%loop_merge = OpLabel
OpReturn
OpFunctionEnd

View File

@ -2874,6 +2874,9 @@ void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBl
void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
{
if (id == 0)
return;
if (id_is_phi_variable(id))
accessed_variables_to_block[id].insert(block);
else if (id_is_potential_temporary(id))
@ -2924,6 +2927,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
partial_write_variables_to_block[var->self].insert(current_block->self);
}
// args[0] might be an access chain we have to track use of.
notify_variable_access(args[0], current_block->self);
// Might try to store a Phi variable here.
notify_variable_access(args[1], current_block->self);
break;
@ -2941,9 +2946,16 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
if (var)
accessed_variables_to_block[var->self].insert(current_block->self);
for (uint32_t i = 3; i < length; i++)
// args[2] might be another access chain we have to track use of.
for (uint32_t i = 2; i < length; i++)
notify_variable_access(args[i], current_block->self);
// Also keep track of the access chain pointer itself.
// In exceptionally rare cases, we can end up with a case where
// the access chain is generated in the loop body, but is consumed in continue block.
// This means we need complex loop workarounds, and we must detect this via CFG analysis.
notify_variable_access(args[1], current_block->self);
// The result of an access chain is a fixed expression and is not really considered a temporary.
auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
@ -2951,6 +2963,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
// Other backends might use SPIRAccessChain for this later.
compiler.ir.ids[args[1]].set_allow_type_rewrite();
access_chain_expressions.insert(args[1]);
break;
}
@ -2973,6 +2986,10 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
partial_write_variables_to_block[var->self].insert(current_block->self);
}
// args[0:1] might be access chains we have to track use of.
for (uint32_t i = 0; i < 2; i++)
notify_variable_access(args[i], current_block->self);
var = compiler.maybe_get_backing_variable(rhs);
if (var)
accessed_variables_to_block[var->self].insert(current_block->self);
@ -2988,6 +3005,11 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
if (var)
accessed_variables_to_block[var->self].insert(current_block->self);
// Might be an access chain which we have to keep track of.
notify_variable_access(args[1], current_block->self);
if (access_chain_expressions.count(args[2]))
access_chain_expressions.insert(args[1]);
// Might try to copy a Phi variable here.
notify_variable_access(args[2], current_block->self);
break;
@ -3004,6 +3026,9 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
// Loaded value is a temporary.
notify_variable_access(args[1], current_block->self);
// Might be an access chain we have to track use of.
notify_variable_access(args[2], current_block->self);
break;
}
@ -3370,7 +3395,14 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
// If a temporary is used in more than one block, we might have to lift continue block
// access up to loop header like we did for variables.
if (blocks.size() != 1 && is_continue(block))
builder.add_block(ir.continue_block_to_loop_header[block]);
{
auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
assert(loop_header_block.merge == SPIRBlock::MergeLoop);
// Only relevant if the loop is not marked as complex.
if (!loop_header_block.complex_continue)
builder.add_block(loop_header_block.self);
}
else if (blocks.size() != 1 && is_single_block_loop(block))
{
// Awkward case, because the loop header is also the continue block.
@ -3387,14 +3419,27 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
if (!first_use_is_dominator || force_temporary)
{
// This should be very rare, but if we try to declare a temporary inside a loop,
// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
// we should actually emit the temporary outside the loop.
hoisted_temporaries.insert(var.first);
forced_temporaries.insert(var.first);
if (handler.access_chain_expressions.count(var.first))
{
// Exceptionally rare case.
// We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
// Rather than do that, we force a complex loop to make sure access chains are created and consumed
// in expected order.
auto &loop_header_block = get<SPIRBlock>(dominating_block);
assert(loop_header_block.merge == SPIRBlock::MergeLoop);
loop_header_block.complex_continue = true;
}
else
{
// This should be very rare, but if we try to declare a temporary inside a loop,
// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
// we should actually emit the temporary outside the loop.
hoisted_temporaries.insert(var.first);
forced_temporaries.insert(var.first);
auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
}
}
else if (blocks.size() > 1)
{

View File

@ -916,6 +916,7 @@ protected:
std::unordered_map<uint32_t, uint32_t> result_id_to_type;
std::unordered_map<uint32_t, std::unordered_set<uint32_t>> complete_write_variables_to_block;
std::unordered_map<uint32_t, std::unordered_set<uint32_t>> partial_write_variables_to_block;
std::unordered_set<uint32_t> access_chain_expressions;
const SPIRBlock *current_block = nullptr;
};

View File

@ -11302,7 +11302,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
assert(block.merge == SPIRBlock::MergeSelection);
branch_to_continue(block.self, block.next_block);
}
else
else if (block.self != block.next_block)
emit_block_chain(get<SPIRBlock>(block.next_block));
}