From 8b236f24f1b8d0bbc3d6e14a29addc7beafd35db Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 24 Apr 2019 09:31:44 +0200 Subject: [PATCH] Fix infinite loop when OpAtomic* temporaries are used in other blocks. We made the mistake of registering a dependency on the atomic variable even if the atomic result was forced to a temporary. There is no need to register reads from atomic variables like this as we always force atomic results to a temporary and argument read/writes do not need to be tracked. --- .../asm/comp/atomic-result-temporary.asm.comp | 24 ++++++++ .../asm/comp/atomic-result-temporary.asm.comp | 23 ++++++++ .../asm/comp/atomic-result-temporary.asm.comp | 18 ++++++ .../asm/comp/atomic-result-temporary.asm.comp | 59 +++++++++++++++++++ .../asm/comp/atomic-result-temporary.asm.comp | 59 +++++++++++++++++++ .../asm/comp/atomic-result-temporary.asm.comp | 59 +++++++++++++++++++ spirv_glsl.cpp | 10 +--- spirv_hlsl.cpp | 1 - 8 files changed, 243 insertions(+), 10 deletions(-) create mode 100644 reference/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 reference/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp diff --git a/reference/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 00000000..3a03fafe --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,24 @@ +RWByteAddressBuffer _5 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + uint _24; + _5.InterlockedAdd(0, 1u, _24); + if (_24 < 1024u) + { + _5.Store(_24 * 4 + 4, gl_GlobalInvocationID.x); + } +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 00000000..8b669428 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,23 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + uint count; + uint data[1]; +}; + +kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed); + if (_24 < 1024u) + { + _5.data[_24] = gl_GlobalInvocationID.x; + } +} + diff --git a/reference/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 00000000..b51c6c58 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + uint count; + uint data[]; +} _5; + +void main() +{ + uint _24 = atomicAdd(_5.count, 1u); + if (_24 < 1024u) + { + _5.data[_24] = gl_GlobalInvocationID.x; + } +} + diff --git a/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 00000000..a3238415 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "count" + OpMemberName %SSBO 1 "data" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %uint %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %uint_1024 = OpConstant %uint 1024 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %19 = OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1 + %23 = OpULessThan %bool %19 %uint_1024 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %33 = OpLoad %uint %32 + %34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 00000000..a3238415 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "count" + OpMemberName %SSBO 1 "data" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %uint %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %uint_1024 = OpConstant %uint 1024 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %19 = OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1 + %23 = OpULessThan %bool %19 %uint_1024 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %33 = OpLoad %uint %32 + %34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp b/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 00000000..a3238415 --- /dev/null +++ b/shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "count" + OpMemberName %SSBO 1 "data" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %uint %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %uint_1024 = OpConstant %uint 1024 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %19 = OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1 + %23 = OpULessThan %bool %19 %uint_1024 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %33 = OpLoad %uint %32 + %34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 0671e880..cf9e2d76 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -8418,8 +8418,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) flush_all_atomic_capable_variables(); // FIXME: Image? // OpAtomicLoad seems to only be relevant for atomic counters. + forced_temporaries.insert(ops[1]); GLSL_UFOP(atomicCounter); - register_read(ops[1], ops[2], should_forward(ops[2])); break; case OpAtomicStore: @@ -8459,7 +8459,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8469,7 +8468,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8480,7 +8478,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8491,7 +8488,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8502,7 +8498,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8512,7 +8507,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8522,7 +8516,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } @@ -8532,7 +8525,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); break; } diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 871d18af..e6399625 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -3713,7 +3713,6 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) auto expr = bitcast_expression(type, expr_type, to_name(id)); set(id, expr, result_type, true); flush_all_atomic_capable_variables(); - register_read(ops[1], ops[2], should_forward(ops[2])); } void CompilerHLSL::emit_subgroup_op(const Instruction &i)