Fix infinite loop when OpAtomic* temporaries are used in other blocks.
We made the mistake of registering a dependency on the atomic variable even if the atomic result was forced to a temporary. There is no need to register reads from atomic variables like this as we always force atomic results to a temporary and argument read/writes do not need to be tracked.
This commit is contained in:
parent
bbfc31339a
commit
8b236f24f1
@ -0,0 +1,24 @@
|
||||
RWByteAddressBuffer _5 : register(u0);
|
||||
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint _24;
|
||||
_5.InterlockedAdd(0, 1u, _24);
|
||||
if (_24 < 1024u)
|
||||
{
|
||||
_5.Store(_24 * 4 + 4, gl_GlobalInvocationID.x);
|
||||
}
|
||||
}
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
comp_main();
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
uint count;
|
||||
uint data[1];
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
|
||||
if (_24 < 1024u)
|
||||
{
|
||||
_5.data[_24] = gl_GlobalInvocationID.x;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,18 @@
|
||||
#version 450
|
||||
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(binding = 0, std430) buffer SSBO
|
||||
{
|
||||
uint count;
|
||||
uint data[];
|
||||
} _5;
|
||||
|
||||
void main()
|
||||
{
|
||||
uint _24 = atomicAdd(_5.count, 1u);
|
||||
if (_24 < 1024u)
|
||||
{
|
||||
_5.data[_24] = gl_GlobalInvocationID.x;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,59 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 35
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %SSBO "SSBO"
|
||||
OpMemberName %SSBO 0 "count"
|
||||
OpMemberName %SSBO 1 "data"
|
||||
OpName %_ ""
|
||||
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
|
||||
OpDecorate %_runtimearr_uint ArrayStride 4
|
||||
OpMemberDecorate %SSBO 0 Offset 0
|
||||
OpMemberDecorate %SSBO 1 Offset 4
|
||||
OpDecorate %SSBO BufferBlock
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %_ Binding 0
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%uint = OpTypeInt 32 0
|
||||
%_runtimearr_uint = OpTypeRuntimeArray %uint
|
||||
%SSBO = OpTypeStruct %uint %_runtimearr_uint
|
||||
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
|
||||
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_1024 = OpConstant %uint 1024
|
||||
%bool = OpTypeBool
|
||||
%int_1 = OpConstant %int 1
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%_ptr_Input_uint = OpTypePointer Input %uint
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
|
||||
%19 = OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1
|
||||
%23 = OpULessThan %bool %19 %uint_1024
|
||||
OpSelectionMerge %25 None
|
||||
OpBranchConditional %23 %24 %25
|
||||
%24 = OpLabel
|
||||
%32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
|
||||
%33 = OpLoad %uint %32
|
||||
%34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19
|
||||
OpStore %34 %33
|
||||
OpBranch %25
|
||||
%25 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
59
shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
Normal file
59
shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
Normal file
@ -0,0 +1,59 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 35
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %SSBO "SSBO"
|
||||
OpMemberName %SSBO 0 "count"
|
||||
OpMemberName %SSBO 1 "data"
|
||||
OpName %_ ""
|
||||
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
|
||||
OpDecorate %_runtimearr_uint ArrayStride 4
|
||||
OpMemberDecorate %SSBO 0 Offset 0
|
||||
OpMemberDecorate %SSBO 1 Offset 4
|
||||
OpDecorate %SSBO BufferBlock
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %_ Binding 0
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%uint = OpTypeInt 32 0
|
||||
%_runtimearr_uint = OpTypeRuntimeArray %uint
|
||||
%SSBO = OpTypeStruct %uint %_runtimearr_uint
|
||||
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
|
||||
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_1024 = OpConstant %uint 1024
|
||||
%bool = OpTypeBool
|
||||
%int_1 = OpConstant %int 1
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%_ptr_Input_uint = OpTypePointer Input %uint
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
|
||||
%19 = OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1
|
||||
%23 = OpULessThan %bool %19 %uint_1024
|
||||
OpSelectionMerge %25 None
|
||||
OpBranchConditional %23 %24 %25
|
||||
%24 = OpLabel
|
||||
%32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
|
||||
%33 = OpLoad %uint %32
|
||||
%34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19
|
||||
OpStore %34 %33
|
||||
OpBranch %25
|
||||
%25 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
59
shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp
Normal file
59
shaders-no-opt/asm/comp/atomic-result-temporary.asm.comp
Normal file
@ -0,0 +1,59 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 35
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %SSBO "SSBO"
|
||||
OpMemberName %SSBO 0 "count"
|
||||
OpMemberName %SSBO 1 "data"
|
||||
OpName %_ ""
|
||||
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
|
||||
OpDecorate %_runtimearr_uint ArrayStride 4
|
||||
OpMemberDecorate %SSBO 0 Offset 0
|
||||
OpMemberDecorate %SSBO 1 Offset 4
|
||||
OpDecorate %SSBO BufferBlock
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %_ Binding 0
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%uint = OpTypeInt 32 0
|
||||
%_runtimearr_uint = OpTypeRuntimeArray %uint
|
||||
%SSBO = OpTypeStruct %uint %_runtimearr_uint
|
||||
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
|
||||
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_1024 = OpConstant %uint 1024
|
||||
%bool = OpTypeBool
|
||||
%int_1 = OpConstant %int 1
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%_ptr_Input_uint = OpTypePointer Input %uint
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
|
||||
%19 = OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1
|
||||
%23 = OpULessThan %bool %19 %uint_1024
|
||||
OpSelectionMerge %25 None
|
||||
OpBranchConditional %23 %24 %25
|
||||
%24 = OpLabel
|
||||
%32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
|
||||
%33 = OpLoad %uint %32
|
||||
%34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19
|
||||
OpStore %34 %33
|
||||
OpBranch %25
|
||||
%25 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -8418,8 +8418,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
flush_all_atomic_capable_variables();
|
||||
// FIXME: Image?
|
||||
// OpAtomicLoad seems to only be relevant for atomic counters.
|
||||
forced_temporaries.insert(ops[1]);
|
||||
GLSL_UFOP(atomicCounter);
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
|
||||
case OpAtomicStore:
|
||||
@ -8459,7 +8459,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8469,7 +8468,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
forced_temporaries.insert(ops[1]);
|
||||
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8480,7 +8478,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
|
||||
emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8491,7 +8488,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
forced_temporaries.insert(ops[1]);
|
||||
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8502,7 +8498,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
forced_temporaries.insert(ops[1]);
|
||||
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8512,7 +8507,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
forced_temporaries.insert(ops[1]);
|
||||
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8522,7 +8516,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
forced_temporaries.insert(ops[1]);
|
||||
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8532,7 +8525,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
forced_temporaries.insert(ops[1]);
|
||||
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3713,7 +3713,6 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
|
||||
auto expr = bitcast_expression(type, expr_type, to_name(id));
|
||||
set<SPIRExpression>(id, expr, result_type, true);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
}
|
||||
|
||||
void CompilerHLSL::emit_subgroup_op(const Instruction &i)
|
||||
|
Loading…
Reference in New Issue
Block a user