diff --git a/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp
new file mode 100644
index 00000000..4f6a3e34
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp
@@ -0,0 +1,18 @@
+static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
+
+RWByteAddressBuffer _5 : register(u0);
+
+void comp_main()
+{
+    uint _20;
+    _5.InterlockedAdd(4, 0, _20);
+    uint c = _20;
+    uint _23;
+    _5.InterlockedExchange(0, c, _23);
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp
new file mode 100644
index 00000000..1015d2a5
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp
@@ -0,0 +1,23 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <metal_atomic>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    uint a;
+    uint b;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
+
+kernel void main0(device SSBO& _5 [[buffer(0)]])
+{
+    uint _20 = atomic_load_explicit((device atomic_uint*)&_5.b, memory_order_relaxed);
+    uint c = _20;
+    atomic_store_explicit((device atomic_uint*)&_5.a, c, memory_order_relaxed);
+}
+
diff --git a/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp
new file mode 100644
index 00000000..10a54fc8
--- /dev/null
+++ b/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp
@@ -0,0 +1,16 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    uint a;
+    uint b;
+} _5;
+
+void main()
+{
+    uint _20 = atomicAdd(_5.b, 0u);
+    uint c = _20;
+    atomicExchange(_5.a, c);
+}
+
diff --git a/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp
b/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000..3f2d141a --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000..3f2d141a --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 
+; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000..3f2d141a --- /dev/null +++ b/shaders-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main 
LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 60460d81..50cc79ab 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -9794,15 +9794,33 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } case OpAtomicLoad: - flush_all_atomic_capable_variables(); - // FIXME: Image? - // OpAtomicLoad seems to only be relevant for atomic counters. + { + // In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. 
+		auto &type = expression_type(ops[2]);
 		forced_temporaries.insert(ops[1]);
-		GLSL_UFOP(atomicCounter);
+		bool atomic_image = check_atomic_image(ops[2]);
+		bool unsigned_type = (type.basetype == SPIRType::UInt) ||
+		                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
+		const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
+		const char *increment = unsigned_type ? "0u" : "0";
+		emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
+		flush_all_atomic_capable_variables();
 		break;
+	}
 
 	case OpAtomicStore:
-		SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore.");
+	{
+		// In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
+		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
+		uint32_t ptr = ops[0];
+		// Ignore semantics for now, probably only relevant to CL.
+		uint32_t val = ops[3];
+		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
+		statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
+		flush_all_atomic_capable_variables();
+		break;
+	}
 
 	case OpAtomicIIncrement:
 	case OpAtomicIDecrement:
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 73e46bc2..f27163c2 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -4087,9 +4087,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 	const char *atomic_op = nullptr;
 
 	string value_expr;
-	if (op != OpAtomicIDecrement && op != OpAtomicIIncrement)
+	if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
 		value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
 
+	bool is_atomic_store = false;
+
 	switch (op)
 	{
 	case OpAtomicIIncrement:
@@ -4102,6 +4104,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 		value_expr = "-1";
 		break;
 
+	case OpAtomicLoad:
+		atomic_op = "InterlockedAdd";
+		value_expr = "0";
+		break;
+
 	case OpAtomicISub:
 		atomic_op = "InterlockedAdd";
 		value_expr = join("-", enclose_expression(value_expr));
 		break;
@@ -4137,6 +4144,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 		atomic_op = "InterlockedExchange";
 		break;
 
+	case OpAtomicStore:
+		atomic_op = "InterlockedExchange";
+		is_atomic_store = true;
+		break;
+
 	case OpAtomicCompareExchange:
 		if (length < 8)
 			SPIRV_CROSS_THROW("Not enough data for opcode.");
@@ -4148,31 +4160,57 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 		SPIRV_CROSS_THROW("Unknown atomic opcode.");
 	}
 
-	uint32_t result_type = ops[0];
-	uint32_t id = ops[1];
-	forced_temporaries.insert(ops[1]);
-
-	auto &type = get<SPIRType>(result_type);
-	statement(variable_decl(type, to_name(id)), ";");
-
-	auto &data_type = expression_type(ops[2]);
-	auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
-	SPIRType::BaseType expr_type;
-	if (data_type.storage == StorageClassImage || !chain)
+	if (is_atomic_store)
 	{
-		statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
-		expr_type = data_type.basetype;
+		auto &data_type = expression_type(ops[0]);
+		auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
+
+		auto &tmp_id = extra_sub_expressions[ops[0]];
+		if (!tmp_id)
+		{
+			tmp_id = ir.increase_bound_by(1);
+			emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id);
+		}
+
+		if (data_type.storage == StorageClassImage || !chain)
+		{
+			statement(atomic_op, "(", to_expression(ops[0]), ", ", to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
+		}
+		else
+		{
+			// RWByteAddress buffer is always uint in its underlying type.
+			statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", to_expression(ops[3]),
+			          ", ", to_expression(tmp_id), ");");
+		}
 	}
 	else
 	{
-		// RWByteAddress buffer is always uint in its underlying type.
-		expr_type = SPIRType::UInt;
-		statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ",
-		          to_name(id), ");");
-	}
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		forced_temporaries.insert(ops[1]);
 
-	auto expr = bitcast_expression(type, expr_type, to_name(id));
-	set<SPIRExpression>(id, expr, result_type, true);
+		auto &type = get<SPIRType>(result_type);
+		statement(variable_decl(type, to_name(id)), ";");
+
+		auto &data_type = expression_type(ops[2]);
+		auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
+		SPIRType::BaseType expr_type;
+		if (data_type.storage == StorageClassImage || !chain)
+		{
+			statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
+			expr_type = data_type.basetype;
+		}
+		else
+		{
+			// RWByteAddress buffer is always uint in its underlying type.
+			expr_type = SPIRType::UInt;
+			statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
+			          ", ", to_name(id), ");");
+		}
+
+		auto expr = bitcast_expression(type, expr_type, to_name(id));
+		set<SPIRExpression>(id, expr, result_type, true);
+	}
 
 	flush_all_atomic_capable_variables();
 }
@@ -4967,6 +5005,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	case OpAtomicIAdd:
 	case OpAtomicIIncrement:
 	case OpAtomicIDecrement:
+	case OpAtomicLoad:
+	case OpAtomicStore:
 	{
 		emit_atomic(ops, instruction.length, opcode);
 		break;
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 597cf89c..635b9120 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -7038,7 +7038,11 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 			exp += string(", ") + get_memory_order(mem_order_2);
 
 		exp += ")";
-		emit_op(result_type, result_id, exp, false);
+
+		if (strcmp(op, "atomic_store_explicit") != 0)
+			emit_op(result_type, result_id, exp, false);
+		else
+			statement(exp, ";");
 	}
 
 	flush_all_atomic_capable_variables();