Implement OpAtomicLoad/OpAtomicStore.

Need some emulation on GLSL/HLSL, fix bug with atomic store on MSL.
This commit is contained in:
Hans-Kristian Arntzen 2020-04-27 12:09:59 +02:00
parent 7ba0f8f087
commit 9b7140e2ba
9 changed files with 290 additions and 27 deletions

View File

@ -0,0 +1,18 @@
static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
RWByteAddressBuffer _5 : register(u0);
void comp_main()
{
uint _20;
_5.InterlockedAdd(4, 0, _20);
uint c = _20;
uint _23;
_5.InterlockedExchange(0, c, _23);
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@ -0,0 +1,23 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct SSBO
{
uint a;
uint b;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device SSBO& _5 [[buffer(0)]])
{
uint _20 = atomic_load_explicit((device atomic_uint*)&_5.b, memory_order_relaxed);
uint c = _20;
atomic_store_explicit((device atomic_uint*)&_5.a, c, memory_order_relaxed);
}

View File

@ -0,0 +1,16 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
{
uint a;
uint b;
} _5;
void main()
{
uint _20 = atomicAdd(_5.b, 0u);
uint c = _20;
atomicExchange(_5.a, c);
}

View File

@ -0,0 +1,48 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 8
; Bound: 23
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %main "main"
OpName %c "c"
OpName %SSBO "SSBO"
OpMemberName %SSBO 0 "a"
OpMemberName %SSBO 1 "b"
OpName %_ ""
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 4
OpDecorate %SSBO BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%SSBO = OpTypeStruct %uint %uint
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%int_0 = OpConstant %int 0
%v3uint = OpTypeVector %uint 3
%uint_1 = OpConstant %uint 1
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
%main = OpFunction %void None %3
%5 = OpLabel
%c = OpVariable %_ptr_Function_uint Function
%15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1
%16 = OpAtomicLoad %uint %15 %int_1 %int_0
OpStore %c %16
%18 = OpLoad %uint %c
%19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
OpAtomicStore %19 %int_1 %int_0 %18
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,48 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 8
; Bound: 23
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %main "main"
OpName %c "c"
OpName %SSBO "SSBO"
OpMemberName %SSBO 0 "a"
OpMemberName %SSBO 1 "b"
OpName %_ ""
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 4
OpDecorate %SSBO BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%SSBO = OpTypeStruct %uint %uint
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%int_0 = OpConstant %int 0
%v3uint = OpTypeVector %uint 3
%uint_1 = OpConstant %uint 1
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
%main = OpFunction %void None %3
%5 = OpLabel
%c = OpVariable %_ptr_Function_uint Function
%15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1
%16 = OpAtomicLoad %uint %15 %int_1 %int_0
OpStore %c %16
%18 = OpLoad %uint %c
%19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
OpAtomicStore %19 %int_1 %int_0 %18
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,48 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 8
; Bound: 23
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %main "main"
OpName %c "c"
OpName %SSBO "SSBO"
OpMemberName %SSBO 0 "a"
OpMemberName %SSBO 1 "b"
OpName %_ ""
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 4
OpDecorate %SSBO BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%3 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%SSBO = OpTypeStruct %uint %uint
%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
%_ = OpVariable %_ptr_Uniform_SSBO Uniform
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%int_0 = OpConstant %int 0
%v3uint = OpTypeVector %uint 3
%uint_1 = OpConstant %uint 1
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
%main = OpFunction %void None %3
%5 = OpLabel
%c = OpVariable %_ptr_Function_uint Function
%15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1
%16 = OpAtomicLoad %uint %15 %int_1 %int_0
OpStore %c %16
%18 = OpLoad %uint %c
%19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
OpAtomicStore %19 %int_1 %int_0 %18
OpReturn
OpFunctionEnd

View File

@ -9794,15 +9794,33 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
case OpAtomicLoad:
flush_all_atomic_capable_variables();
// FIXME: Image?
// OpAtomicLoad seems to only be relevant for atomic counters.
{
// In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out.
// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
auto &type = expression_type(ops[2]);
forced_temporaries.insert(ops[1]);
GLSL_UFOP(atomicCounter);
bool atomic_image = check_atomic_image(ops[2]);
bool unsigned_type = (type.basetype == SPIRType::UInt) ||
(atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
const char *increment = unsigned_type ? "0u" : "0";
emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
flush_all_atomic_capable_variables();
break;
}
case OpAtomicStore:
SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore.");
{
// In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
uint32_t ptr = ops[0];
// Ignore semantics for now, probably only relevant to CL.
uint32_t val = ops[3];
const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
flush_all_atomic_capable_variables();
break;
}
case OpAtomicIIncrement:
case OpAtomicIDecrement:

View File

@ -4087,9 +4087,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
const char *atomic_op = nullptr;
string value_expr;
if (op != OpAtomicIDecrement && op != OpAtomicIIncrement)
if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
bool is_atomic_store = false;
switch (op)
{
case OpAtomicIIncrement:
@ -4102,6 +4104,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
value_expr = "-1";
break;
case OpAtomicLoad:
atomic_op = "InterlockedAdd";
value_expr = "0";
break;
case OpAtomicISub:
atomic_op = "InterlockedAdd";
value_expr = join("-", enclose_expression(value_expr));
@ -4137,6 +4144,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
atomic_op = "InterlockedExchange";
break;
case OpAtomicStore:
atomic_op = "InterlockedExchange";
is_atomic_store = true;
break;
case OpAtomicCompareExchange:
if (length < 8)
SPIRV_CROSS_THROW("Not enough data for opcode.");
@ -4148,31 +4160,57 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
SPIRV_CROSS_THROW("Unknown atomic opcode.");
}
uint32_t result_type = ops[0];
uint32_t id = ops[1];
forced_temporaries.insert(ops[1]);
auto &type = get<SPIRType>(result_type);
statement(variable_decl(type, to_name(id)), ";");
auto &data_type = expression_type(ops[2]);
auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
SPIRType::BaseType expr_type;
if (data_type.storage == StorageClassImage || !chain)
if (is_atomic_store)
{
statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
expr_type = data_type.basetype;
auto &data_type = expression_type(ops[0]);
auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
auto &tmp_id = extra_sub_expressions[ops[0]];
if (!tmp_id)
{
tmp_id = ir.increase_bound_by(1);
emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id);
}
if (data_type.storage == StorageClassImage || !chain)
{
statement(atomic_op, "(", to_expression(ops[0]), ", ", to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
}
else
{
// RWByteAddress buffer is always uint in its underlying type.
statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", to_expression(ops[3]),
", ", to_expression(tmp_id), ");");
}
}
else
{
// RWByteAddress buffer is always uint in its underlying type.
expr_type = SPIRType::UInt;
statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ",
to_name(id), ");");
}
uint32_t result_type = ops[0];
uint32_t id = ops[1];
forced_temporaries.insert(ops[1]);
auto expr = bitcast_expression(type, expr_type, to_name(id));
set<SPIRExpression>(id, expr, result_type, true);
auto &type = get<SPIRType>(result_type);
statement(variable_decl(type, to_name(id)), ";");
auto &data_type = expression_type(ops[2]);
auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
SPIRType::BaseType expr_type;
if (data_type.storage == StorageClassImage || !chain)
{
statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
expr_type = data_type.basetype;
}
else
{
// RWByteAddress buffer is always uint in its underlying type.
expr_type = SPIRType::UInt;
statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
", ", to_name(id), ");");
}
auto expr = bitcast_expression(type, expr_type, to_name(id));
set<SPIRExpression>(id, expr, result_type, true);
}
flush_all_atomic_capable_variables();
}
@ -4967,6 +5005,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpAtomicIAdd:
case OpAtomicIIncrement:
case OpAtomicIDecrement:
case OpAtomicLoad:
case OpAtomicStore:
{
emit_atomic(ops, instruction.length, opcode);
break;

View File

@ -7038,7 +7038,11 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
exp += string(", ") + get_memory_order(mem_order_2);
exp += ")";
emit_op(result_type, result_id, exp, false);
if (strcmp(op, "atomic_store_explicit") != 0)
emit_op(result_type, result_id, exp, false);
else
statement(exp, ";");
}
flush_all_atomic_capable_variables();