Implement atomic increment/decrement in GLSL and HLSL.

This commit is contained in:
Hans-Kristian Arntzen 2018-09-17 15:46:39 +02:00
parent a77880787d
commit 737715214e
14 changed files with 491 additions and 13 deletions

View File

@ -0,0 +1,22 @@
// Resources and inputs used by the compute shader in this file.
// u0_counter: read/write byte-address buffer in register u1; comp_main() does an atomic add on it.
RWByteAddressBuffer u0_counter : register(u1);
// u0: read/write typed uint buffer in register u0; comp_main() writes one element of it.
RWBuffer<uint> u0 : register(u0);
// File-scope copy of the dispatch thread ID; main() fills it in before calling comp_main().
static uint3 gl_GlobalInvocationID;
// Entry-point input structure carrying the SV_DispatchThreadID system value.
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
// Shader body: atomically adds -1 to the value at byte offset 0 of u0_counter, then
// stores this invocation's X thread ID into u0 at the index given by the value the
// atomic returned.
void comp_main()
{
// Output slot for InterlockedAdd's third argument (the original, pre-add value per the HLSL docs).
uint _29;
u0_counter.InterlockedAdd(0, -1, _29);
// The asfloat/asint pair reinterprets bits rather than converting values, so the
// index is expected to equal _29; "+ 0u" adds nothing.
u0[uint(asint(asfloat(_29))) + 0u] = uint(int(gl_GlobalInvocationID.x)).x;
}
// Compute entry point; each thread group is 4x1x1 threads.
[numthreads(4, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
// Copy the system-value input into the file-scope static that comp_main() reads.
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View File

@ -0,0 +1,22 @@
// Resources and inputs used by the compute shader in this file.
// u0_counter: read/write byte-address buffer in register u1; comp_main() does an atomic add on it.
RWByteAddressBuffer u0_counter : register(u1);
// u0: read/write typed uint buffer in register u0; comp_main() writes one element of it.
RWBuffer<uint> u0 : register(u0);
// File-scope copy of the dispatch thread ID; main() fills it in before calling comp_main().
static uint3 gl_GlobalInvocationID;
// Entry-point input structure carrying the SV_DispatchThreadID system value.
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
// Shader body: atomically adds 1 to the value at byte offset 0 of u0_counter, then
// stores this invocation's X thread ID into u0 at the index given by the value the
// atomic returned.
void comp_main()
{
// Output slot for InterlockedAdd's third argument (the original, pre-add value per the HLSL docs).
uint _29;
u0_counter.InterlockedAdd(0, 1, _29);
// The asfloat/asint pair reinterprets bits rather than converting values, so the
// index is expected to equal _29; "+ 0u" adds nothing.
u0[uint(asint(asfloat(_29))) + 0u] = uint(int(gl_GlobalInvocationID.x)).x;
}
// Compute entry point; each thread group is 4x1x1 threads.
[numthreads(4, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
// Copy the system-value input into the file-scope static that comp_main() reads.
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View File

@ -0,0 +1,16 @@
#version 450
// Compute workgroup size: 4x1x1 invocations.
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
// Storage block holding the single uint counter that main() updates atomically.
layout(binding = 0, std430) buffer u0_counters
{
uint c;
} u0_counter;
// Write-only r32ui buffer image that main() stores into.
layout(binding = 0, r32ui) uniform writeonly uimageBuffer u0;
// Atomically decrements u0_counter.c and stores this invocation's X ID into u0 at the
// texel index given by the value atomicAdd returned.
void main()
{
// uint(-1) is the unsigned bit pattern of -1, so the wrapping add lowers the counter by one.
// atomicAdd returns the counter's value from before the addition.
uint _29 = atomicAdd(u0_counter.c, uint(-1));
// The uintBitsToFloat/floatBitsToInt pair only reinterprets bits and "+ 0u" adds nothing,
// so the index is expected to equal _29. The stored uvec4 has the X ID in every component.
imageStore(u0, int(uint(floatBitsToInt(uintBitsToFloat(_29))) + 0u), uvec4(uint(int(gl_GlobalInvocationID.x))));
}

View File

@ -0,0 +1,16 @@
#version 450
// Compute workgroup size: 4x1x1 invocations.
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
// Storage block holding the single uint counter that main() updates atomically.
layout(binding = 0, std430) buffer u0_counters
{
uint c;
} u0_counter;
// Write-only r32ui buffer image that main() stores into.
layout(binding = 0, r32ui) uniform writeonly uimageBuffer u0;
// Atomically increments u0_counter.c and stores this invocation's X ID into u0 at the
// texel index given by the value atomicAdd returned.
void main()
{
// atomicAdd returns the counter's value from before the addition.
uint _29 = atomicAdd(u0_counter.c, 1u);
// The uintBitsToFloat/floatBitsToInt pair only reinterprets bits and "+ 0u" adds nothing,
// so the index is expected to equal _29. The stored uvec4 has the X ID in every component.
imageStore(u0, int(uint(floatBitsToInt(uintBitsToFloat(_29))) + 0u), uvec4(uint(int(gl_GlobalInvocationID.x))));
}

View File

@ -0,0 +1,24 @@
// Resources and inputs used by the compute shader in this file.
// u0_counter: read/write byte-address buffer in register u1; comp_main() does an atomic add on it.
RWByteAddressBuffer u0_counter : register(u1);
// u0: read/write typed uint buffer in register u0; comp_main() writes one element of it.
RWBuffer<uint> u0 : register(u0);
// File-scope copy of the dispatch thread ID; main() fills it in before calling comp_main().
static uint3 gl_GlobalInvocationID;
// Entry-point input structure carrying the SV_DispatchThreadID system value.
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
// Shader body: atomically adds -1 to the value at byte offset 0 of u0_counter, keeps the
// returned value's bits in r0.x, then stores this invocation's X thread ID into u0 at
// the index derived from r0.x.
void comp_main()
{
// Output slot for InterlockedAdd's third argument (the original, pre-add value per the HLSL docs).
uint _29;
u0_counter.InterlockedAdd(0, -1, _29);
// Float temporary; only its x component is written and read here.
float4 r0;
// asfloat reinterprets the bits of _29; it is not a numeric conversion.
r0.x = asfloat(_29);
// Index = (bits of r0.x) * 1 + (0 >> 2); the shift term is zero, so the index is
// expected to equal _29.
u0[(uint(asint(r0.x)) * 1u) + (uint(0) >> 2u)] = uint(int(gl_GlobalInvocationID.x)).x;
}
// Compute entry point; each thread group is 4x1x1 threads.
[numthreads(4, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
// Copy the system-value input into the file-scope static that comp_main() reads.
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View File

@ -0,0 +1,24 @@
// Resources and inputs used by the compute shader in this file.
// u0_counter: read/write byte-address buffer in register u1; comp_main() does an atomic add on it.
RWByteAddressBuffer u0_counter : register(u1);
// u0: read/write typed uint buffer in register u0; comp_main() writes one element of it.
RWBuffer<uint> u0 : register(u0);
// File-scope copy of the dispatch thread ID; main() fills it in before calling comp_main().
static uint3 gl_GlobalInvocationID;
// Entry-point input structure carrying the SV_DispatchThreadID system value.
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
// Shader body: atomically adds 1 to the value at byte offset 0 of u0_counter, keeps the
// returned value's bits in r0.x, then stores this invocation's X thread ID into u0 at
// the index derived from r0.x.
void comp_main()
{
// Output slot for InterlockedAdd's third argument (the original, pre-add value per the HLSL docs).
uint _29;
u0_counter.InterlockedAdd(0, 1, _29);
// Float temporary; only its x component is written and read here.
float4 r0;
// asfloat reinterprets the bits of _29; it is not a numeric conversion.
r0.x = asfloat(_29);
// Index = (bits of r0.x) * 1 + (0 >> 2); the shift term is zero, so the index is
// expected to equal _29.
u0[(uint(asint(r0.x)) * 1u) + (uint(0) >> 2u)] = uint(int(gl_GlobalInvocationID.x)).x;
}
// Compute entry point; each thread group is 4x1x1 threads.
[numthreads(4, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
// Copy the system-value input into the file-scope static that comp_main() reads.
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View File

@ -0,0 +1,18 @@
#version 450
// Compute workgroup size: 4x1x1 invocations.
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
// Storage block holding the single uint counter that main() updates atomically.
layout(binding = 0, std430) buffer u0_counters
{
uint c;
} u0_counter;
// Write-only r32ui buffer image that main() stores into.
layout(binding = 0, r32ui) uniform writeonly uimageBuffer u0;
// Atomically decrements u0_counter.c, keeps the returned value's bits in r0.x, and stores
// this invocation's X ID into u0 at the texel index derived from r0.x.
void main()
{
// uint(-1) is the unsigned bit pattern of -1, so the wrapping add lowers the counter by one.
// atomicAdd returns the counter's value from before the addition.
uint _29 = atomicAdd(u0_counter.c, uint(-1));
// Float temporary; only its x component is written and read here.
vec4 r0;
// Bit reinterpretation, not a numeric conversion.
r0.x = uintBitsToFloat(_29);
// Index = (bits of r0.x) * 1 + (0 >> 2); the shift term is zero, so the index is expected
// to equal _29. The stored uvec4 has the X ID in every component.
imageStore(u0, int((uint(floatBitsToInt(r0.x)) * 1u) + (uint(0) >> 2u)), uvec4(uint(int(gl_GlobalInvocationID.x))));
}

View File

@ -0,0 +1,18 @@
#version 450
// Compute workgroup size: 4x1x1 invocations.
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
// Storage block holding the single uint counter that main() updates atomically.
layout(binding = 0, std430) buffer u0_counters
{
uint c;
} u0_counter;
// Write-only r32ui buffer image that main() stores into.
layout(binding = 0, r32ui) uniform writeonly uimageBuffer u0;
// Atomically increments u0_counter.c, keeps the returned value's bits in r0.x, and stores
// this invocation's X ID into u0 at the texel index derived from r0.x.
void main()
{
// atomicAdd returns the counter's value from before the addition.
uint _29 = atomicAdd(u0_counter.c, 1u);
// Float temporary; only its x component is written and read here.
vec4 r0;
// Bit reinterpretation, not a numeric conversion.
r0.x = uintBitsToFloat(_29);
// Index = (bits of r0.x) * 1 + (0 >> 2); the shift term is zero, so the index is expected
// to equal _29. The stored uvec4 has the X ID in every component.
imageStore(u0, int((uint(floatBitsToInt(r0.x)) * 1u) + (uint(0) >> 2u)), uvec4(uint(int(gl_GlobalInvocationID.x))));
}

View File

@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.0
; Generator: Wine VKD3D Shader Compiler; 0
; Bound: 43
; Schema: 0
; Compute module with a single entry point, "main", whose body performs an
; OpAtomicIDecrement on a buffer-block counter and an OpImageWrite to a buffer image.
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpMemoryModel Logical GLSL450
; GLCompute entry point %3; %15 is its only interface variable.
OpEntryPoint GLCompute %3 "main" %15
; Workgroup size 4x1x1.
OpExecutionMode %3 LocalSize 4 1 1
OpName %3 "main"
OpName %8 "u0"
OpName %9 "u0_counters"
OpMemberName %9 0 "c"
OpName %11 "u0_counter"
OpName %15 "vThreadID"
OpName %19 "r0"
; %8 (u0): descriptor set 0, binding 0.
OpDecorate %8 DescriptorSet 0
OpDecorate %8 Binding 0
; %9 (u0_counters): buffer block whose single member sits at offset 0.
OpMemberDecorate %9 0 Offset 0
OpDecorate %9 BufferBlock
; %11 (u0_counter): descriptor set 1, binding 1.
OpDecorate %11 DescriptorSet 1
OpDecorate %11 Binding 1
; %15 (vThreadID): the GlobalInvocationId built-in.
OpDecorate %15 BuiltIn GlobalInvocationId
%1 = OpTypeVoid
%2 = OpTypeFunction %1
; %5 = 32-bit unsigned int; %6 = R32ui buffer image of %5 (Sampled operand 2).
%5 = OpTypeInt 32 0
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
%7 = OpTypePointer UniformConstant %6
%8 = OpVariable %7 UniformConstant
; %9 = struct with one uint member; %11 is the Uniform-storage variable of that type.
%9 = OpTypeStruct %5
%10 = OpTypePointer Uniform %9
%11 = OpVariable %10 Uniform
; %12 = 32-bit signed int; the thread-ID input %15 is a 3-component vector of it.
%12 = OpTypeInt 32 1
%13 = OpTypeVector %12 3
%14 = OpTypePointer Input %13
%15 = OpVariable %14 Input
; %16 = 32-bit float; %17 = 4-component float vector, the type of the function-local r0.
%16 = OpTypeFloat 32
%17 = OpTypeVector %16 4
%18 = OpTypePointer Function %17
%20 = OpTypePointer Uniform %5
; Constants: %21 = uint 0, %23 = uint 1, %33 = int 0, %34 = uint 2.
%21 = OpConstant %5 0
%23 = OpConstant %5 1
%26 = OpTypePointer Function %16
%33 = OpConstant %12 0
%34 = OpConstant %5 2
%37 = OpTypePointer Input %12
%41 = OpTypeVector %5 4
; Body of "main": atomically decrements the counter, then writes the invocation's
; X thread ID to the buffer image at the texel index derived from the atomic's result.
%3 = OpFunction %1 None %2
%4 = OpLabel
; r0: function-local 4-component float vector.
%19 = OpVariable %18 Function
; Pointer to member 0 ("c") of u0_counter.
%22 = OpAccessChain %20 %11 %21
; Atomic decrement on %22 with scope operand %23 (constant 1) and memory-semantics
; operand %21 (constant 0). Per the SPIR-V spec the result is the original value.
%24 = OpAtomicIDecrement %5 %22 %23 %21
; Store the result's bits into r0.x as a float.
%25 = OpBitcast %16 %24
%27 = OpInBoundsAccessChain %26 %19 %21
OpStore %27 %25
%28 = OpLoad %6 %8
; Read r0.x back and reinterpret it as an integer.
%29 = OpInBoundsAccessChain %26 %19 %21
%30 = OpLoad %16 %29
%31 = OpBitcast %12 %30
; Texel index = (r0.x bits * 1) + (0 >> 2).
%32 = OpIMul %5 %31 %23
%35 = OpShiftRightLogical %5 %33 %34
%36 = OpIAdd %5 %32 %35
; Load component 0 of the thread ID and reinterpret it as unsigned.
%38 = OpInBoundsAccessChain %37 %15 %21
%39 = OpLoad %12 %38
%40 = OpBitcast %5 %39
; Replicate it into all four components and write it to the image.
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
OpImageWrite %28 %36 %42
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.0
; Generator: Wine VKD3D Shader Compiler; 0
; Bound: 43
; Schema: 0
; Compute module with a single entry point, "main", whose body performs an
; OpAtomicIIncrement on a buffer-block counter and an OpImageWrite to a buffer image.
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpMemoryModel Logical GLSL450
; GLCompute entry point %3; %15 is its only interface variable.
OpEntryPoint GLCompute %3 "main" %15
; Workgroup size 4x1x1.
OpExecutionMode %3 LocalSize 4 1 1
OpName %3 "main"
OpName %8 "u0"
OpName %9 "u0_counters"
OpMemberName %9 0 "c"
OpName %11 "u0_counter"
OpName %15 "vThreadID"
OpName %19 "r0"
; %8 (u0): descriptor set 0, binding 0.
OpDecorate %8 DescriptorSet 0
OpDecorate %8 Binding 0
; %9 (u0_counters): buffer block whose single member sits at offset 0.
OpMemberDecorate %9 0 Offset 0
OpDecorate %9 BufferBlock
; %11 (u0_counter): descriptor set 1, binding 1.
OpDecorate %11 DescriptorSet 1
OpDecorate %11 Binding 1
; %15 (vThreadID): the GlobalInvocationId built-in.
OpDecorate %15 BuiltIn GlobalInvocationId
%1 = OpTypeVoid
%2 = OpTypeFunction %1
; %5 = 32-bit unsigned int; %6 = R32ui buffer image of %5 (Sampled operand 2).
%5 = OpTypeInt 32 0
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
%7 = OpTypePointer UniformConstant %6
%8 = OpVariable %7 UniformConstant
; %9 = struct with one uint member; %11 is the Uniform-storage variable of that type.
%9 = OpTypeStruct %5
%10 = OpTypePointer Uniform %9
%11 = OpVariable %10 Uniform
; %12 = 32-bit signed int; the thread-ID input %15 is a 3-component vector of it.
%12 = OpTypeInt 32 1
%13 = OpTypeVector %12 3
%14 = OpTypePointer Input %13
%15 = OpVariable %14 Input
; %16 = 32-bit float; %17 = 4-component float vector, the type of the function-local r0.
%16 = OpTypeFloat 32
%17 = OpTypeVector %16 4
%18 = OpTypePointer Function %17
%20 = OpTypePointer Uniform %5
; Constants: %21 = uint 0, %23 = uint 1, %33 = int 0, %34 = uint 2.
%21 = OpConstant %5 0
%23 = OpConstant %5 1
%26 = OpTypePointer Function %16
%33 = OpConstant %12 0
%34 = OpConstant %5 2
%37 = OpTypePointer Input %12
%41 = OpTypeVector %5 4
; Body of "main": atomically increments the counter, then writes the invocation's
; X thread ID to the buffer image at the texel index derived from the atomic's result.
%3 = OpFunction %1 None %2
%4 = OpLabel
; r0: function-local 4-component float vector.
%19 = OpVariable %18 Function
; Pointer to member 0 ("c") of u0_counter.
%22 = OpAccessChain %20 %11 %21
; Atomic increment on %22 with scope operand %23 (constant 1) and memory-semantics
; operand %21 (constant 0). Per the SPIR-V spec the result is the original value.
%24 = OpAtomicIIncrement %5 %22 %23 %21
; Store the result's bits into r0.x as a float.
%25 = OpBitcast %16 %24
%27 = OpInBoundsAccessChain %26 %19 %21
OpStore %27 %25
%28 = OpLoad %6 %8
; Read r0.x back and reinterpret it as an integer.
%29 = OpInBoundsAccessChain %26 %19 %21
%30 = OpLoad %16 %29
%31 = OpBitcast %12 %30
; Texel index = (r0.x bits * 1) + (0 >> 2).
%32 = OpIMul %5 %31 %23
%35 = OpShiftRightLogical %5 %33 %34
%36 = OpIAdd %5 %32 %35
; Load component 0 of the thread ID and reinterpret it as unsigned.
%38 = OpInBoundsAccessChain %37 %15 %21
%39 = OpLoad %12 %38
%40 = OpBitcast %5 %39
; Replicate it into all four components and write it to the image.
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
OpImageWrite %28 %36 %42
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.0
; Generator: Wine VKD3D Shader Compiler; 0
; Bound: 43
; Schema: 0
; Compute module with a single entry point, "main", whose body performs an
; OpAtomicIDecrement on a buffer-block counter and an OpImageWrite to a buffer image.
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpMemoryModel Logical GLSL450
; GLCompute entry point %3; %15 is its only interface variable.
OpEntryPoint GLCompute %3 "main" %15
; Workgroup size 4x1x1.
OpExecutionMode %3 LocalSize 4 1 1
OpName %3 "main"
OpName %8 "u0"
OpName %9 "u0_counters"
OpMemberName %9 0 "c"
OpName %11 "u0_counter"
OpName %15 "vThreadID"
OpName %19 "r0"
; %8 (u0): descriptor set 0, binding 0.
OpDecorate %8 DescriptorSet 0
OpDecorate %8 Binding 0
; %9 (u0_counters): buffer block whose single member sits at offset 0.
OpMemberDecorate %9 0 Offset 0
OpDecorate %9 BufferBlock
; %11 (u0_counter): descriptor set 1, binding 0.
OpDecorate %11 DescriptorSet 1
OpDecorate %11 Binding 0
; %15 (vThreadID): the GlobalInvocationId built-in.
OpDecorate %15 BuiltIn GlobalInvocationId
%1 = OpTypeVoid
%2 = OpTypeFunction %1
; %5 = 32-bit unsigned int; %6 = R32ui buffer image of %5 (Sampled operand 2).
%5 = OpTypeInt 32 0
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
%7 = OpTypePointer UniformConstant %6
%8 = OpVariable %7 UniformConstant
; %9 = struct with one uint member; %11 is the Uniform-storage variable of that type.
%9 = OpTypeStruct %5
%10 = OpTypePointer Uniform %9
%11 = OpVariable %10 Uniform
; %12 = 32-bit signed int; the thread-ID input %15 is a 3-component vector of it.
%12 = OpTypeInt 32 1
%13 = OpTypeVector %12 3
%14 = OpTypePointer Input %13
%15 = OpVariable %14 Input
; %16 = 32-bit float; %17 = 4-component float vector, the type of the function-local r0.
%16 = OpTypeFloat 32
%17 = OpTypeVector %16 4
%18 = OpTypePointer Function %17
%20 = OpTypePointer Uniform %5
; Constants: %21 = uint 0, %23 = uint 1, %33 = int 0, %34 = uint 2.
%21 = OpConstant %5 0
%23 = OpConstant %5 1
%26 = OpTypePointer Function %16
%33 = OpConstant %12 0
%34 = OpConstant %5 2
%37 = OpTypePointer Input %12
%41 = OpTypeVector %5 4
; Body of "main": atomically decrements the counter, then writes the invocation's
; X thread ID to the buffer image at the texel index derived from the atomic's result.
%3 = OpFunction %1 None %2
%4 = OpLabel
; r0: function-local 4-component float vector.
%19 = OpVariable %18 Function
; Pointer to member 0 ("c") of u0_counter.
%22 = OpAccessChain %20 %11 %21
; Atomic decrement on %22 with scope operand %23 (constant 1) and memory-semantics
; operand %21 (constant 0). Per the SPIR-V spec the result is the original value.
%24 = OpAtomicIDecrement %5 %22 %23 %21
; Store the result's bits into r0.x as a float.
%25 = OpBitcast %16 %24
%27 = OpInBoundsAccessChain %26 %19 %21
OpStore %27 %25
%28 = OpLoad %6 %8
; Read r0.x back and reinterpret it as an integer.
%29 = OpInBoundsAccessChain %26 %19 %21
%30 = OpLoad %16 %29
%31 = OpBitcast %12 %30
; Texel index = (r0.x bits * 1) + (0 >> 2).
%32 = OpIMul %5 %31 %23
%35 = OpShiftRightLogical %5 %33 %34
%36 = OpIAdd %5 %32 %35
; Load component 0 of the thread ID and reinterpret it as unsigned.
%38 = OpInBoundsAccessChain %37 %15 %21
%39 = OpLoad %12 %38
%40 = OpBitcast %5 %39
; Replicate it into all four components and write it to the image.
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
OpImageWrite %28 %36 %42
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.0
; Generator: Wine VKD3D Shader Compiler; 0
; Bound: 43
; Schema: 0
; Compute module with a single entry point, "main", whose body performs an
; OpAtomicIIncrement on a buffer-block counter and an OpImageWrite to a buffer image.
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpMemoryModel Logical GLSL450
; GLCompute entry point %3; %15 is its only interface variable.
OpEntryPoint GLCompute %3 "main" %15
; Workgroup size 4x1x1.
OpExecutionMode %3 LocalSize 4 1 1
OpName %3 "main"
OpName %8 "u0"
OpName %9 "u0_counters"
OpMemberName %9 0 "c"
OpName %11 "u0_counter"
OpName %15 "vThreadID"
OpName %19 "r0"
; %8 (u0): descriptor set 0, binding 0.
OpDecorate %8 DescriptorSet 0
OpDecorate %8 Binding 0
; %9 (u0_counters): buffer block whose single member sits at offset 0.
OpMemberDecorate %9 0 Offset 0
OpDecorate %9 BufferBlock
; %11 (u0_counter): descriptor set 1, binding 0.
OpDecorate %11 DescriptorSet 1
OpDecorate %11 Binding 0
; %15 (vThreadID): the GlobalInvocationId built-in.
OpDecorate %15 BuiltIn GlobalInvocationId
%1 = OpTypeVoid
%2 = OpTypeFunction %1
; %5 = 32-bit unsigned int; %6 = R32ui buffer image of %5 (Sampled operand 2).
%5 = OpTypeInt 32 0
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
%7 = OpTypePointer UniformConstant %6
%8 = OpVariable %7 UniformConstant
; %9 = struct with one uint member; %11 is the Uniform-storage variable of that type.
%9 = OpTypeStruct %5
%10 = OpTypePointer Uniform %9
%11 = OpVariable %10 Uniform
; %12 = 32-bit signed int; the thread-ID input %15 is a 3-component vector of it.
%12 = OpTypeInt 32 1
%13 = OpTypeVector %12 3
%14 = OpTypePointer Input %13
%15 = OpVariable %14 Input
; %16 = 32-bit float; %17 = 4-component float vector, the type of the function-local r0.
%16 = OpTypeFloat 32
%17 = OpTypeVector %16 4
%18 = OpTypePointer Function %17
%20 = OpTypePointer Uniform %5
; Constants: %21 = uint 0, %23 = uint 1, %33 = int 0, %34 = uint 2.
%21 = OpConstant %5 0
%23 = OpConstant %5 1
%26 = OpTypePointer Function %16
%33 = OpConstant %12 0
%34 = OpConstant %5 2
%37 = OpTypePointer Input %12
%41 = OpTypeVector %5 4
; Body of "main": atomically increments the counter, then writes the invocation's
; X thread ID to the buffer image at the texel index derived from the atomic's result.
%3 = OpFunction %1 None %2
%4 = OpLabel
; r0: function-local 4-component float vector.
%19 = OpVariable %18 Function
; Pointer to member 0 ("c") of u0_counter.
%22 = OpAccessChain %20 %11 %21
; Atomic increment on %22 with scope operand %23 (constant 1) and memory-semantics
; operand %21 (constant 0). Per the SPIR-V spec the result is the original value.
%24 = OpAtomicIIncrement %5 %22 %23 %21
; Store the result's bits into r0.x as a float.
%25 = OpBitcast %16 %24
%27 = OpInBoundsAccessChain %26 %19 %21
OpStore %27 %25
%28 = OpLoad %6 %8
; Read r0.x back and reinterpret it as an integer.
%29 = OpInBoundsAccessChain %26 %19 %21
%30 = OpLoad %16 %29
%31 = OpBitcast %12 %30
; Texel index = (r0.x bits * 1) + (0 >> 2).
%32 = OpIMul %5 %31 %23
%35 = OpShiftRightLogical %5 %33 %34
%36 = OpIAdd %5 %32 %35
; Load component 0 of the thread ID and reinterpret it as unsigned.
%38 = OpInBoundsAccessChain %37 %15 %21
%39 = OpLoad %12 %38
%40 = OpBitcast %5 %39
; Replicate it into all four components and write it to the image.
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
OpImageWrite %28 %36 %42
OpReturn
OpFunctionEnd

View File

@ -7343,20 +7343,42 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore.");
case OpAtomicIIncrement:
forced_temporaries.insert(ops[1]);
// FIXME: Image?
GLSL_UFOP(atomicCounterIncrement);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
case OpAtomicIDecrement:
{
forced_temporaries.insert(ops[1]);
// FIXME: Image?
GLSL_UFOP(atomicCounterDecrement);
auto &type = expression_type(ops[2]);
if (type.storage == StorageClassAtomicCounter)
{
// Legacy GLSL stuff, not sure if this is relevant to support.
if (opcode == OpAtomicIIncrement)
GLSL_UFOP(atomicCounterIncrement);
else
GLSL_UFOP(atomicCounterDecrement);
}
else
{
bool atomic_image = check_atomic_image(ops[2]);
bool unsigned_type = (type.basetype == SPIRType::UInt) ||
(atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
const char *increment = nullptr;
if (opcode == OpAtomicIIncrement && unsigned_type)
increment = "1u";
else if (opcode == OpAtomicIIncrement)
increment = "1";
else if (unsigned_type)
increment = "uint(-1)";
else
increment = "-1";
emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
}
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicIAdd:
{

View File

@ -3591,10 +3591,23 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
{
const char *atomic_op = nullptr;
auto value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
string value_expr;
if (op != OpAtomicIDecrement && op != OpAtomicIIncrement)
value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
switch (op)
{
case OpAtomicIIncrement:
atomic_op = "InterlockedAdd";
value_expr = "1";
break;
case OpAtomicIDecrement:
atomic_op = "InterlockedAdd";
value_expr = "-1";
break;
case OpAtomicISub:
atomic_op = "InterlockedAdd";
value_expr = join("-", enclose_expression(value_expr));
@ -3641,9 +3654,6 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
SPIRV_CROSS_THROW("Unknown atomic opcode.");
}
if (length < 6)
SPIRV_CROSS_THROW("Not enough data for opcode.");
uint32_t result_type = ops[0];
uint32_t id = ops[1];
forced_temporaries.insert(ops[1]);
@ -4301,6 +4311,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpAtomicOr:
case OpAtomicXor:
case OpAtomicIAdd:
case OpAtomicIIncrement:
case OpAtomicIDecrement:
{
emit_atomic(ops, instruction.length, opcode);
break;