Merge pull request #693 from cdavis5e/msl-atomic-inc-dec

MSL: Fix OpAtomicIIncrement and OpAtomicIDecrement.
This commit is contained in:
Hans-Kristian Arntzen 2018-09-13 16:19:27 +02:00 committed by GitHub
commit 1bbb4032c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 271 additions and 15 deletions

View File

@ -0,0 +1,26 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct u0_counters
{
uint c;
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
return uint2(tc % 4096, tc / 4096);
}
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord((uint(as_type<int>(as_type<float>(_29))) + 0u)));
}

View File

@ -0,0 +1,26 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct u0_counters
{
uint c;
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
return uint2(tc % 4096, tc / 4096);
}
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord((uint(as_type<int>(as_type<float>(_29))) + 0u)));
}

View File

@ -0,0 +1,28 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct u0_counters
{
uint c;
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
return uint2(tc % 4096, tc / 4096);
}
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
float4 r0;
r0.x = as_type<float>(_29);
u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));
}

View File

@ -0,0 +1,28 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct u0_counters
{
uint c;
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
return uint2(tc % 4096, tc / 4096);
}
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
float4 r0;
r0.x = as_type<float>(_29);
u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));
}

View File

@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.0
; Generator: Wine VKD3D Shader Compiler; 0
; Bound: 43
; Schema: 0
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %15
OpExecutionMode %3 LocalSize 4 1 1
OpName %3 "main"
OpName %8 "u0"
OpName %9 "u0_counters"
OpMemberName %9 0 "c"
OpName %11 "u0_counter"
OpName %15 "vThreadID"
OpName %19 "r0"
OpDecorate %8 DescriptorSet 0
OpDecorate %8 Binding 0
OpMemberDecorate %9 0 Offset 0
OpDecorate %9 BufferBlock
OpDecorate %11 DescriptorSet 1
OpDecorate %11 Binding 0
OpDecorate %15 BuiltIn GlobalInvocationId
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeInt 32 0
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
%7 = OpTypePointer UniformConstant %6
%8 = OpVariable %7 UniformConstant
%9 = OpTypeStruct %5
%10 = OpTypePointer Uniform %9
%11 = OpVariable %10 Uniform
%12 = OpTypeInt 32 1
%13 = OpTypeVector %12 3
%14 = OpTypePointer Input %13
%15 = OpVariable %14 Input
%16 = OpTypeFloat 32
%17 = OpTypeVector %16 4
%18 = OpTypePointer Function %17
%20 = OpTypePointer Uniform %5
%21 = OpConstant %5 0
%23 = OpConstant %5 1
%26 = OpTypePointer Function %16
%33 = OpConstant %12 0
%34 = OpConstant %5 2
%37 = OpTypePointer Input %12
%41 = OpTypeVector %5 4
%3 = OpFunction %1 None %2
%4 = OpLabel
%19 = OpVariable %18 Function
%22 = OpAccessChain %20 %11 %21
%24 = OpAtomicIDecrement %5 %22 %23 %21
%25 = OpBitcast %16 %24
%27 = OpInBoundsAccessChain %26 %19 %21
OpStore %27 %25
%28 = OpLoad %6 %8
%29 = OpInBoundsAccessChain %26 %19 %21
%30 = OpLoad %16 %29
%31 = OpBitcast %12 %30
%32 = OpIMul %5 %31 %23
%35 = OpShiftRightLogical %5 %33 %34
%36 = OpIAdd %5 %32 %35
%38 = OpInBoundsAccessChain %37 %15 %21
%39 = OpLoad %12 %38
%40 = OpBitcast %5 %39
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
OpImageWrite %28 %36 %42
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.0
; Generator: Wine VKD3D Shader Compiler; 0
; Bound: 43
; Schema: 0
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %15
OpExecutionMode %3 LocalSize 4 1 1
OpName %3 "main"
OpName %8 "u0"
OpName %9 "u0_counters"
OpMemberName %9 0 "c"
OpName %11 "u0_counter"
OpName %15 "vThreadID"
OpName %19 "r0"
OpDecorate %8 DescriptorSet 0
OpDecorate %8 Binding 0
OpMemberDecorate %9 0 Offset 0
OpDecorate %9 BufferBlock
OpDecorate %11 DescriptorSet 1
OpDecorate %11 Binding 0
OpDecorate %15 BuiltIn GlobalInvocationId
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeInt 32 0
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
%7 = OpTypePointer UniformConstant %6
%8 = OpVariable %7 UniformConstant
%9 = OpTypeStruct %5
%10 = OpTypePointer Uniform %9
%11 = OpVariable %10 Uniform
%12 = OpTypeInt 32 1
%13 = OpTypeVector %12 3
%14 = OpTypePointer Input %13
%15 = OpVariable %14 Input
%16 = OpTypeFloat 32
%17 = OpTypeVector %16 4
%18 = OpTypePointer Function %17
%20 = OpTypePointer Uniform %5
%21 = OpConstant %5 0
%23 = OpConstant %5 1
%26 = OpTypePointer Function %16
%33 = OpConstant %12 0
%34 = OpConstant %5 2
%37 = OpTypePointer Input %12
%41 = OpTypeVector %5 4
%3 = OpFunction %1 None %2
%4 = OpLabel
%19 = OpVariable %18 Function
%22 = OpAccessChain %20 %11 %21
%24 = OpAtomicIIncrement %5 %22 %23 %21
%25 = OpBitcast %16 %24
%27 = OpInBoundsAccessChain %26 %19 %21
OpStore %27 %25
%28 = OpLoad %6 %8
%29 = OpInBoundsAccessChain %26 %19 %21
%30 = OpLoad %16 %29
%31 = OpBitcast %12 %30
%32 = OpIMul %5 %31 %23
%35 = OpShiftRightLogical %5 %33 %34
%36 = OpIAdd %5 %32 %35
%38 = OpInBoundsAccessChain %37 %15 %21
%39 = OpLoad %12 %38
%40 = OpBitcast %5 %39
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
OpImageWrite %28 %36 %42
OpReturn
OpFunctionEnd

View File

@ -1838,7 +1838,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t val = ops[6];
uint32_t comp = ops[7];
emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true,
ptr, comp, true, val);
ptr, comp, true, false, val);
break;
}
@ -1866,19 +1866,20 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
}
#define MSL_AFMO_IMPL(op, valsrc) \
do \
{ \
uint32_t result_type = ops[0]; \
uint32_t id = ops[1]; \
uint32_t ptr = ops[2]; \
uint32_t mem_sem = ops[4]; \
uint32_t val = valsrc; \
emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val); \
#define MSL_AFMO_IMPL(op, valsrc, valconst) \
do \
{ \
uint32_t result_type = ops[0]; \
uint32_t id = ops[1]; \
uint32_t ptr = ops[2]; \
uint32_t mem_sem = ops[4]; \
uint32_t val = valsrc; \
emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \
false, valconst); \
} while (false)
#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5])
#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1)
#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5], false)
#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1, true)
case OpAtomicIIncrement:
MSL_AFMIO(add);
@ -2331,7 +2332,7 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers
void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1,
bool op1_is_pointer, uint32_t op2)
bool op1_is_pointer, bool op1_is_literal, uint32_t op2)
{
forced_temporaries.insert(result_id);
@ -2380,7 +2381,12 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
{
assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0);
if (op1)
exp += ", " + to_expression(op1);
{
if (op1_is_literal)
exp += join(", ", op1);
else
exp += ", " + to_expression(op1);
}
if (op2)
exp += ", " + to_expression(op2);

View File

@ -365,7 +365,7 @@ protected:
std::string get_argument_address_space(const SPIRVariable &argument);
void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
bool op1_is_pointer = false, uint32_t op2 = 0);
bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void add_typedef_line(const std::string &line);