From 7a5d0d6b29a6f4d527f68af67918d46c5e114585 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 13 Oct 2020 11:41:29 -0500 Subject: [PATCH] MSL: Add missing interlock handling to atomic image buffers. --- .../pixel-interlock-ordered.msl2.argument.frag | 15 +++++++++++---- .../frag/pixel-interlock-ordered.msl2.frag | 11 ++++++++--- .../pixel-interlock-ordered.msl2.argument.frag | 15 +++++++++++---- .../frag/pixel-interlock-ordered.msl2.frag | 11 ++++++++--- .../pixel-interlock-ordered.msl2.argument.frag | 4 ++-- .../frag/pixel-interlock-ordered.msl2.frag | 4 ++-- spirv_msl.cpp | 5 ++++- 7 files changed, 46 insertions(+), 19 deletions(-) diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag index f77b8ed9..dfef91c8 100644 --- a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -1,3 +1,4 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wunused-variable" #include @@ -28,16 +29,22 @@ struct spvDescriptorSetBuffer0 texture2d img4 [[id(1)]]; texture2d img [[id(2), raster_order_group(0)]]; texture2d img3 [[id(3), raster_order_group(0)]]; - volatile device Buffer* m_34 [[id(4), raster_order_group(0)]]; - device Buffer2* m_44 [[id(5), raster_order_group(0)]]; + texture2d img2 [[id(4), raster_order_group(0)]]; + device atomic_uint* img2_atomic [[id(5), raster_order_group(0)]]; + volatile device Buffer* m_42 [[id(6), raster_order_group(0)]]; + device Buffer2* m_52 [[id(7), raster_order_group(0)]]; }; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).x) + (tc).y) + fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) { (*spvDescriptorSet0.m_9).baz = 0; 
spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); - (*spvDescriptorSet0.m_34).foo += 42; - uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.img2_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet0.img2)], 1u, memory_order_relaxed); + (*spvDescriptorSet0.m_42).foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_42).bar, (*spvDescriptorSet0.m_52).quux, memory_order_relaxed); } diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag index 803416c6..67b79d9d 100644 --- a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -1,3 +1,4 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wunused-variable" #include @@ -22,12 +23,16 @@ struct Buffer2 uint quux; }; -fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]]) +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).x) + (tc).y) + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _42 [[buffer(2), raster_order_group(0)]], device Buffer2& _52 [[buffer(3), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d 
img3 [[texture(2), raster_order_group(0)]], texture2d img2 [[texture(3), raster_order_group(0)]], device atomic_uint* img2_atomic [[buffer(1), raster_order_group(0)]]) { _9.baz = 0; img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); img.write(img3.read(uint2(int2(0))), uint2(int2(0))); - _34.foo += 42; - uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&img2_atomic[spvImage2DAtomicCoord(int2(0), img2)], 1u, memory_order_relaxed); + _42.foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_42.bar, _52.quux, memory_order_relaxed); } diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag index f77b8ed9..dfef91c8 100644 --- a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -1,3 +1,4 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wunused-variable" #include @@ -28,16 +29,22 @@ struct spvDescriptorSetBuffer0 texture2d img4 [[id(1)]]; texture2d img [[id(2), raster_order_group(0)]]; texture2d img3 [[id(3), raster_order_group(0)]]; - volatile device Buffer* m_34 [[id(4), raster_order_group(0)]]; - device Buffer2* m_44 [[id(5), raster_order_group(0)]]; + texture2d img2 [[id(4), raster_order_group(0)]]; + device atomic_uint* img2_atomic [[id(5), raster_order_group(0)]]; + volatile device Buffer* m_42 [[id(6), raster_order_group(0)]]; + device Buffer2* m_52 [[id(7), raster_order_group(0)]]; }; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).x) + (tc).y) + fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) { (*spvDescriptorSet0.m_9).baz 
= 0; spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); - (*spvDescriptorSet0.m_34).foo += 42; - uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.img2_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet0.img2)], 1u, memory_order_relaxed); + (*spvDescriptorSet0.m_42).foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_42).bar, (*spvDescriptorSet0.m_52).quux, memory_order_relaxed); } diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag index 803416c6..67b79d9d 100644 --- a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -1,3 +1,4 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wunused-variable" #include @@ -22,12 +23,16 @@ struct Buffer2 uint quux; }; -fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]]) +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).x) + (tc).y) + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _42 [[buffer(2), raster_order_group(0)]], device Buffer2& _52 [[buffer(3), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 
[[texture(2), raster_order_group(0)]], texture2d img2 [[texture(3), raster_order_group(0)]], device atomic_uint* img2_atomic [[buffer(1), raster_order_group(0)]]) { _9.baz = 0; img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); img.write(img3.read(uint2(int2(0))), uint2(int2(0))); - _34.foo += 42; - uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&img2_atomic[spvImage2DAtomicCoord(int2(0), img2)], 1u, memory_order_relaxed); + _42.foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_42.bar, _52.quux, memory_order_relaxed); } diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag index 04886a67..ceac8cc5 100644 --- a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -4,7 +4,7 @@ layout(pixel_interlock_ordered) in; layout(binding = 0, rgba8) uniform writeonly image2D img; -//layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 1, r32ui) uniform uimage2D img2; layout(binding = 2, rgba8) uniform readonly image2D img3; layout(binding = 3) coherent buffer Buffer { @@ -29,7 +29,7 @@ void main() imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); beginInvocationInterlockARB(); imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); - //imageAtomicAdd(img2, ivec2(0, 0), 1u); + imageAtomicAdd(img2, ivec2(0, 0), 1u); foo += 42; atomicAnd(bar, quux); endInvocationInterlockARB(); diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag index 04886a67..ceac8cc5 100644 --- a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -4,7 +4,7 @@ layout(pixel_interlock_ordered) in; layout(binding = 0, rgba8) uniform writeonly image2D img; 
-//layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 1, r32ui) uniform uimage2D img2; layout(binding = 2, rgba8) uniform readonly image2D img3; layout(binding = 3) coherent buffer Buffer { @@ -29,7 +29,7 @@ void main() imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); beginInvocationInterlockARB(); imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); - //imageAtomicAdd(img2, ivec2(0, 0), 1u); + imageAtomicAdd(img2, ivec2(0, 0), 1u); foo += 42; atomicAnd(bar, quux); endInvocationInterlockARB(); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index c259d439..f8386a8d 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -10199,7 +10199,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) { ep_args += ", device atomic_" + type_to_glsl(get<SPIRType>(basetype.image.type), 0); ep_args += "* " + r.name + "_atomic"; - ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")]]"; + ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } break; }