From 052c57e2f2ff0a009c7e451195b4c4e0206202ba Mon Sep 17 00:00:00 2001 From: Jan Sikorski Date: Fri, 9 Feb 2024 14:40:04 +0100 Subject: [PATCH 1/3] MSL: Pass texture array index separately to atomic texture operations. --- spirv_msl.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index a6dd8da9..24a18637 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -10196,9 +10196,34 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, // Will only be false if we're in "force recompile later" mode. if (split_index != string::npos) - exp += join(obj_expression.substr(0, split_index), ".", op, "(", obj_expression.substr(split_index + 1)); + { + auto coord = obj_expression.substr(split_index + 1); + exp += join(obj_expression.substr(0, split_index), ".", op, "("); + if (res_type.basetype == SPIRType::Image && res_type.image.arrayed) + { + switch (res_type.image.dim) + { + case Dim1D: + exp += join(coord, ".x, ", coord, ".y"); + break; + case Dim2D: + exp += join(coord, ".xy, ", coord, ".z"); + break; + default: + // Texture cube arrays atomics are not supported by Metal. + assert(false); + break; + } + } + else + { + exp += coord; + } + } else + { exp += obj_expression; + } } else { From 7e0c9ab9ed75c0000760c0c7ea2e8e5020bf2dc3 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 26 Feb 2024 15:50:36 +0100 Subject: [PATCH 2/3] MSL: Nit from review. --- spirv_msl.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 24a18637..597c1239 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -10210,9 +10210,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, exp += join(coord, ".xy, ", coord, ".z"); break; default: - // Texture cube arrays atomics are not supported by Metal. - assert(false); - break; + SPIRV_CROSS_THROW("Cannot do atomics on Cube textures."); } } else From 855a5c369f64bccb98df5873fbd594fb9bccb828 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 26 Feb 2024 15:43:29 +0100 Subject: [PATCH 3/3] MSL: Also test image atomic for image2DArray. --- .../shaders-msl/comp/atomic-image.msl31.comp | 89 ++++++++++--------- .../shaders-msl/comp/atomic-image.msl31.comp | 89 ++++++++++--------- shaders-msl/comp/atomic-image.msl31.comp | 4 +- 3 files changed, 93 insertions(+), 89 deletions(-) diff --git a/reference/opt/shaders-msl/comp/atomic-image.msl31.comp b/reference/opt/shaders-msl/comp/atomic-image.msl31.comp index 1fefb3a7..8797d86d 100644 --- a/reference/opt/shaders-msl/comp/atomic-image.msl31.comp +++ b/reference/opt/shaders-msl/comp/atomic-image.msl31.comp @@ -14,59 +14,60 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); -kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d uImage [[texture(0)]], texture2d iImage [[texture(1)]]) +kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d uImage [[texture(0)]], texture2d_array uImageArray [[texture(1)]], texture2d iImage [[texture(2)]]) { uint _19 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; - uint _27 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; - iImage.write(int4(int(_27)), uint2(int2(1, 6))); - uint _32 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x; - uint _34 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x; - uint _36 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x; - uint _38 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x; - uint _40 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x; - uint _44; - uint4 _102; + uint _27 = uImageArray.atomic_fetch_add(uint3(int3(1, 5, 8)).xy, uint3(int3(1, 5, 8)).z, 1u).x; + uint _35 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; + iImage.write(int4(int(_35)), uint2(int2(1, 6))); + uint _40 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x; + uint _42 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x; + uint _44 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x; + uint _46 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x; + uint _48 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x; + uint _52; + uint4 _110; do { - _102.x = 10u; - } while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_102, 2u) && _102.x == 10u); - _44 = _102.x; - int _47 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x; - int _49 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x; - int _51 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x; - int _53 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x; - int _55 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x; - int _57 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x; - int _61; - int4 _104; + _110.x = 10u; + } while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_110, 2u) && _110.x == 10u); + _52 = _110.x; + int _55 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x; + int _57 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x; + int _59 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x; + int _61 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x; + int _63 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x; + int _65 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x; + int _69; + int4 _112; do { - _104.x = 10; - } while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_104, 2) && _104.x == 10); - _61 = _104.x; - uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _82; + _112.x = 10; + } while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_112, 2) && _112.x == 10); + _69 = _112.x; + uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _78 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _80 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _82 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _84 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _86 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _88 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _90; do { - _82 = 10u; - } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u); - int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _99; + _90 = 10u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_90, 2u, memory_order_relaxed, memory_order_relaxed) && _90 == 10u); + int _93 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _95 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _97 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _99 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _101 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _103 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _105 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _107; do { - _99 = 10; - } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10); + _107 = 10; + } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_107, 2, memory_order_relaxed, memory_order_relaxed) && _107 == 10); } diff --git a/reference/shaders-msl/comp/atomic-image.msl31.comp b/reference/shaders-msl/comp/atomic-image.msl31.comp index 1fefb3a7..8797d86d 100644 --- a/reference/shaders-msl/comp/atomic-image.msl31.comp +++ b/reference/shaders-msl/comp/atomic-image.msl31.comp @@ -14,59 +14,60 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); -kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d uImage [[texture(0)]], texture2d iImage [[texture(1)]]) +kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d uImage [[texture(0)]], texture2d_array uImageArray [[texture(1)]], texture2d iImage [[texture(2)]]) { uint _19 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; - uint _27 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; - iImage.write(int4(int(_27)), uint2(int2(1, 6))); - uint _32 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x; - uint _34 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x; - uint _36 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x; - uint _38 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x; - uint _40 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x; - uint _44; - uint4 _102; + uint _27 = uImageArray.atomic_fetch_add(uint3(int3(1, 5, 8)).xy, uint3(int3(1, 5, 8)).z, 1u).x; + uint _35 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; + iImage.write(int4(int(_35)), uint2(int2(1, 6))); + uint _40 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x; + uint _42 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x; + uint _44 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x; + uint _46 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x; + uint _48 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x; + uint _52; + uint4 _110; do { - _102.x = 10u; - } while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_102, 2u) && _102.x == 10u); - _44 = _102.x; - int _47 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x; - int _49 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x; - int _51 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x; - int _53 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x; - int _55 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x; - int _57 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x; - int _61; - int4 _104; + _110.x = 10u; + } while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_110, 2u) && _110.x == 10u); + _52 = _110.x; + int _55 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x; + int _57 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x; + int _59 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x; + int _61 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x; + int _63 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x; + int _65 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x; + int _69; + int4 _112; do { - _104.x = 10; - } while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_104, 2) && _104.x == 10); - _61 = _104.x; - uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _82; + _112.x = 10; + } while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_112, 2) && _112.x == 10); + _69 = _112.x; + uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _78 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _80 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _82 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _84 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _86 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _88 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _90; do { - _82 = 10u; - } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u); - int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _99; + _90 = 10u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_90, 2u, memory_order_relaxed, memory_order_relaxed) && _90 == 10u); + int _93 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _95 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _97 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _99 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _101 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _103 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _105 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _107; do { - _99 = 10; - } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10); + _107 = 10; + } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_107, 2, memory_order_relaxed, memory_order_relaxed) && _107 == 10); } diff --git a/shaders-msl/comp/atomic-image.msl31.comp b/shaders-msl/comp/atomic-image.msl31.comp index 703256d8..2aa650bc 100644 --- a/shaders-msl/comp/atomic-image.msl31.comp +++ b/shaders-msl/comp/atomic-image.msl31.comp @@ -4,7 +4,8 @@ layout(local_size_x = 1) in; layout(r32ui, binding = 0) uniform highp uimage2D uImage; layout(r32i, binding = 1) uniform highp iimage2D iImage; -layout(binding = 2, std430) buffer SSBO +layout(r32ui, binding = 2) uniform highp uimage2DArray uImageArray; +layout(binding = 3, std430) buffer SSBO { uint u32; int i32; @@ -13,6 +14,7 @@ layout(binding = 2, std430) buffer SSBO void main() { imageAtomicAdd(uImage, ivec2(1, 5), 1u); + imageAtomicAdd(uImageArray, ivec3(1, 5, 8), 1u); // Test that we do not invalidate OpImage variables which are loaded from UniformConstant // address space.