diff --git a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp index 346f40ee..07378624 100644 --- a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -5,10 +5,13 @@ using namespace metal; +template +void spvImageFence(ImageT img) { img.fence(); } + static inline __attribute__((always_inline)) void _main(thread const uint3& id, texture2d TargetTexture) { - TargetTexture.fence(); + spvImageFence(TargetTexture); float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 storeTemp = loaded + float2(1.0); TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u)))); diff --git a/reference/opt/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp b/reference/opt/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp index 2ea07486..3ba15779 100644 --- a/reference/opt/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp +++ b/reference/opt/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp @@ -25,6 +25,9 @@ struct spvDescriptorArray const device spvDescriptor* ptr; }; +template +void spvImageFence(ImageT img) { img.fence(); } + struct B10 { float v; @@ -136,20 +139,20 @@ kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buff float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0)); _292.x = as_type(t01[1].read(uint2(int2(0)), 0).x); _292.y = as_type(t02[2].read(uint2(int2(0)), 0).x); - u0[2].fence(); + spvImageFence(u0[2]); _292.z = as_type(u0[2].read(uint(0)).x); float4 _448; _448.x = spvDescriptorSet3.b10[3]->v; _448.y = b11[4]->v; - u1[2].fence(); + spvImageFence(u1[2]); _448.z = as_type(u1[2].read(uint(0)).x); float _342 = spvDescriptorSet2.b20[3]->v; - u2[2].fence(); + spvImageFence(u2[2]); uint _356 = b30[gl_WorkGroupID.x]->i; uint _388 = _356 + 6u; - u3[_388].fence(); + spvImageFence(u3[_388]); float _410 = (*spvDescriptorSet4.b40).v; - u4.fence(); + spvImageFence(u4); u0[0].write(as_type(_292), uint(0)); u1[0].write(as_type(_448), uint(0)); u2[0].write(as_type(float4(as_type(t21[1].read(uint2(int2(0)), 0).x), as_type(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type(u2[2].read(uint(0)).x), b21[4]->v)), uint(0)); diff --git a/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag b/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag index 30046a1f..2dc6a315 100644 --- a/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag +++ b/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag @@ -1,11 +1,16 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template +void spvImageFence(ImageT img) { img.fence(); } + fragment void main0(texture2d_ms uImageMS [[texture(0)]], texture2d_array uImageArray [[texture(1)]], texture2d uImage [[texture(2)]]) { - uImageArray.fence(); + spvImageFence(uImageArray); uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3))); uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z)); } diff --git a/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp index 346f40ee..07378624 100644 --- a/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp +++ b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -5,10 +5,13 @@ using namespace metal; +template +void spvImageFence(ImageT img) { img.fence(); } + static inline __attribute__((always_inline)) void _main(thread const uint3& id, texture2d TargetTexture) { - TargetTexture.fence(); + spvImageFence(TargetTexture); float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 storeTemp = loaded + float2(1.0); TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u)))); diff --git a/reference/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp b/reference/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp index f51badb8..06a15fea 100644 --- a/reference/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp +++ b/reference/shaders-msl/comp/overlapping-bindings.msl31.argument.argument-tier-1.decoration-binding.device-argument-buffer.texture-buffer-native.comp @@ -25,6 +25,9 @@ struct spvDescriptorArray const device spvDescriptor* ptr; }; +template +void spvImageFence(ImageT img) { img.fence(); } + struct B10 { float v; @@ -122,16 +125,16 @@ void in_function(thread float4& r0, const device array, 8>& t00 r0 = t00[0].sample(s00[3], float2(0.0), level(0.0)); r0.x = as_type(t01[1].read(uint2(int2(0)), 0).x); r0.y = as_type(t02[2].read(uint2(int2(0)), 0).x); - u0[2].fence(); + spvImageFence(u0[2]); r0.z = as_type(u0[2].read(uint(0)).x); r1.x = b10[3]->v; r1.y = b11[4]->v; - u1[2].fence(); + spvImageFence(u1[2]); r1.z = as_type(u1[2].read(uint(0)).x); r2 = t20[0].sample(s20[3], float2(0.0), level(0.0)); r2.x = as_type(t21[1].read(uint2(int2(0)), 0).x); r2.y = as_type(t22[2].read(uint2(int2(0)), 0).x); - u2[2].fence(); + spvImageFence(u2[2]); r2.z = b20[3]->v + as_type(u2[2].read(uint(0)).x); r2.w = b21[4]->v; uint i = b30[gl_WorkGroupID.x]->i; @@ -140,13 +143,13 @@ void in_function(thread float4& r0, const device array, 8>& t00 r3.y = as_type(t32[i + 3u].read(uint2(int2(0)), 0).x); r3.z = b31[i + 5u]->v; uint _218 = i + 6u; - u3[_218].fence(); + spvImageFence(u3[_218]); r3.w = as_type(u3[_218].read(uint(0)).x); r4 = float4(t40.sample(s40, float2(0.0), level(0.0))); r4.x = as_type(t41.read(uint2(int2(0)), 0).x); r4.y = as_type(t42.read(uint2(int2(0)), 0).x); r4.z = b40.v + b41.v; - u4.fence(); + spvImageFence(u4); r4.w = as_type(u4.read(uint(0)).x); u0[0].write(as_type(r0), uint(0)); u1[0].write(as_type(r1), uint(0)); diff --git a/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag b/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag index d413563a..18000820 100644 --- a/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag +++ b/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag @@ -1,12 +1,17 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template +void spvImageFence(ImageT img) { img.fence(); } + fragment void main0(texture2d_ms uImageMS [[texture(0)]], texture2d_array uImageArray [[texture(1)]], texture2d uImage [[texture(2)]]) { float4 a = uImageMS.read(uint2(int2(1, 2)), 2); - uImageArray.fence(); + spvImageFence(uImageArray); float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)); uImage.write(a, uint2(int2(2, 3))); uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z)); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index f872997f..f27f71b7 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -7492,6 +7492,12 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplImageFence: + statement("template "); + statement("void spvImageFence(ImageT img) { img.fence(); }"); + statement(""); + break; + default: break; } @@ -8968,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) // Metal requires explicit fences to break up RAW hazards, even within the same shader invocation if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable)) - statement(to_expression(img_id), ".fence();"); + { + add_spv_func_and_recompile(SPVFuncImplImageFence); + // Need to wrap this with a value type, + // since the Metal headers are broken and do not consider case when the image is a reference. + statement("spvImageFence(", to_expression(img_id), ");"); + } emit_texture_op(instruction, false); break; diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 2ac6fe31..f1b64054 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -824,7 +824,8 @@ protected: SPVFuncImplVariableSizedDescriptor, SPVFuncImplVariableDescriptorArray, SPVFuncImplPaddedStd140, - SPVFuncImplReduceAdd + SPVFuncImplReduceAdd, + SPVFuncImplImageFence }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too