MSL: Workaround compiler issue with image fence when used as reference.

This commit is contained in:
Hans-Kristian Arntzen 2024-04-03 13:28:12 +02:00
parent 218a8bfd86
commit ee77265ae5
8 changed files with 50 additions and 16 deletions

View File

@ -5,10 +5,13 @@
using namespace metal; using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture) void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{ {
TargetTexture.fence(); spvImageFence(TargetTexture);
float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0); float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u)))); TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@ -25,6 +25,9 @@ struct spvDescriptorArray
const device spvDescriptor<T>* ptr; const device spvDescriptor<T>* ptr;
}; };
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
struct B10 struct B10
{ {
float v; float v;
@ -136,20 +139,20 @@ kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buff
float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0)); float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0));
_292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x); _292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
_292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x); _292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
u0[2].fence(); spvImageFence(u0[2]);
_292.z = as_type<float>(u0[2].read(uint(0)).x); _292.z = as_type<float>(u0[2].read(uint(0)).x);
float4 _448; float4 _448;
_448.x = spvDescriptorSet3.b10[3]->v; _448.x = spvDescriptorSet3.b10[3]->v;
_448.y = b11[4]->v; _448.y = b11[4]->v;
u1[2].fence(); spvImageFence(u1[2]);
_448.z = as_type<float>(u1[2].read(uint(0)).x); _448.z = as_type<float>(u1[2].read(uint(0)).x);
float _342 = spvDescriptorSet2.b20[3]->v; float _342 = spvDescriptorSet2.b20[3]->v;
u2[2].fence(); spvImageFence(u2[2]);
uint _356 = b30[gl_WorkGroupID.x]->i; uint _356 = b30[gl_WorkGroupID.x]->i;
uint _388 = _356 + 6u; uint _388 = _356 + 6u;
u3[_388].fence(); spvImageFence(u3[_388]);
float _410 = (*spvDescriptorSet4.b40).v; float _410 = (*spvDescriptorSet4.b40).v;
u4.fence(); spvImageFence(u4);
u0[0].write(as_type<uint4>(_292), uint(0)); u0[0].write(as_type<uint4>(_292), uint(0));
u1[0].write(as_type<uint4>(_448), uint(0)); u1[0].write(as_type<uint4>(_448), uint(0));
u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0)); u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0));

View File

@ -1,11 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib> #include <metal_stdlib>
#include <simd/simd.h> #include <simd/simd.h>
using namespace metal; using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]]) fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{ {
uImageArray.fence(); spvImageFence(uImageArray);
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3))); uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z)); uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
} }

View File

@ -5,10 +5,13 @@
using namespace metal; using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture) void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{ {
TargetTexture.fence(); spvImageFence(TargetTexture);
float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0); float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u)))); TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@ -25,6 +25,9 @@ struct spvDescriptorArray
const device spvDescriptor<T>* ptr; const device spvDescriptor<T>* ptr;
}; };
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
struct B10 struct B10
{ {
float v; float v;
@ -122,16 +125,16 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
r0 = t00[0].sample(s00[3], float2(0.0), level(0.0)); r0 = t00[0].sample(s00[3], float2(0.0), level(0.0));
r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x); r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x); r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
u0[2].fence(); spvImageFence(u0[2]);
r0.z = as_type<float>(u0[2].read(uint(0)).x); r0.z = as_type<float>(u0[2].read(uint(0)).x);
r1.x = b10[3]->v; r1.x = b10[3]->v;
r1.y = b11[4]->v; r1.y = b11[4]->v;
u1[2].fence(); spvImageFence(u1[2]);
r1.z = as_type<float>(u1[2].read(uint(0)).x); r1.z = as_type<float>(u1[2].read(uint(0)).x);
r2 = t20[0].sample(s20[3], float2(0.0), level(0.0)); r2 = t20[0].sample(s20[3], float2(0.0), level(0.0));
r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x); r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x);
r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x); r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x);
u2[2].fence(); spvImageFence(u2[2]);
r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x); r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x);
r2.w = b21[4]->v; r2.w = b21[4]->v;
uint i = b30[gl_WorkGroupID.x]->i; uint i = b30[gl_WorkGroupID.x]->i;
@ -140,13 +143,13 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x); r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x);
r3.z = b31[i + 5u]->v; r3.z = b31[i + 5u]->v;
uint _218 = i + 6u; uint _218 = i + 6u;
u3[_218].fence(); spvImageFence(u3[_218]);
r3.w = as_type<float>(u3[_218].read(uint(0)).x); r3.w = as_type<float>(u3[_218].read(uint(0)).x);
r4 = float4(t40.sample(s40, float2(0.0), level(0.0))); r4 = float4(t40.sample(s40, float2(0.0), level(0.0)));
r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x); r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x);
r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x); r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x);
r4.z = b40.v + b41.v; r4.z = b40.v + b41.v;
u4.fence(); spvImageFence(u4);
r4.w = as_type<float>(u4.read(uint(0)).x); r4.w = as_type<float>(u4.read(uint(0)).x);
u0[0].write(as_type<uint4>(r0), uint(0)); u0[0].write(as_type<uint4>(r0), uint(0));
u1[0].write(as_type<uint4>(r1), uint(0)); u1[0].write(as_type<uint4>(r1), uint(0));

View File

@ -1,12 +1,17 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib> #include <metal_stdlib>
#include <simd/simd.h> #include <simd/simd.h>
using namespace metal; using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]]) fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{ {
float4 a = uImageMS.read(uint2(int2(1, 2)), 2); float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
uImageArray.fence(); spvImageFence(uImageArray);
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)); float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
uImage.write(a, uint2(int2(2, 3))); uImage.write(a, uint2(int2(2, 3)));
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z)); uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));

View File

@ -7492,6 +7492,12 @@ void CompilerMSL::emit_custom_functions()
statement(""); statement("");
break; break;
case SPVFuncImplImageFence:
statement("template <typename ImageT>");
statement("void spvImageFence(ImageT img) { img.fence(); }");
statement("");
break;
default: default:
break; break;
} }
@ -8968,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation // Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable)) if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
statement(to_expression(img_id), ".fence();"); {
add_spv_func_and_recompile(SPVFuncImplImageFence);
// Need to wrap this with a value type,
// since the Metal headers are broken and do not consider case when the image is a reference.
statement("spvImageFence(", to_expression(img_id), ");");
}
emit_texture_op(instruction, false); emit_texture_op(instruction, false);
break; break;

View File

@ -824,7 +824,8 @@ protected:
SPVFuncImplVariableSizedDescriptor, SPVFuncImplVariableSizedDescriptor,
SPVFuncImplVariableDescriptorArray, SPVFuncImplVariableDescriptorArray,
SPVFuncImplPaddedStd140, SPVFuncImplPaddedStd140,
SPVFuncImplReduceAdd SPVFuncImplReduceAdd,
SPVFuncImplImageFence
}; };
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too // If the underlying resource has been used for comparison then duplicate loads of that resource must be too