MSL: Workaround compiler issue with image fence when used as reference.

This commit is contained in:
Hans-Kristian Arntzen 2024-04-03 13:28:12 +02:00
parent 218a8bfd86
commit ee77265ae5
8 changed files with 50 additions and 16 deletions

View File

@ -5,10 +5,13 @@
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
TargetTexture.fence();
spvImageFence(TargetTexture);
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@ -25,6 +25,9 @@ struct spvDescriptorArray
const device spvDescriptor<T>* ptr;
};
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
struct B10
{
float v;
@ -136,20 +139,20 @@ kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buff
float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0));
_292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
_292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
u0[2].fence();
spvImageFence(u0[2]);
_292.z = as_type<float>(u0[2].read(uint(0)).x);
float4 _448;
_448.x = spvDescriptorSet3.b10[3]->v;
_448.y = b11[4]->v;
u1[2].fence();
spvImageFence(u1[2]);
_448.z = as_type<float>(u1[2].read(uint(0)).x);
float _342 = spvDescriptorSet2.b20[3]->v;
u2[2].fence();
spvImageFence(u2[2]);
uint _356 = b30[gl_WorkGroupID.x]->i;
uint _388 = _356 + 6u;
u3[_388].fence();
spvImageFence(u3[_388]);
float _410 = (*spvDescriptorSet4.b40).v;
u4.fence();
spvImageFence(u4);
u0[0].write(as_type<uint4>(_292), uint(0));
u1[0].write(as_type<uint4>(_448), uint(0));
u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0));

View File

@ -1,11 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{
uImageArray.fence();
spvImageFence(uImageArray);
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
}

View File

@ -5,10 +5,13 @@
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
TargetTexture.fence();
spvImageFence(TargetTexture);
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@ -25,6 +25,9 @@ struct spvDescriptorArray
const device spvDescriptor<T>* ptr;
};
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
struct B10
{
float v;
@ -122,16 +125,16 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
r0 = t00[0].sample(s00[3], float2(0.0), level(0.0));
r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
u0[2].fence();
spvImageFence(u0[2]);
r0.z = as_type<float>(u0[2].read(uint(0)).x);
r1.x = b10[3]->v;
r1.y = b11[4]->v;
u1[2].fence();
spvImageFence(u1[2]);
r1.z = as_type<float>(u1[2].read(uint(0)).x);
r2 = t20[0].sample(s20[3], float2(0.0), level(0.0));
r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x);
r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x);
u2[2].fence();
spvImageFence(u2[2]);
r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x);
r2.w = b21[4]->v;
uint i = b30[gl_WorkGroupID.x]->i;
@ -140,13 +143,13 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x);
r3.z = b31[i + 5u]->v;
uint _218 = i + 6u;
u3[_218].fence();
spvImageFence(u3[_218]);
r3.w = as_type<float>(u3[_218].read(uint(0)).x);
r4 = float4(t40.sample(s40, float2(0.0), level(0.0)));
r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x);
r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x);
r4.z = b40.v + b41.v;
u4.fence();
spvImageFence(u4);
r4.w = as_type<float>(u4.read(uint(0)).x);
u0[0].write(as_type<uint4>(r0), uint(0));
u1[0].write(as_type<uint4>(r1), uint(0));

View File

@ -1,12 +1,17 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
uImageArray.fence();
spvImageFence(uImageArray);
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
uImage.write(a, uint2(int2(2, 3)));
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));

View File

@ -7492,6 +7492,12 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplImageFence:
statement("template <typename ImageT>");
statement("void spvImageFence(ImageT img) { img.fence(); }");
statement("");
break;
default:
break;
}
@ -8968,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
statement(to_expression(img_id), ".fence();");
{
add_spv_func_and_recompile(SPVFuncImplImageFence);
// Need to wrap this with a value type,
// since the Metal headers are broken and do not consider case when the image is a reference.
statement("spvImageFence(", to_expression(img_id), ");");
}
emit_texture_op(instruction, false);
break;

View File

@ -824,7 +824,8 @@ protected:
SPVFuncImplVariableSizedDescriptor,
SPVFuncImplVariableDescriptorArray,
SPVFuncImplPaddedStd140,
SPVFuncImplReduceAdd
SPVFuncImplReduceAdd,
SPVFuncImplImageFence
};
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too