MSL: Work around compiler issue with image fence when used as reference.
This commit is contained in:
parent
218a8bfd86
commit
ee77265ae5
@ -5,10 +5,13 @@
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template <typename ImageT>
|
||||
void spvImageFence(ImageT img) { img.fence(); }
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
||||
{
|
||||
TargetTexture.fence();
|
||||
spvImageFence(TargetTexture);
|
||||
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
||||
float2 storeTemp = loaded + float2(1.0);
|
||||
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
||||
|
@ -25,6 +25,9 @@ struct spvDescriptorArray
|
||||
const device spvDescriptor<T>* ptr;
|
||||
};
|
||||
|
||||
template <typename ImageT>
|
||||
void spvImageFence(ImageT img) { img.fence(); }
|
||||
|
||||
struct B10
|
||||
{
|
||||
float v;
|
||||
@ -136,20 +139,20 @@ kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buff
|
||||
float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0));
|
||||
_292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
|
||||
_292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
|
||||
u0[2].fence();
|
||||
spvImageFence(u0[2]);
|
||||
_292.z = as_type<float>(u0[2].read(uint(0)).x);
|
||||
float4 _448;
|
||||
_448.x = spvDescriptorSet3.b10[3]->v;
|
||||
_448.y = b11[4]->v;
|
||||
u1[2].fence();
|
||||
spvImageFence(u1[2]);
|
||||
_448.z = as_type<float>(u1[2].read(uint(0)).x);
|
||||
float _342 = spvDescriptorSet2.b20[3]->v;
|
||||
u2[2].fence();
|
||||
spvImageFence(u2[2]);
|
||||
uint _356 = b30[gl_WorkGroupID.x]->i;
|
||||
uint _388 = _356 + 6u;
|
||||
u3[_388].fence();
|
||||
spvImageFence(u3[_388]);
|
||||
float _410 = (*spvDescriptorSet4.b40).v;
|
||||
u4.fence();
|
||||
spvImageFence(u4);
|
||||
u0[0].write(as_type<uint4>(_292), uint(0));
|
||||
u1[0].write(as_type<uint4>(_448), uint(0));
|
||||
u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0));
|
||||
|
@ -1,11 +1,16 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template <typename ImageT>
|
||||
void spvImageFence(ImageT img) { img.fence(); }
|
||||
|
||||
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
||||
{
|
||||
uImageArray.fence();
|
||||
spvImageFence(uImageArray);
|
||||
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
|
||||
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
||||
}
|
||||
|
@ -5,10 +5,13 @@
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template <typename ImageT>
|
||||
void spvImageFence(ImageT img) { img.fence(); }
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
||||
{
|
||||
TargetTexture.fence();
|
||||
spvImageFence(TargetTexture);
|
||||
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
||||
float2 storeTemp = loaded + float2(1.0);
|
||||
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
||||
|
@ -25,6 +25,9 @@ struct spvDescriptorArray
|
||||
const device spvDescriptor<T>* ptr;
|
||||
};
|
||||
|
||||
template <typename ImageT>
|
||||
void spvImageFence(ImageT img) { img.fence(); }
|
||||
|
||||
struct B10
|
||||
{
|
||||
float v;
|
||||
@ -122,16 +125,16 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
|
||||
r0 = t00[0].sample(s00[3], float2(0.0), level(0.0));
|
||||
r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
|
||||
r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
|
||||
u0[2].fence();
|
||||
spvImageFence(u0[2]);
|
||||
r0.z = as_type<float>(u0[2].read(uint(0)).x);
|
||||
r1.x = b10[3]->v;
|
||||
r1.y = b11[4]->v;
|
||||
u1[2].fence();
|
||||
spvImageFence(u1[2]);
|
||||
r1.z = as_type<float>(u1[2].read(uint(0)).x);
|
||||
r2 = t20[0].sample(s20[3], float2(0.0), level(0.0));
|
||||
r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x);
|
||||
r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x);
|
||||
u2[2].fence();
|
||||
spvImageFence(u2[2]);
|
||||
r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x);
|
||||
r2.w = b21[4]->v;
|
||||
uint i = b30[gl_WorkGroupID.x]->i;
|
||||
@ -140,13 +143,13 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
|
||||
r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x);
|
||||
r3.z = b31[i + 5u]->v;
|
||||
uint _218 = i + 6u;
|
||||
u3[_218].fence();
|
||||
spvImageFence(u3[_218]);
|
||||
r3.w = as_type<float>(u3[_218].read(uint(0)).x);
|
||||
r4 = float4(t40.sample(s40, float2(0.0), level(0.0)));
|
||||
r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x);
|
||||
r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x);
|
||||
r4.z = b40.v + b41.v;
|
||||
u4.fence();
|
||||
spvImageFence(u4);
|
||||
r4.w = as_type<float>(u4.read(uint(0)).x);
|
||||
u0[0].write(as_type<uint4>(r0), uint(0));
|
||||
u1[0].write(as_type<uint4>(r1), uint(0));
|
||||
|
@ -1,12 +1,17 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template <typename ImageT>
|
||||
void spvImageFence(ImageT img) { img.fence(); }
|
||||
|
||||
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
||||
{
|
||||
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
|
||||
uImageArray.fence();
|
||||
spvImageFence(uImageArray);
|
||||
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
|
||||
uImage.write(a, uint2(int2(2, 3)));
|
||||
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
||||
|
@ -7492,6 +7492,12 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplImageFence:
|
||||
statement("template <typename ImageT>");
|
||||
statement("void spvImageFence(ImageT img) { img.fence(); }");
|
||||
statement("");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -8968,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
|
||||
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
|
||||
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
|
||||
statement(to_expression(img_id), ".fence();");
|
||||
{
|
||||
add_spv_func_and_recompile(SPVFuncImplImageFence);
|
||||
// Need to wrap this with a value type,
|
||||
// since the Metal headers are broken and do not consider the case where the image is a reference.
|
||||
statement("spvImageFence(", to_expression(img_id), ");");
|
||||
}
|
||||
|
||||
emit_texture_op(instruction, false);
|
||||
break;
|
||||
|
@ -824,7 +824,8 @@ protected:
|
||||
SPVFuncImplVariableSizedDescriptor,
|
||||
SPVFuncImplVariableDescriptorArray,
|
||||
SPVFuncImplPaddedStd140,
|
||||
SPVFuncImplReduceAdd
|
||||
SPVFuncImplReduceAdd,
|
||||
SPVFuncImplImageFence
|
||||
};
|
||||
|
||||
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
|
||||
|
Loading…
Reference in New Issue
Block a user