MSL: Workaround compiler issue with image fence when used as reference.
This commit is contained in:
parent
218a8bfd86
commit
ee77265ae5
@ -5,10 +5,13 @@
|
|||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
|
template <typename ImageT>
|
||||||
|
void spvImageFence(ImageT img) { img.fence(); }
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
||||||
{
|
{
|
||||||
TargetTexture.fence();
|
spvImageFence(TargetTexture);
|
||||||
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
||||||
float2 storeTemp = loaded + float2(1.0);
|
float2 storeTemp = loaded + float2(1.0);
|
||||||
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
||||||
|
@ -25,6 +25,9 @@ struct spvDescriptorArray
|
|||||||
const device spvDescriptor<T>* ptr;
|
const device spvDescriptor<T>* ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename ImageT>
|
||||||
|
void spvImageFence(ImageT img) { img.fence(); }
|
||||||
|
|
||||||
struct B10
|
struct B10
|
||||||
{
|
{
|
||||||
float v;
|
float v;
|
||||||
@ -136,20 +139,20 @@ kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buff
|
|||||||
float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0));
|
float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0));
|
||||||
_292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
|
_292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
|
||||||
_292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
|
_292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
|
||||||
u0[2].fence();
|
spvImageFence(u0[2]);
|
||||||
_292.z = as_type<float>(u0[2].read(uint(0)).x);
|
_292.z = as_type<float>(u0[2].read(uint(0)).x);
|
||||||
float4 _448;
|
float4 _448;
|
||||||
_448.x = spvDescriptorSet3.b10[3]->v;
|
_448.x = spvDescriptorSet3.b10[3]->v;
|
||||||
_448.y = b11[4]->v;
|
_448.y = b11[4]->v;
|
||||||
u1[2].fence();
|
spvImageFence(u1[2]);
|
||||||
_448.z = as_type<float>(u1[2].read(uint(0)).x);
|
_448.z = as_type<float>(u1[2].read(uint(0)).x);
|
||||||
float _342 = spvDescriptorSet2.b20[3]->v;
|
float _342 = spvDescriptorSet2.b20[3]->v;
|
||||||
u2[2].fence();
|
spvImageFence(u2[2]);
|
||||||
uint _356 = b30[gl_WorkGroupID.x]->i;
|
uint _356 = b30[gl_WorkGroupID.x]->i;
|
||||||
uint _388 = _356 + 6u;
|
uint _388 = _356 + 6u;
|
||||||
u3[_388].fence();
|
spvImageFence(u3[_388]);
|
||||||
float _410 = (*spvDescriptorSet4.b40).v;
|
float _410 = (*spvDescriptorSet4.b40).v;
|
||||||
u4.fence();
|
spvImageFence(u4);
|
||||||
u0[0].write(as_type<uint4>(_292), uint(0));
|
u0[0].write(as_type<uint4>(_292), uint(0));
|
||||||
u1[0].write(as_type<uint4>(_448), uint(0));
|
u1[0].write(as_type<uint4>(_448), uint(0));
|
||||||
u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0));
|
u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0));
|
||||||
|
@ -1,11 +1,16 @@
|
|||||||
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||||
|
|
||||||
#include <metal_stdlib>
|
#include <metal_stdlib>
|
||||||
#include <simd/simd.h>
|
#include <simd/simd.h>
|
||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
|
template <typename ImageT>
|
||||||
|
void spvImageFence(ImageT img) { img.fence(); }
|
||||||
|
|
||||||
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
||||||
{
|
{
|
||||||
uImageArray.fence();
|
spvImageFence(uImageArray);
|
||||||
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
|
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
|
||||||
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
||||||
}
|
}
|
||||||
|
@ -5,10 +5,13 @@
|
|||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
|
template <typename ImageT>
|
||||||
|
void spvImageFence(ImageT img) { img.fence(); }
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
||||||
{
|
{
|
||||||
TargetTexture.fence();
|
spvImageFence(TargetTexture);
|
||||||
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
||||||
float2 storeTemp = loaded + float2(1.0);
|
float2 storeTemp = loaded + float2(1.0);
|
||||||
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
||||||
|
@ -25,6 +25,9 @@ struct spvDescriptorArray
|
|||||||
const device spvDescriptor<T>* ptr;
|
const device spvDescriptor<T>* ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename ImageT>
|
||||||
|
void spvImageFence(ImageT img) { img.fence(); }
|
||||||
|
|
||||||
struct B10
|
struct B10
|
||||||
{
|
{
|
||||||
float v;
|
float v;
|
||||||
@ -122,16 +125,16 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
|
|||||||
r0 = t00[0].sample(s00[3], float2(0.0), level(0.0));
|
r0 = t00[0].sample(s00[3], float2(0.0), level(0.0));
|
||||||
r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
|
r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
|
||||||
r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
|
r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
|
||||||
u0[2].fence();
|
spvImageFence(u0[2]);
|
||||||
r0.z = as_type<float>(u0[2].read(uint(0)).x);
|
r0.z = as_type<float>(u0[2].read(uint(0)).x);
|
||||||
r1.x = b10[3]->v;
|
r1.x = b10[3]->v;
|
||||||
r1.y = b11[4]->v;
|
r1.y = b11[4]->v;
|
||||||
u1[2].fence();
|
spvImageFence(u1[2]);
|
||||||
r1.z = as_type<float>(u1[2].read(uint(0)).x);
|
r1.z = as_type<float>(u1[2].read(uint(0)).x);
|
||||||
r2 = t20[0].sample(s20[3], float2(0.0), level(0.0));
|
r2 = t20[0].sample(s20[3], float2(0.0), level(0.0));
|
||||||
r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x);
|
r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x);
|
||||||
r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x);
|
r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x);
|
||||||
u2[2].fence();
|
spvImageFence(u2[2]);
|
||||||
r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x);
|
r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x);
|
||||||
r2.w = b21[4]->v;
|
r2.w = b21[4]->v;
|
||||||
uint i = b30[gl_WorkGroupID.x]->i;
|
uint i = b30[gl_WorkGroupID.x]->i;
|
||||||
@ -140,13 +143,13 @@ void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00
|
|||||||
r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x);
|
r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x);
|
||||||
r3.z = b31[i + 5u]->v;
|
r3.z = b31[i + 5u]->v;
|
||||||
uint _218 = i + 6u;
|
uint _218 = i + 6u;
|
||||||
u3[_218].fence();
|
spvImageFence(u3[_218]);
|
||||||
r3.w = as_type<float>(u3[_218].read(uint(0)).x);
|
r3.w = as_type<float>(u3[_218].read(uint(0)).x);
|
||||||
r4 = float4(t40.sample(s40, float2(0.0), level(0.0)));
|
r4 = float4(t40.sample(s40, float2(0.0), level(0.0)));
|
||||||
r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x);
|
r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x);
|
||||||
r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x);
|
r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x);
|
||||||
r4.z = b40.v + b41.v;
|
r4.z = b40.v + b41.v;
|
||||||
u4.fence();
|
spvImageFence(u4);
|
||||||
r4.w = as_type<float>(u4.read(uint(0)).x);
|
r4.w = as_type<float>(u4.read(uint(0)).x);
|
||||||
u0[0].write(as_type<uint4>(r0), uint(0));
|
u0[0].write(as_type<uint4>(r0), uint(0));
|
||||||
u1[0].write(as_type<uint4>(r1), uint(0));
|
u1[0].write(as_type<uint4>(r1), uint(0));
|
||||||
|
@ -1,12 +1,17 @@
|
|||||||
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||||
|
|
||||||
#include <metal_stdlib>
|
#include <metal_stdlib>
|
||||||
#include <simd/simd.h>
|
#include <simd/simd.h>
|
||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
|
template <typename ImageT>
|
||||||
|
void spvImageFence(ImageT img) { img.fence(); }
|
||||||
|
|
||||||
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
||||||
{
|
{
|
||||||
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
|
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
|
||||||
uImageArray.fence();
|
spvImageFence(uImageArray);
|
||||||
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
|
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
|
||||||
uImage.write(a, uint2(int2(2, 3)));
|
uImage.write(a, uint2(int2(2, 3)));
|
||||||
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
||||||
|
@ -7492,6 +7492,12 @@ void CompilerMSL::emit_custom_functions()
|
|||||||
statement("");
|
statement("");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SPVFuncImplImageFence:
|
||||||
|
statement("template <typename ImageT>");
|
||||||
|
statement("void spvImageFence(ImageT img) { img.fence(); }");
|
||||||
|
statement("");
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -8968,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
|||||||
|
|
||||||
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
|
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
|
||||||
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
|
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
|
||||||
statement(to_expression(img_id), ".fence();");
|
{
|
||||||
|
add_spv_func_and_recompile(SPVFuncImplImageFence);
|
||||||
|
// Need to wrap this with a value type,
|
||||||
|
// since the Metal headers are broken and do not consider case when the image is a reference.
|
||||||
|
statement("spvImageFence(", to_expression(img_id), ");");
|
||||||
|
}
|
||||||
|
|
||||||
emit_texture_op(instruction, false);
|
emit_texture_op(instruction, false);
|
||||||
break;
|
break;
|
||||||
|
@ -824,7 +824,8 @@ protected:
|
|||||||
SPVFuncImplVariableSizedDescriptor,
|
SPVFuncImplVariableSizedDescriptor,
|
||||||
SPVFuncImplVariableDescriptorArray,
|
SPVFuncImplVariableDescriptorArray,
|
||||||
SPVFuncImplPaddedStd140,
|
SPVFuncImplPaddedStd140,
|
||||||
SPVFuncImplReduceAdd
|
SPVFuncImplReduceAdd,
|
||||||
|
SPVFuncImplImageFence
|
||||||
};
|
};
|
||||||
|
|
||||||
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
|
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
|
||||||
|
Loading…
Reference in New Issue
Block a user