Merge pull request #2153 from etang-cw/TexFence

[WIP] MSL: Prevent RAW hazards on read_write textures
This commit is contained in:
Hans-Kristian Arntzen 2023-08-17 11:05:55 +02:00 committed by GitHub
commit 637c211c6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 15 additions and 1 deletion

View File

@@ -8,6 +8,7 @@ using namespace metal;
static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
TargetTexture.fence();
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@@ -5,6 +5,7 @@ using namespace metal;
// Fragment entry point that exercises image reads and writes on three bound textures:
//   uImageMS    [[texture(0)]] - multisampled 2D texture, only read here
//   uImageArray [[texture(1)]] - 2D array texture declared with read_write access
//   uImage      [[texture(2)]] - 2D texture declared with write access
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{
// Fence the read_write texture ahead of the uImageArray.read() further down, so the
// read cannot race with earlier writes to the same texture (RAW hazard).
uImageArray.fence();
// Read sample 2 of texel (1, 2) from the multisampled texture and store it at texel (2, 3) of uImage.
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
// Read texel (1, 2) of array slice 4 and write it to texel (2, 3) of array slice 7 of the same texture.
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
}

View File

@@ -8,6 +8,7 @@ using namespace metal;
static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
TargetTexture.fence();
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@@ -6,6 +6,7 @@ using namespace metal;
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
uImageArray.fence();
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
uImage.write(a, uint2(int2(2, 3)));
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));

View File

@@ -8675,9 +8675,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
// Mark that this shader reads from this image
uint32_t img_id = ops[2];
auto &type = expression_type(img_id);
auto *p_var = maybe_get_backing_variable(img_id);
if (type.image.dim != DimSubpassData)
{
auto *p_var = maybe_get_backing_variable(img_id);
if (p_var && has_decoration(p_var->self, DecorationNonReadable))
{
unset_decoration(p_var->self, DecorationNonReadable);
@@ -8685,6 +8685,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
}
}
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
statement(to_expression(img_id), ".fence();");
emit_texture_op(instruction, false);
break;
}

View File

@@ -496,6 +496,12 @@ public:
// so it can be enabled only when the bug is present.
bool sample_dref_lod_array_as_grad = false;
// MSL doesn't guarantee coherence between writes and subsequent reads of read_write textures.
// This inserts fences before each read of a read_write texture to ensure coherency.
// If you're sure you never rely on this, you can set this to false for a possible performance improvement.
// Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this.
bool readwrite_texture_fences = true;
bool is_ios() const
{
return platform == iOS;