Merge pull request #2153 from etang-cw/TexFence
[WIP] MSL: Prevent RAW hazards on read_write textures
This commit is contained in:
commit
637c211c6f
@ -8,6 +8,7 @@ using namespace metal;
|
||||
static inline __attribute__((always_inline))
|
||||
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
||||
{
|
||||
TargetTexture.fence();
|
||||
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
||||
float2 storeTemp = loaded + float2(1.0);
|
||||
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
||||
|
@ -5,6 +5,7 @@ using namespace metal;
|
||||
|
||||
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
||||
{
|
||||
uImageArray.fence();
|
||||
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
|
||||
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ using namespace metal;
|
||||
static inline __attribute__((always_inline))
|
||||
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
||||
{
|
||||
TargetTexture.fence();
|
||||
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
||||
float2 storeTemp = loaded + float2(1.0);
|
||||
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
||||
|
@ -6,6 +6,7 @@ using namespace metal;
|
||||
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
|
||||
{
|
||||
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
|
||||
uImageArray.fence();
|
||||
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
|
||||
uImage.write(a, uint2(int2(2, 3)));
|
||||
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
|
||||
|
@ -8675,9 +8675,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
// Mark that this shader reads from this image
|
||||
uint32_t img_id = ops[2];
|
||||
auto &type = expression_type(img_id);
|
||||
auto *p_var = maybe_get_backing_variable(img_id);
|
||||
if (type.image.dim != DimSubpassData)
|
||||
{
|
||||
auto *p_var = maybe_get_backing_variable(img_id);
|
||||
if (p_var && has_decoration(p_var->self, DecorationNonReadable))
|
||||
{
|
||||
unset_decoration(p_var->self, DecorationNonReadable);
|
||||
@ -8685,6 +8685,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
}
|
||||
|
||||
// Metal requires explicit fences to break up RAW (read-after-write) hazards, even within the same shader invocation.
|
||||
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
|
||||
statement(to_expression(img_id), ".fence();");
|
||||
|
||||
emit_texture_op(instruction, false);
|
||||
break;
|
||||
}
|
||||
|
@ -496,6 +496,12 @@ public:
|
||||
// so it can be enabled only when the bug is present.
|
||||
bool sample_dref_lod_array_as_grad = false;
|
||||
|
||||
// MSL doesn't guarantee coherence between writes and subsequent reads of read_write textures.
|
||||
// When enabled, a fence is emitted before each read of a read_write texture to ensure coherency.
|
||||
// If you're sure your shaders never rely on this coherency, you can set this to false for a possible performance improvement.
|
||||
// Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this.
|
||||
bool readwrite_texture_fences = true;
|
||||
|
||||
bool is_ios() const
|
||||
{
|
||||
return platform == iOS;
|
||||
|
Loading…
Reference in New Issue
Block a user