MSL: Return early from helper tesc invocations.
Return after loading the input control point array if there are more input points than output points and this invocation is one of the helper invocations spun off to load the input points.

I was hesitant to do this initially, since the MSL spec has this to say about barriers:

> The `threadgroup_barrier` (or `simdgroup_barrier`) function must be
> encountered by all threads in a threadgroup (or SIMD-group) executing
> the kernel.

That is, if any thread executes the barrier, then all threads must execute it, or the barrier'd invocations will hang. But the key words here seem to be "executing the kernel": inactive invocations, those that have already returned, need not encounter the barrier to prevent hangs. Indeed, I've encountered no problems from doing this, at least on my hardware.

This also fixes a few CTS tests that were failing due to execution ordering; apparently, my assumption that the later, invalid data written by the helpers would get overwritten was wrong.
This commit is contained in:
parent
5dde82654c
commit
a43dcd7b99
@ -52,6 +52,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 3)
|
||||||
|
return;
|
||||||
VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } };
|
VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } };
|
||||||
VertexOutput param[3];
|
VertexOutput param[3];
|
||||||
spvArrayCopyFromStack1(param, _223);
|
spvArrayCopyFromStack1(param, _223);
|
||||||
|
@ -25,6 +25,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 4)
|
||||||
|
return;
|
||||||
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
|
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
|
||||||
threadgroup_barrier(mem_flags::mem_device);
|
threadgroup_barrier(mem_flags::mem_device);
|
||||||
if (gl_InvocationID == 0)
|
if (gl_InvocationID == 0)
|
||||||
|
@ -25,6 +25,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 1)
|
||||||
|
return;
|
||||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
|
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
|
||||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
|
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
|
||||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
|
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
|
||||||
|
@ -26,6 +26,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 4)
|
||||||
|
return;
|
||||||
Boo vInput_24;
|
Boo vInput_24;
|
||||||
vInput_24.a = gl_in[gl_InvocationID].Boo_a;
|
vInput_24.a = gl_in[gl_InvocationID].Boo_a;
|
||||||
vInput_24.b = gl_in[gl_InvocationID].Boo_b;
|
vInput_24.b = gl_in[gl_InvocationID].Boo_b;
|
||||||
|
@ -30,6 +30,8 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uin
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 1)
|
||||||
|
return;
|
||||||
float2 _430 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy;
|
float2 _430 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy;
|
||||||
float2 _440 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy;
|
float2 _440 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy;
|
||||||
float3 _445 = float3(_430.x, -10.0, _430.y);
|
float3 _445 = float3(_430.x, -10.0, _430.y);
|
||||||
|
@ -82,6 +82,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 3)
|
||||||
|
return;
|
||||||
VertexOutput p[3];
|
VertexOutput p[3];
|
||||||
p[0].pos = gl_in[0].gl_Position;
|
p[0].pos = gl_in[0].gl_Position;
|
||||||
p[0].uv = gl_in[0].VertexOutput_uv;
|
p[0].uv = gl_in[0].VertexOutput_uv;
|
||||||
|
@ -25,6 +25,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 4)
|
||||||
|
return;
|
||||||
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
|
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
|
||||||
threadgroup_barrier(mem_flags::mem_device);
|
threadgroup_barrier(mem_flags::mem_device);
|
||||||
if (gl_InvocationID == 0)
|
if (gl_InvocationID == 0)
|
||||||
|
@ -32,6 +32,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 1)
|
||||||
|
return;
|
||||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
|
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
|
||||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
|
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
|
||||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
|
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
|
||||||
|
@ -26,6 +26,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 4)
|
||||||
|
return;
|
||||||
Boo vInput_24;
|
Boo vInput_24;
|
||||||
vInput_24.a = gl_in[gl_InvocationID].Boo_a;
|
vInput_24.a = gl_in[gl_InvocationID].Boo_a;
|
||||||
vInput_24.b = gl_in[gl_InvocationID].Boo_b;
|
vInput_24.b = gl_in[gl_InvocationID].Boo_b;
|
||||||
|
@ -112,6 +112,8 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& v_41 [[buffer(0)]], ui
|
|||||||
if (gl_InvocationID < spvIndirectParams[0])
|
if (gl_InvocationID < spvIndirectParams[0])
|
||||||
gl_in[gl_InvocationID] = in;
|
gl_in[gl_InvocationID] = in;
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (gl_InvocationID >= 1)
|
||||||
|
return;
|
||||||
float2 p0 = gl_in[0].vPatchPosBase;
|
float2 p0 = gl_in[0].vPatchPosBase;
|
||||||
float2 param = p0;
|
float2 param = p0;
|
||||||
if (!frustum_cull(param, v_41))
|
if (!frustum_cull(param, v_41))
|
||||||
|
@ -1796,6 +1796,8 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
|
|||||||
statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref,
|
statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref,
|
||||||
";");
|
";");
|
||||||
statement("threadgroup_barrier(mem_flags::mem_threadgroup);");
|
statement("threadgroup_barrier(mem_flags::mem_threadgroup);");
|
||||||
|
statement("if (", to_expression(builtin_invocation_id_id), " >= ", get_entry_point().output_vertices, ")");
|
||||||
|
statement(" return;");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user