MSL: Hoist out to_tesc_invocation_id() in more places.

When emitting fixup code, we might not have gl_InvocationID yet.
This commit is contained in:
Hans-Kristian Arntzen 2021-04-16 11:26:47 +02:00
parent 75ed73818c
commit 7b9a591aa7
10 changed files with 57 additions and 41 deletions

View File

@ -72,7 +72,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup C spvStoragec[8][4];
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
c[gl_InvocationID] = _18[gl_InvocationID];
c[gl_GlobalInvocationID.x % 4] = _18[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.P_v = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;

View File

@ -69,7 +69,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<C, 4> _18 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
gl_out[gl_InvocationID].C_v = _18[gl_GlobalInvocationID].v;
gl_out[gl_GlobalInvocationID.x % 4].C_v = _18[gl_GlobalInvocationID.x % 4].v;
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup P spvStoragep[8];
threadgroup P (&p) = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8];

View File

@ -84,13 +84,13 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<gl_PerVertex, 4> _33 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
gl_out[gl_InvocationID].C_v = _18[gl_GlobalInvocationID].v;
gl_out[gl_InvocationID].gl_Position = _33[gl_GlobalInvocationID].gl_Position;
gl_out[gl_InvocationID].gl_ClipDistance[0] = _33[gl_GlobalInvocationID].gl_ClipDistance[0];
gl_out[gl_InvocationID].gl_CullDistance[0] = _33[gl_GlobalInvocationID].gl_CullDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].C_v = _18[gl_GlobalInvocationID.x % 4].v;
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _33[gl_GlobalInvocationID.x % 4].gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0] = _33[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance[0] = _33[gl_GlobalInvocationID.x % 4].gl_CullDistance[0];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_InvocationID] = _33[gl_InvocationID];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _33[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.P_v = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;

View File

@ -84,13 +84,13 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<gl_PerVertex, 4> _33 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
gl_out[gl_InvocationID].C_v = _18[gl_GlobalInvocationID].v;
gl_out[gl_InvocationID].gl_PointSize = _33[gl_GlobalInvocationID].gl_PointSize;
gl_out[gl_InvocationID].gl_ClipDistance[0] = _33[gl_GlobalInvocationID].gl_ClipDistance[0];
gl_out[gl_InvocationID].gl_CullDistance[0] = _33[gl_GlobalInvocationID].gl_CullDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].C_v = _18[gl_GlobalInvocationID.x % 4].v;
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _33[gl_GlobalInvocationID.x % 4].gl_PointSize;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0] = _33[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance[0] = _33[gl_GlobalInvocationID.x % 4].gl_CullDistance[0];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_InvocationID] = _33[gl_InvocationID];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _33[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.P_v = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;

View File

@ -76,11 +76,11 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup float4 spvStoragefoo[8][4];
threadgroup float4 (&foo)[4] = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];
foo[gl_InvocationID] = _15[gl_InvocationID];
gl_out[gl_InvocationID].gl_Position = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
gl_out[gl_InvocationID].gl_PointSize = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[0];
gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[0];
foo[gl_GlobalInvocationID.x % 4] = _15[gl_GlobalInvocationID.x % 4];
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0] = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance[0] = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[0];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.foo_patch = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;

View File

@ -73,11 +73,11 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
gl_out[gl_InvocationID].foo = _15[gl_InvocationID];
gl_out[gl_InvocationID].gl_Position = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
gl_out[gl_InvocationID].gl_PointSize = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[0];
gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4];
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0] = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance[0] = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[0];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup float4 spvStoragefoo_patch[8];
threadgroup float4 (&foo_patch) = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8];

View File

@ -74,13 +74,13 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<gl_PerVertex, 4> _29 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
gl_out[gl_InvocationID].foo = _15[gl_InvocationID];
gl_out[gl_InvocationID].gl_Position = _29[gl_GlobalInvocationID].gl_Position;
gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID].gl_ClipDistance[0];
gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID].gl_CullDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4];
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4].gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0] = _29[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance[0] = _29[gl_GlobalInvocationID.x % 4].gl_CullDistance[0];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_InvocationID] = _29[gl_InvocationID];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _29[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.foo_patch = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;

View File

@ -74,13 +74,13 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<gl_PerVertex, 4> _29 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
gl_out[gl_InvocationID].foo = _15[gl_InvocationID];
gl_out[gl_InvocationID].gl_PointSize = _29[gl_GlobalInvocationID].gl_PointSize;
gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID].gl_ClipDistance[0];
gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID].gl_CullDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4];
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4].gl_PointSize;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0] = _29[gl_GlobalInvocationID.x % 4].gl_ClipDistance[0];
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance[0] = _29[gl_GlobalInvocationID.x % 4].gl_CullDistance[0];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_InvocationID] = _29[gl_InvocationID];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _29[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.foo_patch = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;

View File

@ -2169,10 +2169,11 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
{
entry_func.fixup_hooks_in.push_back([=, &var]() {
uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex);
auto invocation = to_tesc_invocation_id();
statement(to_expression(stage_out_ptr_var_id), "[",
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "].",
invocation, "].",
to_member_name(ib_type, index), " = ", to_expression(var.initializer), "[",
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
invocation, "];");
});
}
else
@ -2774,8 +2775,8 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
bool unroll_array = !mbr_type.array.empty() && is_builtin;
AccessChainMeta chain_meta;
auto constant_chain = access_chain_internal(var.initializer, &builtin_invocation_id_id, 1, 0, &chain_meta);
auto invocation = to_tesc_invocation_id();
auto constant_chain = join(to_expression(var.initializer), "[", invocation, "]");
if (unroll_array)
{
@ -2785,15 +2786,16 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
for (uint32_t i = 0; i < len; i++)
{
statement(to_expression(stage_out_ptr_var_id), "[",
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "].",
invocation, "].",
to_member_name(ib_type, index), "[", i, "] = ",
constant_chain, ".", to_member_name(type, mbr_idx), "[", i, "];");
constant_chain, ".",
to_member_name(type, mbr_idx), "[", i, "];");
}
}
else
{
statement(to_expression(stage_out_ptr_var_id), "[",
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "].",
invocation, "].",
to_member_name(ib_type, index), " = ",
constant_chain, ".", to_member_name(type, mbr_idx), ";");
}
@ -2958,6 +2960,18 @@ bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage)
return false;
}
string CompilerMSL::to_tesc_invocation_id()
{
if (msl_options.multi_patch_workgroup)
{
// n.b. builtin_invocation_id_id here is the dispatch global invocation ID,
// not the TC invocation ID.
return join(to_expression(builtin_invocation_id_id), ".x % ", get_entry_point().output_vertices);
}
else
return builtin_to_glsl(BuiltInInvocationId, StorageClassInput);
}
void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
@ -3014,10 +3028,11 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo
if (strip_array)
{
entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
auto invocation = to_tesc_invocation_id();
statement(to_expression(masked_var.self), "[",
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "] = ",
invocation, "] = ",
to_expression(initializer), "[",
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
invocation, "];");
});
}
else

View File

@ -806,6 +806,7 @@ protected:
bool allow_local_declaration = false;
};
std::string to_tesc_invocation_id();
void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array);
void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type,
SPIRVariable &var, InterfaceBlockMeta &meta);