MSL: Don't mask off inactive bits in ballot masks.

This was based on my misreading the spec. The Vulkan CTS expects the
bits to be set, even if the invocations corresponding to them are
inactive.
This commit is contained in:
Chip Davis 2020-11-23 12:29:47 -06:00
parent 1f178be3c9
commit 1e67b21ee9
2 changed files with 6 additions and 7 deletions

View File

@ -230,8 +230,8 @@ kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgrou
{
uint gl_SubgroupSize = 32;
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, 32 - gl_SubgroupInvocationID), uint3(0)) & spvSubgroupBallot(true);
uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, 32 - gl_SubgroupInvocationID - 1), uint3(0)) & spvSubgroupBallot(true);
uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, 32 - gl_SubgroupInvocationID), uint3(0));
uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, 32 - gl_SubgroupInvocationID - 1), uint3(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
_9.FragColor = float(gl_NumSubgroups);

View File

@ -11224,7 +11224,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ",
msl_options.fixed_subgroup_size, " - max(",
to_expression(builtin_subgroup_invocation_id_id),
", 32u)), uint2(0)) & spvSubgroupBallot(true);");
", 32u)), uint2(0));");
}
else if (msl_options.fixed_subgroup_size != 0)
{
@ -11233,7 +11233,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
to_expression(builtin_subgroup_invocation_id_id), ", ",
msl_options.fixed_subgroup_size, " - ",
to_expression(builtin_subgroup_invocation_id_id),
"), uint3(0)) & spvSubgroupBallot(true);");
"), uint3(0));");
}
else if (msl_options.is_ios())
{
@ -11277,8 +11277,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ",
msl_options.fixed_subgroup_size, " - max(",
to_expression(builtin_subgroup_invocation_id_id),
" + 1, 32u)), uint2(0)) & "
"spvSubgroupBallot(true);");
" + 1, 32u)), uint2(0));");
}
else if (msl_options.fixed_subgroup_size != 0)
{
@ -11287,7 +11286,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
msl_options.fixed_subgroup_size, " - ",
to_expression(builtin_subgroup_invocation_id_id),
" - 1), uint3(0)) & spvSubgroupBallot(true);");
" - 1), uint3(0));");
}
else if (msl_options.is_ios())
{