GLSL: Check for inactive threads in non-uniform arithmetic ops

The last argument of the shuffle*NV functions does not indicate whether the
current thread is inactive, only whether it is out of range.
This commit is contained in:
georgeouzou 2023-04-03 18:42:08 +03:00
parent dbf6218883
commit ab3a6212b8

View File

@ -4058,7 +4058,8 @@ void CompilerGLSL::emit_subgroup_arithmetic_workaround(std::string func, Op op,
statement(t.type, " ", func, "(", t.type, " v)");
begin_scope();
statement(t.type, " ", result, " = ", t.identity, ";");
statement("if (subgroupBallotBitCount(subgroupBallot(true)) == gl_SubgroupSize)");
statement("uvec4 active_threads = subgroupBallot(true);");
statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
begin_scope();
statement("uint total = gl_SubgroupSize / 2u;");
statement(result, " = v;");
@ -4096,8 +4097,8 @@ void CompilerGLSL::emit_subgroup_arithmetic_workaround(std::string func, Op op,
}
statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
begin_scope();
statement("bool valid;");
statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize, valid);");
statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
{
statement("valid = valid && (i < total);");
@ -17915,10 +17916,11 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSub
case SubgroupArithmeticIAddInclusiveScan:
case SubgroupArithmeticFAddReduce:
case SubgroupArithmeticFAddInclusiveScan:
return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask };
return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
case SubgroupArithmeticIAddExclusiveScan:
case SubgroupArithmeticFAddExclusiveScan:
return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupElect };
return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
default:
return {};
}