MSL: extract global variables from subgroup ballot operations
Fixes #1513.
This commit is contained in:
parent
6fc2a0581a
commit
f0239bce05
@ -0,0 +1,86 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Output structure returned by the fragment entry point main0.
struct main0_out
|
||||
{
|
||||
// Two-component unsigned output tagged with the [[color(0)]] attribute.
uint2 FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
// Finds the least significant set bit of a subgroup ballot.
// The ballot is first ANDed with a mask derived from gl_SubgroupSize: the x and
// y words get extract_bits() masks sized from the subgroup size, while the z
// and w words are always cleared.
// Returns the bit index counted across the four 32-bit words, or uint(-1) when
// no bit remains after masking.
inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
{
    const uint lowWidth = min(gl_SubgroupSize, 32u);
    const uint highWidth = (uint)max((int)gl_SubgroupSize - 32, 0);
    const uint4 live = ballot & uint4(extract_bits(0xFFFFFFFF, 0, lowWidth), extract_bits(0xFFFFFFFF, 0, highWidth), uint2(0));

    // Walk the words from least to most significant; the first non-zero word decides.
    if (live.x != 0)
    {
        return ctz(live.x);
    }
    if (live.y != 0)
    {
        return 32 + ctz(live.y);
    }
    if (live.z != 0)
    {
        return 64 + ctz(live.z);
    }
    if (live.w != 0)
    {
        return 96 + ctz(live.w);
    }
    return uint(-1);
}
|
||||
|
||||
// Finds the most significant set bit of a subgroup ballot.
// The ballot is first ANDed with a mask derived from gl_SubgroupSize: the x and
// y words get extract_bits() masks sized from the subgroup size, while the z
// and w words are always cleared.
// Returns the bit index counted across the four 32-bit words, or uint(-1) when
// no bit remains after masking.
inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
{
    const uint lowWidth = min(gl_SubgroupSize, 32u);
    const uint highWidth = (uint)max((int)gl_SubgroupSize - 32, 0);
    const uint4 live = ballot & uint4(extract_bits(0xFFFFFFFF, 0, lowWidth), extract_bits(0xFFFFFFFF, 0, highWidth), uint2(0));

    // Walk the words from most to least significant; the first non-zero word decides.
    if (live.w != 0)
    {
        return 128 - (clz(live.w) + 1);
    }
    if (live.z != 0)
    {
        return 96 - (clz(live.z) + 1);
    }
    if (live.y != 0)
    {
        return 64 - (clz(live.y) + 1);
    }
    if (live.x != 0)
    {
        return 32 - (clz(live.x) + 1);
    }
    return uint(-1);
}
|
||||
|
||||
// Returns the total number of set bits across all four components of `ballot`.
inline uint spvPopCount4(uint4 ballot)
{
    // Component-wise population count, then a horizontal sum in x, y, z, w order.
    const uint4 perWord = popcount(ballot);
    return perWord.x + perWord.y + perWord.z + perWord.w;
}
|
||||
|
||||
// Counts the set bits of a subgroup ballot after masking it with a mask
// derived from gl_SubgroupSize (x and y words sized via extract_bits(), z and w
// words cleared).
inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
{
    const uint lowWidth = min(gl_SubgroupSize, 32u);
    const uint highWidth = (uint)max((int)gl_SubgroupSize - 32, 0);
    const uint4 sizeMask = uint4(extract_bits(0xFFFFFFFF, 0, lowWidth), extract_bits(0xFFFFFFFF, 0, highWidth), uint2(0));
    const uint4 live = ballot & sizeMask;
    return spvPopCount4(live);
}
|
||||
|
||||
// Counts the set bits of a subgroup ballot after masking it with a mask sized
// from gl_SubgroupInvocationID + 1 (x and y words built via extract_bits(),
// z and w words cleared), i.e. the calling invocation's own bit is included.
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
    const uint lowWidth = min(gl_SubgroupInvocationID + 1, 32u);
    const uint highWidth = (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0);
    const uint4 prefixMask = uint4(extract_bits(0xFFFFFFFF, 0, lowWidth), extract_bits(0xFFFFFFFF, 0, highWidth), uint2(0));
    const uint4 live = ballot & prefixMask;
    return spvPopCount4(live);
}
|
||||
|
||||
// Counts the set bits of a subgroup ballot after masking it with a mask sized
// from gl_SubgroupInvocationID (x and y words built via extract_bits(), z and w
// words cleared), i.e. the calling invocation's own bit is excluded.
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
    const uint lowWidth = min(gl_SubgroupInvocationID, 32u);
    const uint highWidth = (uint)max((int)gl_SubgroupInvocationID - 32, 0);
    const uint4 prefixMask = uint4(extract_bits(0xFFFFFFFF, 0, lowWidth), extract_bits(0xFFFFFFFF, 0, highWidth), uint2(0));
    const uint4 live = ballot & prefixMask;
    return spvPopCount4(live);
}
|
||||
|
||||
// Returns spvSubgroupBallotFindLSB for the fixed ballot (1, 2, 3, 4), using the
// subgroup size passed in by reference.
static inline __attribute__((always_inline))
uint sub1(thread uint& gl_SubgroupSize)
{
    const uint4 fixedBallot = uint4(1u, 2u, 3u, 4u);
    return spvSubgroupBallotFindLSB(fixedBallot, gl_SubgroupSize);
}
|
||||
|
||||
// Returns spvSubgroupBallotFindMSB for the fixed ballot (1, 2, 3, 4), using the
// subgroup size passed in by reference.
static inline __attribute__((always_inline))
uint sub2(thread uint& gl_SubgroupSize)
{
    const uint4 fixedBallot = uint4(1u, 2u, 3u, 4u);
    return spvSubgroupBallotFindMSB(fixedBallot, gl_SubgroupSize);
}
|
||||
|
||||
// Returns spvSubgroupBallotBitCount for the fixed ballot (1, 2, 3, 4), using the
// subgroup size passed in by reference.
static inline __attribute__((always_inline))
uint sub3(thread uint& gl_SubgroupSize)
{
    const uint4 fixedBallot = uint4(1u, 2u, 3u, 4u);
    return spvSubgroupBallotBitCount(fixedBallot, gl_SubgroupSize);
}
|
||||
|
||||
// Returns spvSubgroupBallotInclusiveBitCount for the fixed ballot (1, 2, 3, 4),
// using the invocation index passed in by reference.
static inline __attribute__((always_inline))
uint sub4(thread uint& gl_SubgroupInvocationID)
{
    const uint4 fixedBallot = uint4(1u, 2u, 3u, 4u);
    return spvSubgroupBallotInclusiveBitCount(fixedBallot, gl_SubgroupInvocationID);
}
|
||||
|
||||
// Returns spvSubgroupBallotExclusiveBitCount for the fixed ballot (1, 2, 3, 4),
// using the invocation index passed in by reference.
static inline __attribute__((always_inline))
uint sub5(thread uint& gl_SubgroupInvocationID)
{
    const uint4 fixedBallot = uint4(1u, 2u, 3u, 4u);
    return spvSubgroupBallotExclusiveBitCount(fixedBallot, gl_SubgroupInvocationID);
}
|
||||
|
||||
// Fragment entry point. Sums the results of sub1..sub5 and stores the total in
// the x component of FragColor; the remaining output is left zero-initialized.
// The subgroup builtins arrive as entry-point arguments and are forwarded to
// the helpers that need them.
fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]])
{
    main0_out out = {};

    // sub1..sub3 consume the subgroup size, sub4/sub5 the invocation index.
    uint total = sub1(gl_SubgroupSize);
    total += sub2(gl_SubgroupSize);
    total += sub3(gl_SubgroupSize);
    total += sub4(gl_SubgroupInvocationID);
    total += sub5(gl_SubgroupInvocationID);

    out.FragColor.x = total;
    return out;
}
|
||||
|
30
shaders-msl/frag/subgroup-globals-extract.msl22.frag
Normal file
30
shaders-msl/frag/subgroup-globals-extract.msl22.frag
Normal file
@ -0,0 +1,30 @@
|
||||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
|
||||
layout(location = 0) out uvec2 FragColor;
|
||||
|
||||
// Returns subgroupBallotFindLSB for the fixed ballot (1, 2, 3, 4).
uint sub1()
{
    uvec4 fixedBallot = uvec4(1, 2, 3, 4);
    return subgroupBallotFindLSB(fixedBallot);
}
|
||||
|
||||
// Returns subgroupBallotFindMSB for the fixed ballot (1, 2, 3, 4).
uint sub2()
{
    uvec4 fixedBallot = uvec4(1, 2, 3, 4);
    return subgroupBallotFindMSB(fixedBallot);
}
|
||||
|
||||
// Returns subgroupBallotBitCount for the fixed ballot (1, 2, 3, 4).
uint sub3()
{
    uvec4 fixedBallot = uvec4(1, 2, 3, 4);
    return subgroupBallotBitCount(fixedBallot);
}
|
||||
|
||||
// Returns subgroupBallotInclusiveBitCount for the fixed ballot (1, 2, 3, 4).
uint sub4()
{
    uvec4 fixedBallot = uvec4(1, 2, 3, 4);
    return subgroupBallotInclusiveBitCount(fixedBallot);
}
|
||||
|
||||
// Returns subgroupBallotExclusiveBitCount for the fixed ballot (1, 2, 3, 4).
uint sub5()
{
    uvec4 fixedBallot = uvec4(1, 2, 3, 4);
    return subgroupBallotExclusiveBitCount(fixedBallot);
}
|
||||
|
||||
// Entry point: writes the sum of sub1() through sub5() to FragColor.x.
void main()
{
    uint total = sub1();
    total += sub2();
    total += sub3();
    total += sub4();
    total += sub5();
    FragColor.x = total;
}
|
@ -1491,6 +1491,37 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
|
||||
}
|
||||
}
|
||||
|
||||
case OpGroupNonUniformInverseBallot:
|
||||
{
|
||||
added_arg_ids.insert(builtin_subgroup_invocation_id_id);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpGroupNonUniformBallotFindLSB:
|
||||
case OpGroupNonUniformBallotFindMSB:
|
||||
{
|
||||
added_arg_ids.insert(builtin_subgroup_size_id);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpGroupNonUniformBallotBitCount:
|
||||
{
|
||||
auto operation = static_cast<GroupOperation>(ops[3]);
|
||||
switch (operation)
|
||||
{
|
||||
case GroupOperationReduce:
|
||||
added_arg_ids.insert(builtin_subgroup_size_id);
|
||||
break;
|
||||
case GroupOperationInclusiveScan:
|
||||
case GroupOperationExclusiveScan:
|
||||
added_arg_ids.insert(builtin_subgroup_invocation_id_id);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user