Merge branch 'msl-subgroup-ops-2' of git://github.com/cdavis5e/SPIRV-Cross
This commit is contained in:
commit
35d3b9c3e7
@ -323,7 +323,7 @@ if (SPIRV_CROSS_STATIC)
|
||||
endif()
|
||||
|
||||
set(spirv-cross-abi-major 0)
|
||||
set(spirv-cross-abi-minor 42)
|
||||
set(spirv-cross-abi-minor 43)
|
||||
set(spirv-cross-abi-patch 0)
|
||||
|
||||
if (SPIRV_CROSS_SHARED)
|
||||
|
19
main.cpp
19
main.cpp
@ -566,6 +566,9 @@ struct CLIArguments
|
||||
uint32_t msl_r32ui_linear_texture_alignment = 4;
|
||||
uint32_t msl_r32ui_alignment_constant_id = 65535;
|
||||
bool msl_texture_1d_as_2d = false;
|
||||
bool msl_ios_use_simdgroup_functions = false;
|
||||
bool msl_emulate_subgroups = false;
|
||||
uint32_t msl_fixed_subgroup_size = 0;
|
||||
bool glsl_emit_push_constant_as_ubo = false;
|
||||
bool glsl_emit_ubo_as_plain_uniforms = false;
|
||||
bool glsl_force_flattened_io_blocks = false;
|
||||
@ -779,7 +782,14 @@ static void print_help_msl()
|
||||
"\t[--msl-r32ui-linear-texture-align-constant-id <id>]:\n\t\tThe function constant ID to use for the linear texture alignment.\n"
|
||||
"\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n"
|
||||
"\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n"
|
||||
"\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n");
|
||||
"\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n"
|
||||
"\t[--msl-ios-use-simdgroup-functions]:\n\t\tUse simd_*() functions for subgroup ops instead of quad_*().\n"
|
||||
"\t\tRecent Apple GPUs support SIMD-groups larger than a quad. Use this option to take advantage of this support.\n"
|
||||
"\t[--msl-emulate-subgroups]:\n\t\tAssume subgroups of size 1.\n"
|
||||
"\t\tIntended for Vulkan Portability implementations where Metal support for SIMD-groups is insufficient for true subgroups.\n"
|
||||
"\t[--msl-fixed-subgroup-size <size>]:\n\t\tAssign a constant <size> to the SubgroupSize builtin.\n"
|
||||
"\t\tIntended for Vulkan Portability implementations where VK_EXT_subgroup_size_control is not supported or disabled.\n"
|
||||
"\t\tIf 0, assume variable subgroup size as actually exposed by Metal.\n");
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
@ -1021,6 +1031,9 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
|
||||
msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment;
|
||||
msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id;
|
||||
msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d;
|
||||
msl_opts.ios_use_simdgroup_functions = args.msl_ios_use_simdgroup_functions;
|
||||
msl_opts.emulate_subgroups = args.msl_emulate_subgroups;
|
||||
msl_opts.fixed_subgroup_size = args.msl_fixed_subgroup_size;
|
||||
msl_comp->set_msl_options(msl_opts);
|
||||
for (auto &v : args.msl_discrete_descriptor_sets)
|
||||
msl_comp->add_discrete_descriptor_set(v);
|
||||
@ -1449,6 +1462,10 @@ static int main_inner(int argc, char *argv[])
|
||||
cbs.add("--msl-r32ui-linear-texture-align-constant-id",
|
||||
[&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); });
|
||||
cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; });
|
||||
cbs.add("--msl-ios-use-simdgroup-functions", [&args](CLIParser &) { args.msl_ios_use_simdgroup_functions = true; });
|
||||
cbs.add("--msl-emulate-subgroups", [&args](CLIParser &) { args.msl_emulate_subgroups = true; });
|
||||
cbs.add("--msl-fixed-subgroup-size",
|
||||
[&args](CLIParser &parser) { args.msl_fixed_subgroup_size = parser.next_uint(); });
|
||||
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
|
||||
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
|
||||
auto old_name = parser.next_string();
|
||||
|
@ -0,0 +1,30 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], unsupported-built-in-type gl_WorkGroupSize [[unsupported-built-in]])
|
||||
{
|
||||
uint gl_NumSubgroups = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;
|
||||
uint gl_SubgroupID = gl_LocalInvocationIndex;
|
||||
uint gl_SubgroupSize = 1;
|
||||
uint gl_SubgroupInvocationID = 0;
|
||||
_9.FragColor = float(gl_NumSubgroups);
|
||||
_9.FragColor = float(gl_SubgroupID);
|
||||
_9.FragColor = float(gl_SubgroupSize);
|
||||
_9.FragColor = float(gl_SubgroupInvocationID);
|
||||
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
threadgroup_barrier(mem_flags::mem_device);
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
threadgroup_barrier(mem_flags::mem_texture);
|
||||
bool elected = true;
|
||||
}
|
||||
|
@ -242,7 +242,8 @@ kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgrou
|
||||
simdgroup_barrier(mem_flags::mem_device);
|
||||
simdgroup_barrier(mem_flags::mem_threadgroup);
|
||||
simdgroup_barrier(mem_flags::mem_texture);
|
||||
bool elected = simd_is_first();
|
||||
bool _39 = simd_is_first();
|
||||
bool elected = _39;
|
||||
_9.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGtMask).x;
|
||||
|
@ -0,0 +1,322 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupBroadcast(T value, ushort lane)
|
||||
{
|
||||
return simd_broadcast(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupBroadcast(bool value, ushort lane)
|
||||
{
|
||||
return !!simd_broadcast((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)
|
||||
{
|
||||
return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupBroadcastFirst(T value)
|
||||
{
|
||||
return simd_broadcast_first(value);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupBroadcastFirst(bool value)
|
||||
{
|
||||
return !!simd_broadcast_first((ushort)value);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)
|
||||
{
|
||||
return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);
|
||||
}
|
||||
|
||||
inline uint4 spvSubgroupBallot(bool value)
|
||||
{
|
||||
simd_vote vote = simd_ballot(value);
|
||||
// simd_ballot() returns a 64-bit integer-like object, but
|
||||
// SPIR-V callers expect a uint4. We must convert.
|
||||
// FIXME: This won't include higher bits if Apple ever supports
|
||||
// 128 lanes in an SIMD-group.
|
||||
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
|
||||
}
|
||||
|
||||
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
|
||||
{
|
||||
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
|
||||
ballot &= mask;
|
||||
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
|
||||
ballot &= mask;
|
||||
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
|
||||
}
|
||||
|
||||
inline uint spvPopCount4(uint4 ballot)
|
||||
{
|
||||
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool spvSubgroupAllEqual(T value)
|
||||
{
|
||||
return simd_all(all(value == simd_broadcast_first(value)));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupAllEqual(bool value)
|
||||
{
|
||||
return simd_all(value) || !simd_any(value);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline bool spvSubgroupAllEqual(vec<bool, N> value)
|
||||
{
|
||||
return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffle(T value, ushort lane)
|
||||
{
|
||||
return simd_shuffle(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffle(bool value, ushort lane)
|
||||
{
|
||||
return !!simd_shuffle((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleXor(T value, ushort mask)
|
||||
{
|
||||
return simd_shuffle_xor(value, mask);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleXor(bool value, ushort mask)
|
||||
{
|
||||
return !!simd_shuffle_xor((ushort)value, mask);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleUp(T value, ushort delta)
|
||||
{
|
||||
return simd_shuffle_up(value, delta);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleUp(bool value, ushort delta)
|
||||
{
|
||||
return !!simd_shuffle_up((ushort)value, delta);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleDown(T value, ushort delta)
|
||||
{
|
||||
return simd_shuffle_down(value, delta);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleDown(bool value, ushort delta)
|
||||
{
|
||||
return !!simd_shuffle_down((ushort)value, delta);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvQuadBroadcast(T value, uint lane)
|
||||
{
|
||||
return quad_broadcast(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvQuadBroadcast(bool value, uint lane)
|
||||
{
|
||||
return !!quad_broadcast((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)
|
||||
{
|
||||
return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvQuadSwap(T value, uint dir)
|
||||
{
|
||||
return quad_shuffle_xor(value, dir + 1);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvQuadSwap(bool value, uint dir)
|
||||
{
|
||||
return !!quad_shuffle_xor((ushort)value, dir + 1);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);
|
||||
}
|
||||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
|
||||
{
|
||||
uint gl_SubgroupSize = 32;
|
||||
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, 32 - gl_SubgroupInvocationID), uint3(0)) & spvSubgroupBallot(true);
|
||||
uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, 32 - gl_SubgroupInvocationID - 1), uint3(0)) & spvSubgroupBallot(true);
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
_9.FragColor = float(gl_NumSubgroups);
|
||||
_9.FragColor = float(gl_SubgroupID);
|
||||
_9.FragColor = float(gl_SubgroupSize);
|
||||
_9.FragColor = float(gl_SubgroupInvocationID);
|
||||
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
simdgroup_barrier(mem_flags::mem_device);
|
||||
simdgroup_barrier(mem_flags::mem_threadgroup);
|
||||
simdgroup_barrier(mem_flags::mem_texture);
|
||||
bool _39 = simd_is_first();
|
||||
bool elected = _39;
|
||||
_9.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGtMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupLeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupLtMask).x;
|
||||
float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u);
|
||||
bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u);
|
||||
float3 first = spvSubgroupBroadcastFirst(float3(20.0));
|
||||
bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false));
|
||||
uint4 ballot_value = spvSubgroupBallot(true);
|
||||
bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
|
||||
bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
|
||||
uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize);
|
||||
uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize);
|
||||
uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize);
|
||||
uint shuffled = spvSubgroupShuffle(10u, 8u);
|
||||
bool shuffled_bool = spvSubgroupShuffle(true, 9u);
|
||||
uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u);
|
||||
bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u);
|
||||
uint shuffled_up = spvSubgroupShuffleUp(20u, 4u);
|
||||
bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u);
|
||||
uint shuffled_down = spvSubgroupShuffleDown(20u, 4u);
|
||||
bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u);
|
||||
bool has_all = simd_all(true);
|
||||
bool has_any = simd_any(true);
|
||||
bool has_equal = spvSubgroupAllEqual(0);
|
||||
has_equal = spvSubgroupAllEqual(true);
|
||||
has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0));
|
||||
has_equal = spvSubgroupAllEqual(bool4(true, true, false, true));
|
||||
float4 added = simd_sum(float4(20.0));
|
||||
int4 iadded = simd_sum(int4(20));
|
||||
float4 multiplied = simd_product(float4(20.0));
|
||||
int4 imultiplied = simd_product(int4(20));
|
||||
float4 lo = simd_min(float4(20.0));
|
||||
float4 hi = simd_max(float4(20.0));
|
||||
int4 slo = simd_min(int4(20));
|
||||
int4 shi = simd_max(int4(20));
|
||||
uint4 ulo = simd_min(uint4(20u));
|
||||
uint4 uhi = simd_max(uint4(20u));
|
||||
uint4 anded = simd_and(ballot_value);
|
||||
uint4 ored = simd_or(ballot_value);
|
||||
uint4 xored = simd_xor(ballot_value);
|
||||
added = simd_prefix_inclusive_sum(added);
|
||||
iadded = simd_prefix_inclusive_sum(iadded);
|
||||
multiplied = simd_prefix_inclusive_product(multiplied);
|
||||
imultiplied = simd_prefix_inclusive_product(imultiplied);
|
||||
added = simd_prefix_exclusive_sum(multiplied);
|
||||
multiplied = simd_prefix_exclusive_product(multiplied);
|
||||
iadded = simd_prefix_exclusive_sum(imultiplied);
|
||||
imultiplied = simd_prefix_exclusive_product(imultiplied);
|
||||
added = quad_sum(added);
|
||||
multiplied = quad_product(multiplied);
|
||||
iadded = quad_sum(iadded);
|
||||
imultiplied = quad_product(imultiplied);
|
||||
lo = quad_min(lo);
|
||||
hi = quad_max(hi);
|
||||
ulo = quad_min(ulo);
|
||||
uhi = quad_max(uhi);
|
||||
slo = quad_min(slo);
|
||||
shi = quad_max(shi);
|
||||
anded = quad_and(anded);
|
||||
ored = quad_or(ored);
|
||||
xored = quad_xor(xored);
|
||||
float4 swap_horiz = spvQuadSwap(float4(20.0), 0u);
|
||||
bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u);
|
||||
float4 swap_vertical = spvQuadSwap(float4(20.0), 1u);
|
||||
bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u);
|
||||
float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u);
|
||||
bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u);
|
||||
float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u);
|
||||
bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u);
|
||||
}
|
||||
|
@ -0,0 +1,282 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupBroadcast(T value, ushort lane)
|
||||
{
|
||||
return quad_broadcast(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupBroadcast(bool value, ushort lane)
|
||||
{
|
||||
return !!quad_broadcast((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)
|
||||
{
|
||||
return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupBroadcastFirst(T value)
|
||||
{
|
||||
return quad_broadcast_first(value);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupBroadcastFirst(bool value)
|
||||
{
|
||||
return !!quad_broadcast_first((ushort)value);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)
|
||||
{
|
||||
return (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value);
|
||||
}
|
||||
|
||||
inline uint4 spvSubgroupBallot(bool value)
|
||||
{
|
||||
return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);
|
||||
}
|
||||
|
||||
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
|
||||
{
|
||||
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
|
||||
ballot &= mask;
|
||||
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
|
||||
ballot &= mask;
|
||||
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
|
||||
}
|
||||
|
||||
inline uint spvPopCount4(uint4 ballot)
|
||||
{
|
||||
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool spvSubgroupAllEqual(T value)
|
||||
{
|
||||
return quad_all(all(value == quad_broadcast_first(value)));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupAllEqual(bool value)
|
||||
{
|
||||
return quad_all(value) || !quad_any(value);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline bool spvSubgroupAllEqual(vec<bool, N> value)
|
||||
{
|
||||
return quad_all(all(value == (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value)));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffle(T value, ushort lane)
|
||||
{
|
||||
return quad_shuffle(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffle(bool value, ushort lane)
|
||||
{
|
||||
return !!quad_shuffle((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleXor(T value, ushort mask)
|
||||
{
|
||||
return quad_shuffle_xor(value, mask);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleXor(bool value, ushort mask)
|
||||
{
|
||||
return !!quad_shuffle_xor((ushort)value, mask);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleUp(T value, ushort delta)
|
||||
{
|
||||
return quad_shuffle_up(value, delta);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleUp(bool value, ushort delta)
|
||||
{
|
||||
return !!quad_shuffle_up((ushort)value, delta);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleDown(T value, ushort delta)
|
||||
{
|
||||
return quad_shuffle_down(value, delta);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleDown(bool value, ushort delta)
|
||||
{
|
||||
return !!quad_shuffle_down((ushort)value, delta);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvQuadBroadcast(T value, uint lane)
|
||||
{
|
||||
return quad_broadcast(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvQuadBroadcast(bool value, uint lane)
|
||||
{
|
||||
return !!quad_broadcast((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)
|
||||
{
|
||||
return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvQuadSwap(T value, uint dir)
|
||||
{
|
||||
return quad_shuffle_xor(value, dir + 1);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvQuadSwap(bool value, uint dir)
|
||||
{
|
||||
return !!quad_shuffle_xor((ushort)value, dir + 1);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);
|
||||
}
|
||||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]])
|
||||
{
|
||||
uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0));
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));
|
||||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0));
|
||||
_9.FragColor = float(gl_NumSubgroups);
|
||||
_9.FragColor = float(gl_SubgroupID);
|
||||
_9.FragColor = float(gl_SubgroupSize);
|
||||
_9.FragColor = float(gl_SubgroupInvocationID);
|
||||
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
simdgroup_barrier(mem_flags::mem_device);
|
||||
simdgroup_barrier(mem_flags::mem_threadgroup);
|
||||
simdgroup_barrier(mem_flags::mem_texture);
|
||||
bool _39 = quad_is_first();
|
||||
bool elected = _39;
|
||||
_9.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGtMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupLeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupLtMask).x;
|
||||
float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u);
|
||||
bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u);
|
||||
float3 first = spvSubgroupBroadcastFirst(float3(20.0));
|
||||
bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false));
|
||||
uint4 ballot_value = spvSubgroupBallot(true);
|
||||
bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
|
||||
bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
|
||||
uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize);
|
||||
uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize);
|
||||
uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize);
|
||||
uint shuffled = spvSubgroupShuffle(10u, 8u);
|
||||
bool shuffled_bool = spvSubgroupShuffle(true, 9u);
|
||||
uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u);
|
||||
bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u);
|
||||
uint shuffled_up = spvSubgroupShuffleUp(20u, 4u);
|
||||
bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u);
|
||||
uint shuffled_down = spvSubgroupShuffleDown(20u, 4u);
|
||||
bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u);
|
||||
bool has_all = quad_all(true);
|
||||
bool has_any = quad_any(true);
|
||||
bool has_equal = spvSubgroupAllEqual(0);
|
||||
has_equal = spvSubgroupAllEqual(true);
|
||||
has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0));
|
||||
has_equal = spvSubgroupAllEqual(bool4(true, true, false, true));
|
||||
float4 swap_horiz = spvQuadSwap(float4(20.0), 0u);
|
||||
bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u);
|
||||
float4 swap_vertical = spvQuadSwap(float4(20.0), 1u);
|
||||
bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u);
|
||||
float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u);
|
||||
bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u);
|
||||
float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u);
|
||||
bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u);
|
||||
}
|
||||
|
@ -0,0 +1,316 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupBroadcast(T value, ushort lane)
|
||||
{
|
||||
return simd_broadcast(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupBroadcast(bool value, ushort lane)
|
||||
{
|
||||
return !!simd_broadcast((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)
|
||||
{
|
||||
return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupBroadcastFirst(T value)
|
||||
{
|
||||
return simd_broadcast_first(value);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupBroadcastFirst(bool value)
|
||||
{
|
||||
return !!simd_broadcast_first((ushort)value);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)
|
||||
{
|
||||
return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);
|
||||
}
|
||||
|
||||
inline uint4 spvSubgroupBallot(bool value)
|
||||
{
|
||||
return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);
|
||||
}
|
||||
|
||||
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
|
||||
{
|
||||
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
|
||||
ballot &= mask;
|
||||
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
|
||||
ballot &= mask;
|
||||
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
|
||||
}
|
||||
|
||||
inline uint spvPopCount4(uint4 ballot)
|
||||
{
|
||||
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));
|
||||
return spvPopCount4(ballot & mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool spvSubgroupAllEqual(T value)
|
||||
{
|
||||
return simd_all(all(value == simd_broadcast_first(value)));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupAllEqual(bool value)
|
||||
{
|
||||
return simd_all(value) || !simd_any(value);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline bool spvSubgroupAllEqual(vec<bool, N> value)
|
||||
{
|
||||
return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffle(T value, ushort lane)
|
||||
{
|
||||
return simd_shuffle(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffle(bool value, ushort lane)
|
||||
{
|
||||
return !!simd_shuffle((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleXor(T value, ushort mask)
|
||||
{
|
||||
return simd_shuffle_xor(value, mask);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleXor(bool value, ushort mask)
|
||||
{
|
||||
return !!simd_shuffle_xor((ushort)value, mask);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleUp(T value, ushort delta)
|
||||
{
|
||||
return simd_shuffle_up(value, delta);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleUp(bool value, ushort delta)
|
||||
{
|
||||
return !!simd_shuffle_up((ushort)value, delta);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvSubgroupShuffleDown(T value, ushort delta)
|
||||
{
|
||||
return simd_shuffle_down(value, delta);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupShuffleDown(bool value, ushort delta)
|
||||
{
|
||||
return !!simd_shuffle_down((ushort)value, delta);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)
|
||||
{
|
||||
return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvQuadBroadcast(T value, uint lane)
|
||||
{
|
||||
return quad_broadcast(value, lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvQuadBroadcast(bool value, uint lane)
|
||||
{
|
||||
return !!quad_broadcast((ushort)value, lane);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)
|
||||
{
|
||||
return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T spvQuadSwap(T value, uint dir)
|
||||
{
|
||||
return quad_shuffle_xor(value, dir + 1);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvQuadSwap(bool value, uint dir)
|
||||
{
|
||||
return !!quad_shuffle_xor((ushort)value, dir + 1);
|
||||
}
|
||||
|
||||
template<uint N>
|
||||
inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)
|
||||
{
|
||||
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);
|
||||
}
|
||||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]])
|
||||
{
|
||||
uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0));
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));
|
||||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0));
|
||||
_9.FragColor = float(gl_NumSubgroups);
|
||||
_9.FragColor = float(gl_SubgroupID);
|
||||
_9.FragColor = float(gl_SubgroupSize);
|
||||
_9.FragColor = float(gl_SubgroupInvocationID);
|
||||
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
||||
simdgroup_barrier(mem_flags::mem_device);
|
||||
simdgroup_barrier(mem_flags::mem_threadgroup);
|
||||
simdgroup_barrier(mem_flags::mem_texture);
|
||||
bool _39 = simd_is_first();
|
||||
bool elected = _39;
|
||||
_9.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupGtMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupLeMask).x;
|
||||
_9.FragColor = float4(gl_SubgroupLtMask).x;
|
||||
float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u);
|
||||
bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u);
|
||||
float3 first = spvSubgroupBroadcastFirst(float3(20.0));
|
||||
bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false));
|
||||
uint4 ballot_value = spvSubgroupBallot(true);
|
||||
bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
|
||||
bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
|
||||
uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize);
|
||||
uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize);
|
||||
uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize);
|
||||
uint shuffled = spvSubgroupShuffle(10u, 8u);
|
||||
bool shuffled_bool = spvSubgroupShuffle(true, 9u);
|
||||
uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u);
|
||||
bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u);
|
||||
uint shuffled_up = spvSubgroupShuffleUp(20u, 4u);
|
||||
bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u);
|
||||
uint shuffled_down = spvSubgroupShuffleDown(20u, 4u);
|
||||
bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u);
|
||||
bool has_all = simd_all(true);
|
||||
bool has_any = simd_any(true);
|
||||
bool has_equal = spvSubgroupAllEqual(0);
|
||||
has_equal = spvSubgroupAllEqual(true);
|
||||
has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0));
|
||||
has_equal = spvSubgroupAllEqual(bool4(true, true, false, true));
|
||||
float4 added = simd_sum(float4(20.0));
|
||||
int4 iadded = simd_sum(int4(20));
|
||||
float4 multiplied = simd_product(float4(20.0));
|
||||
int4 imultiplied = simd_product(int4(20));
|
||||
float4 lo = simd_min(float4(20.0));
|
||||
float4 hi = simd_max(float4(20.0));
|
||||
int4 slo = simd_min(int4(20));
|
||||
int4 shi = simd_max(int4(20));
|
||||
uint4 ulo = simd_min(uint4(20u));
|
||||
uint4 uhi = simd_max(uint4(20u));
|
||||
uint4 anded = simd_and(ballot_value);
|
||||
uint4 ored = simd_or(ballot_value);
|
||||
uint4 xored = simd_xor(ballot_value);
|
||||
added = simd_prefix_inclusive_sum(added);
|
||||
iadded = simd_prefix_inclusive_sum(iadded);
|
||||
multiplied = simd_prefix_inclusive_product(multiplied);
|
||||
imultiplied = simd_prefix_inclusive_product(imultiplied);
|
||||
added = simd_prefix_exclusive_sum(multiplied);
|
||||
multiplied = simd_prefix_exclusive_product(multiplied);
|
||||
iadded = simd_prefix_exclusive_sum(imultiplied);
|
||||
imultiplied = simd_prefix_exclusive_product(imultiplied);
|
||||
added = quad_sum(added);
|
||||
multiplied = quad_product(multiplied);
|
||||
iadded = quad_sum(iadded);
|
||||
imultiplied = quad_product(imultiplied);
|
||||
lo = quad_min(lo);
|
||||
hi = quad_max(hi);
|
||||
ulo = quad_min(ulo);
|
||||
uhi = quad_max(uhi);
|
||||
slo = quad_min(slo);
|
||||
shi = quad_max(shi);
|
||||
anded = quad_and(anded);
|
||||
ored = quad_or(ored);
|
||||
xored = quad_xor(xored);
|
||||
float4 swap_horiz = spvQuadSwap(float4(20.0), 0u);
|
||||
bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u);
|
||||
float4 swap_vertical = spvQuadSwap(float4(20.0), 1u);
|
||||
bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u);
|
||||
float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u);
|
||||
bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u);
|
||||
float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u);
|
||||
bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u);
|
||||
}
|
||||
|
@ -224,11 +224,9 @@ inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)
|
||||
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);
|
||||
}
|
||||
|
||||
fragment main0_out main0()
|
||||
fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
|
||||
{
|
||||
main0_out out = {};
|
||||
uint gl_SubgroupSize = simd_sum(1);
|
||||
uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
|
||||
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
|
||||
@ -236,7 +234,8 @@ fragment main0_out main0()
|
||||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
out.FragColor = float(gl_SubgroupSize);
|
||||
out.FragColor = float(gl_SubgroupInvocationID);
|
||||
bool elected = simd_is_first();
|
||||
bool _24 = simd_is_first();
|
||||
bool elected = _24;
|
||||
out.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
out.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
out.FragColor = float4(gl_SubgroupGtMask).x;
|
@ -0,0 +1,25 @@
|
||||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
// Reduced test for emulated functionality.
|
||||
|
||||
void main()
|
||||
{
|
||||
// basic
|
||||
FragColor = float(gl_NumSubgroups);
|
||||
FragColor = float(gl_SubgroupID);
|
||||
FragColor = float(gl_SubgroupSize);
|
||||
FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierShared();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
}
|
@ -0,0 +1,138 @@
|
||||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle_relative : require
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
#extension GL_KHR_shader_subgroup_clustered : require
|
||||
#extension GL_KHR_shader_subgroup_quad : require
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
// basic
|
||||
FragColor = float(gl_NumSubgroups);
|
||||
FragColor = float(gl_SubgroupID);
|
||||
FragColor = float(gl_SubgroupSize);
|
||||
FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierShared();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
|
||||
// ballot
|
||||
FragColor = float(gl_SubgroupEqMask);
|
||||
FragColor = float(gl_SubgroupGeMask);
|
||||
FragColor = float(gl_SubgroupGtMask);
|
||||
FragColor = float(gl_SubgroupLeMask);
|
||||
FragColor = float(gl_SubgroupLtMask);
|
||||
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
|
||||
bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u);
|
||||
vec3 first = subgroupBroadcastFirst(vec3(20.0));
|
||||
bvec4 first_bool = subgroupBroadcastFirst(bvec4(false));
|
||||
uvec4 ballot_value = subgroupBallot(true);
|
||||
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
|
||||
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
|
||||
uint bit_count = subgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
|
||||
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
|
||||
uint lsb = subgroupBallotFindLSB(ballot_value);
|
||||
uint msb = subgroupBallotFindMSB(ballot_value);
|
||||
|
||||
// shuffle
|
||||
uint shuffled = subgroupShuffle(10u, 8u);
|
||||
bool shuffled_bool = subgroupShuffle(true, 9u);
|
||||
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
|
||||
bool shuffled_xor_bool = subgroupShuffleXor(false, 9u);
|
||||
|
||||
// shuffle relative
|
||||
uint shuffled_up = subgroupShuffleUp(20u, 4u);
|
||||
bool shuffled_up_bool = subgroupShuffleUp(true, 4u);
|
||||
uint shuffled_down = subgroupShuffleDown(20u, 4u);
|
||||
bool shuffled_down_bool = subgroupShuffleDown(false, 4u);
|
||||
|
||||
// vote
|
||||
bool has_all = subgroupAll(true);
|
||||
bool has_any = subgroupAny(true);
|
||||
bool has_equal = subgroupAllEqual(0);
|
||||
has_equal = subgroupAllEqual(true);
|
||||
has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0));
|
||||
has_equal = subgroupAllEqual(bvec4(true, true, false, true));
|
||||
|
||||
// arithmetic
|
||||
vec4 added = subgroupAdd(vec4(20.0));
|
||||
ivec4 iadded = subgroupAdd(ivec4(20));
|
||||
vec4 multiplied = subgroupMul(vec4(20.0));
|
||||
ivec4 imultiplied = subgroupMul(ivec4(20));
|
||||
vec4 lo = subgroupMin(vec4(20.0));
|
||||
vec4 hi = subgroupMax(vec4(20.0));
|
||||
ivec4 slo = subgroupMin(ivec4(20));
|
||||
ivec4 shi = subgroupMax(ivec4(20));
|
||||
uvec4 ulo = subgroupMin(uvec4(20));
|
||||
uvec4 uhi = subgroupMax(uvec4(20));
|
||||
uvec4 anded = subgroupAnd(ballot_value);
|
||||
uvec4 ored = subgroupOr(ballot_value);
|
||||
uvec4 xored = subgroupXor(ballot_value);
|
||||
|
||||
added = subgroupInclusiveAdd(added);
|
||||
iadded = subgroupInclusiveAdd(iadded);
|
||||
multiplied = subgroupInclusiveMul(multiplied);
|
||||
imultiplied = subgroupInclusiveMul(imultiplied);
|
||||
//lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal
|
||||
//hi = subgroupInclusiveMax(hi);
|
||||
//slo = subgroupInclusiveMin(slo);
|
||||
//shi = subgroupInclusiveMax(shi);
|
||||
//ulo = subgroupInclusiveMin(ulo);
|
||||
//uhi = subgroupInclusiveMax(uhi);
|
||||
//anded = subgroupInclusiveAnd(anded);
|
||||
//ored = subgroupInclusiveOr(ored);
|
||||
//xored = subgroupInclusiveXor(ored);
|
||||
//added = subgroupExclusiveAdd(lo);
|
||||
|
||||
added = subgroupExclusiveAdd(multiplied);
|
||||
multiplied = subgroupExclusiveMul(multiplied);
|
||||
iadded = subgroupExclusiveAdd(imultiplied);
|
||||
imultiplied = subgroupExclusiveMul(imultiplied);
|
||||
//lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal
|
||||
//hi = subgroupExclusiveMax(hi);
|
||||
//ulo = subgroupExclusiveMin(ulo);
|
||||
//uhi = subgroupExclusiveMax(uhi);
|
||||
//slo = subgroupExclusiveMin(slo);
|
||||
//shi = subgroupExclusiveMax(shi);
|
||||
//anded = subgroupExclusiveAnd(anded);
|
||||
//ored = subgroupExclusiveOr(ored);
|
||||
//xored = subgroupExclusiveXor(ored);
|
||||
|
||||
// clustered
|
||||
added = subgroupClusteredAdd(added, 4u);
|
||||
multiplied = subgroupClusteredMul(multiplied, 4u);
|
||||
iadded = subgroupClusteredAdd(iadded, 4u);
|
||||
imultiplied = subgroupClusteredMul(imultiplied, 4u);
|
||||
lo = subgroupClusteredMin(lo, 4u);
|
||||
hi = subgroupClusteredMax(hi, 4u);
|
||||
ulo = subgroupClusteredMin(ulo, 4u);
|
||||
uhi = subgroupClusteredMax(uhi, 4u);
|
||||
slo = subgroupClusteredMin(slo, 4u);
|
||||
shi = subgroupClusteredMax(shi, 4u);
|
||||
anded = subgroupClusteredAnd(anded, 4u);
|
||||
ored = subgroupClusteredOr(ored, 4u);
|
||||
xored = subgroupClusteredXor(xored, 4u);
|
||||
|
||||
// quad
|
||||
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
|
||||
bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true));
|
||||
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
|
||||
bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true));
|
||||
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
|
||||
bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true));
|
||||
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
|
||||
bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u);
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle_relative : require
|
||||
#extension GL_KHR_shader_subgroup_quad : require
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
// Reduced test for functionality exposed on iOS.
|
||||
|
||||
void main()
|
||||
{
|
||||
// basic
|
||||
FragColor = float(gl_NumSubgroups);
|
||||
FragColor = float(gl_SubgroupID);
|
||||
FragColor = float(gl_SubgroupSize);
|
||||
FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierShared();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
|
||||
// ballot
|
||||
FragColor = float(gl_SubgroupEqMask);
|
||||
FragColor = float(gl_SubgroupGeMask);
|
||||
FragColor = float(gl_SubgroupGtMask);
|
||||
FragColor = float(gl_SubgroupLeMask);
|
||||
FragColor = float(gl_SubgroupLtMask);
|
||||
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
|
||||
bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u);
|
||||
vec3 first = subgroupBroadcastFirst(vec3(20.0));
|
||||
bvec4 first_bool = subgroupBroadcastFirst(bvec4(false));
|
||||
uvec4 ballot_value = subgroupBallot(true);
|
||||
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
|
||||
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
|
||||
uint bit_count = subgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
|
||||
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
|
||||
uint lsb = subgroupBallotFindLSB(ballot_value);
|
||||
uint msb = subgroupBallotFindMSB(ballot_value);
|
||||
|
||||
// shuffle
|
||||
uint shuffled = subgroupShuffle(10u, 8u);
|
||||
bool shuffled_bool = subgroupShuffle(true, 9u);
|
||||
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
|
||||
bool shuffled_xor_bool = subgroupShuffleXor(false, 9u);
|
||||
|
||||
// shuffle relative
|
||||
uint shuffled_up = subgroupShuffleUp(20u, 4u);
|
||||
bool shuffled_up_bool = subgroupShuffleUp(true, 4u);
|
||||
uint shuffled_down = subgroupShuffleDown(20u, 4u);
|
||||
bool shuffled_down_bool = subgroupShuffleDown(false, 4u);
|
||||
|
||||
// vote
|
||||
bool has_all = subgroupAll(true);
|
||||
bool has_any = subgroupAny(true);
|
||||
bool has_equal = subgroupAllEqual(0);
|
||||
has_equal = subgroupAllEqual(true);
|
||||
has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0));
|
||||
has_equal = subgroupAllEqual(bvec4(true, true, false, true));
|
||||
|
||||
// quad
|
||||
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
|
||||
bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true));
|
||||
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
|
||||
bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true));
|
||||
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
|
||||
bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true));
|
||||
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
|
||||
bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u);
|
||||
}
|
@ -0,0 +1,138 @@
|
||||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle_relative : require
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
#extension GL_KHR_shader_subgroup_clustered : require
|
||||
#extension GL_KHR_shader_subgroup_quad : require
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
// basic
|
||||
FragColor = float(gl_NumSubgroups);
|
||||
FragColor = float(gl_SubgroupID);
|
||||
FragColor = float(gl_SubgroupSize);
|
||||
FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierShared();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
|
||||
// ballot
|
||||
FragColor = float(gl_SubgroupEqMask);
|
||||
FragColor = float(gl_SubgroupGeMask);
|
||||
FragColor = float(gl_SubgroupGtMask);
|
||||
FragColor = float(gl_SubgroupLeMask);
|
||||
FragColor = float(gl_SubgroupLtMask);
|
||||
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
|
||||
bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u);
|
||||
vec3 first = subgroupBroadcastFirst(vec3(20.0));
|
||||
bvec4 first_bool = subgroupBroadcastFirst(bvec4(false));
|
||||
uvec4 ballot_value = subgroupBallot(true);
|
||||
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
|
||||
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
|
||||
uint bit_count = subgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
|
||||
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
|
||||
uint lsb = subgroupBallotFindLSB(ballot_value);
|
||||
uint msb = subgroupBallotFindMSB(ballot_value);
|
||||
|
||||
// shuffle
|
||||
uint shuffled = subgroupShuffle(10u, 8u);
|
||||
bool shuffled_bool = subgroupShuffle(true, 9u);
|
||||
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
|
||||
bool shuffled_xor_bool = subgroupShuffleXor(false, 9u);
|
||||
|
||||
// shuffle relative
|
||||
uint shuffled_up = subgroupShuffleUp(20u, 4u);
|
||||
bool shuffled_up_bool = subgroupShuffleUp(true, 4u);
|
||||
uint shuffled_down = subgroupShuffleDown(20u, 4u);
|
||||
bool shuffled_down_bool = subgroupShuffleDown(false, 4u);
|
||||
|
||||
// vote
|
||||
bool has_all = subgroupAll(true);
|
||||
bool has_any = subgroupAny(true);
|
||||
bool has_equal = subgroupAllEqual(0);
|
||||
has_equal = subgroupAllEqual(true);
|
||||
has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0));
|
||||
has_equal = subgroupAllEqual(bvec4(true, true, false, true));
|
||||
|
||||
// arithmetic
|
||||
vec4 added = subgroupAdd(vec4(20.0));
|
||||
ivec4 iadded = subgroupAdd(ivec4(20));
|
||||
vec4 multiplied = subgroupMul(vec4(20.0));
|
||||
ivec4 imultiplied = subgroupMul(ivec4(20));
|
||||
vec4 lo = subgroupMin(vec4(20.0));
|
||||
vec4 hi = subgroupMax(vec4(20.0));
|
||||
ivec4 slo = subgroupMin(ivec4(20));
|
||||
ivec4 shi = subgroupMax(ivec4(20));
|
||||
uvec4 ulo = subgroupMin(uvec4(20));
|
||||
uvec4 uhi = subgroupMax(uvec4(20));
|
||||
uvec4 anded = subgroupAnd(ballot_value);
|
||||
uvec4 ored = subgroupOr(ballot_value);
|
||||
uvec4 xored = subgroupXor(ballot_value);
|
||||
|
||||
added = subgroupInclusiveAdd(added);
|
||||
iadded = subgroupInclusiveAdd(iadded);
|
||||
multiplied = subgroupInclusiveMul(multiplied);
|
||||
imultiplied = subgroupInclusiveMul(imultiplied);
|
||||
//lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal
|
||||
//hi = subgroupInclusiveMax(hi);
|
||||
//slo = subgroupInclusiveMin(slo);
|
||||
//shi = subgroupInclusiveMax(shi);
|
||||
//ulo = subgroupInclusiveMin(ulo);
|
||||
//uhi = subgroupInclusiveMax(uhi);
|
||||
//anded = subgroupInclusiveAnd(anded);
|
||||
//ored = subgroupInclusiveOr(ored);
|
||||
//xored = subgroupInclusiveXor(ored);
|
||||
//added = subgroupExclusiveAdd(lo);
|
||||
|
||||
added = subgroupExclusiveAdd(multiplied);
|
||||
multiplied = subgroupExclusiveMul(multiplied);
|
||||
iadded = subgroupExclusiveAdd(imultiplied);
|
||||
imultiplied = subgroupExclusiveMul(imultiplied);
|
||||
//lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal
|
||||
//hi = subgroupExclusiveMax(hi);
|
||||
//ulo = subgroupExclusiveMin(ulo);
|
||||
//uhi = subgroupExclusiveMax(uhi);
|
||||
//slo = subgroupExclusiveMin(slo);
|
||||
//shi = subgroupExclusiveMax(shi);
|
||||
//anded = subgroupExclusiveAnd(anded);
|
||||
//ored = subgroupExclusiveOr(ored);
|
||||
//xored = subgroupExclusiveXor(ored);
|
||||
|
||||
// clustered
|
||||
added = subgroupClusteredAdd(added, 4u);
|
||||
multiplied = subgroupClusteredMul(multiplied, 4u);
|
||||
iadded = subgroupClusteredAdd(iadded, 4u);
|
||||
imultiplied = subgroupClusteredMul(imultiplied, 4u);
|
||||
lo = subgroupClusteredMin(lo, 4u);
|
||||
hi = subgroupClusteredMax(hi, 4u);
|
||||
ulo = subgroupClusteredMin(ulo, 4u);
|
||||
uhi = subgroupClusteredMax(uhi, 4u);
|
||||
slo = subgroupClusteredMin(slo, 4u);
|
||||
shi = subgroupClusteredMax(shi, 4u);
|
||||
anded = subgroupClusteredAnd(anded, 4u);
|
||||
ored = subgroupClusteredOr(ored, 4u);
|
||||
xored = subgroupClusteredXor(xored, 4u);
|
||||
|
||||
// quad
|
||||
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
|
||||
bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true));
|
||||
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
|
||||
bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true));
|
||||
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
|
||||
bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true));
|
||||
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
|
||||
bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u);
|
||||
}
|
@ -678,6 +678,18 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
|
||||
case SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID:
|
||||
options->msl.r32ui_alignment_constant_id = value;
|
||||
break;
|
||||
|
||||
case SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS:
|
||||
options->msl.ios_use_simdgroup_functions = value != 0;
|
||||
break;
|
||||
|
||||
case SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS:
|
||||
options->msl.emulate_subgroups = value != 0;
|
||||
break;
|
||||
|
||||
case SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE:
|
||||
options->msl.fixed_subgroup_size = value;
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
|
@ -33,7 +33,7 @@ extern "C" {
|
||||
/* Bumped if ABI or API breaks backwards compatibility. */
|
||||
#define SPVC_C_API_VERSION_MAJOR 0
|
||||
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
|
||||
#define SPVC_C_API_VERSION_MINOR 42
|
||||
#define SPVC_C_API_VERSION_MINOR 43
|
||||
/* Bumped if internal implementation details change. */
|
||||
#define SPVC_C_API_VERSION_PATCH 0
|
||||
|
||||
@ -647,6 +647,10 @@ typedef enum spvc_compiler_option
|
||||
|
||||
SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT,
|
||||
|
||||
SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
|
||||
SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
|
||||
} spvc_compiler_option;
|
||||
|
||||
|
569
spirv_msl.cpp
569
spirv_msl.cpp
@ -168,9 +168,12 @@ void CompilerMSL::build_implicit_builtins()
|
||||
active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) ||
|
||||
active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
|
||||
bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff;
|
||||
bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
|
||||
bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
|
||||
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
|
||||
need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id ||
|
||||
needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask)
|
||||
needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask || need_local_invocation_index ||
|
||||
need_workgroup_size)
|
||||
{
|
||||
bool has_frag_coord = false;
|
||||
bool has_sample_id = false;
|
||||
@ -184,6 +187,8 @@ void CompilerMSL::build_implicit_builtins()
|
||||
bool has_subgroup_size = false;
|
||||
bool has_view_idx = false;
|
||||
bool has_layer = false;
|
||||
bool has_local_invocation_index = false;
|
||||
bool has_workgroup_size = false;
|
||||
uint32_t workgroup_id_type = 0;
|
||||
|
||||
// FIXME: Investigate the fact that there are no checks for the entry point interface variables.
|
||||
@ -191,7 +196,6 @@ void CompilerMSL::build_implicit_builtins()
|
||||
if (!ir.meta[var.self].decoration.builtin)
|
||||
return;
|
||||
|
||||
// Use Metal's native frame-buffer fetch API for subpass inputs.
|
||||
BuiltIn builtin = ir.meta[var.self].decoration.builtin_type;
|
||||
|
||||
if (var.storage == StorageClassOutput)
|
||||
@ -207,6 +211,7 @@ void CompilerMSL::build_implicit_builtins()
|
||||
if (var.storage != StorageClassInput)
|
||||
return;
|
||||
|
||||
// Use Metal's native frame-buffer fetch API for subpass inputs.
|
||||
if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses))
|
||||
{
|
||||
switch (builtin)
|
||||
@ -330,6 +335,20 @@ void CompilerMSL::build_implicit_builtins()
|
||||
}
|
||||
}
|
||||
|
||||
if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex)
|
||||
{
|
||||
builtin_local_invocation_index_id = var.self;
|
||||
mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self);
|
||||
has_local_invocation_index = true;
|
||||
}
|
||||
|
||||
if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
|
||||
{
|
||||
builtin_workgroup_size_id = var.self;
|
||||
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
|
||||
has_workgroup_size = true;
|
||||
}
|
||||
|
||||
// The base workgroup needs to have the same type and vector size
|
||||
// as the workgroup or invocation ID, so keep track of the type that
|
||||
// was used.
|
||||
@ -681,6 +700,48 @@ void CompilerMSL::build_implicit_builtins()
|
||||
builtin_sample_mask_id = var_id;
|
||||
mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id);
|
||||
}
|
||||
|
||||
if (need_local_invocation_index && !has_local_invocation_index)
|
||||
{
|
||||
uint32_t offset = ir.increase_bound_by(2);
|
||||
uint32_t type_ptr_id = offset;
|
||||
uint32_t var_id = offset + 1;
|
||||
|
||||
// Create gl_LocalInvocationIndex.
|
||||
SPIRType uint_type_ptr;
|
||||
uint_type_ptr = get_uint_type();
|
||||
uint_type_ptr.pointer = true;
|
||||
uint_type_ptr.parent_type = get_uint_type_id();
|
||||
uint_type_ptr.storage = StorageClassInput;
|
||||
|
||||
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
|
||||
ptr_type.self = get_uint_type_id();
|
||||
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
|
||||
set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex);
|
||||
builtin_local_invocation_index_id = var_id;
|
||||
mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id);
|
||||
}
|
||||
|
||||
if (need_workgroup_size && !has_workgroup_size)
|
||||
{
|
||||
uint32_t offset = ir.increase_bound_by(2);
|
||||
uint32_t type_ptr_id = offset;
|
||||
uint32_t var_id = offset + 1;
|
||||
|
||||
// Create gl_WorkgroupSize.
|
||||
uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
|
||||
SPIRType uint_type_ptr = get<SPIRType>(type_id);
|
||||
uint_type_ptr.pointer = true;
|
||||
uint_type_ptr.parent_type = type_id;
|
||||
uint_type_ptr.storage = StorageClassInput;
|
||||
|
||||
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
|
||||
ptr_type.self = type_id;
|
||||
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
|
||||
set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
|
||||
builtin_workgroup_size_id = var_id;
|
||||
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (needs_swizzle_buffer_def)
|
||||
@ -4787,7 +4848,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupBroadcast(T value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_broadcast(value, lane);");
|
||||
else
|
||||
statement("return simd_broadcast(value, lane);");
|
||||
@ -4796,7 +4857,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return !!quad_broadcast((ushort)value, lane);");
|
||||
else
|
||||
statement("return !!simd_broadcast((ushort)value, lane);");
|
||||
@ -4805,7 +4866,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);");
|
||||
@ -4817,19 +4878,28 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupBroadcastFirst(T value)");
|
||||
begin_scope();
|
||||
statement("return simd_broadcast_first(value);");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_broadcast_first(value);");
|
||||
else
|
||||
statement("return simd_broadcast_first(value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupBroadcastFirst(bool value)");
|
||||
begin_scope();
|
||||
statement("return !!simd_broadcast_first((ushort)value);");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return !!quad_broadcast_first((ushort)value);");
|
||||
else
|
||||
statement("return !!simd_broadcast_first((ushort)value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)");
|
||||
begin_scope();
|
||||
statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
@ -4837,13 +4907,26 @@ void CompilerMSL::emit_custom_functions()
|
||||
case SPVFuncImplSubgroupBallot:
|
||||
statement("inline uint4 spvSubgroupBallot(bool value)");
|
||||
begin_scope();
|
||||
statement("simd_vote vote = simd_ballot(value);");
|
||||
statement("// simd_ballot() returns a 64-bit integer-like object, but");
|
||||
statement("// SPIR-V callers expect a uint4. We must convert.");
|
||||
statement("// FIXME: This won't include higher bits if Apple ever supports");
|
||||
statement("// 128 lanes in an SIMD-group.");
|
||||
statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> "
|
||||
"32) & 0xFFFFFFFF), 0, 0);");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
{
|
||||
statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);");
|
||||
}
|
||||
else if (msl_options.is_ios())
|
||||
{
|
||||
// The current simd_vote on iOS uses a 32-bit integer-like object.
|
||||
statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement("simd_vote vote = simd_ballot(value);");
|
||||
statement("// simd_ballot() returns a 64-bit integer-like object, but");
|
||||
statement("// SPIR-V callers expect a uint4. We must convert.");
|
||||
statement("// FIXME: This won't include higher bits if Apple ever supports");
|
||||
statement("// 128 lanes in an SIMD-group.");
|
||||
statement(
|
||||
"return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> "
|
||||
"32) & 0xFFFFFFFF), 0, 0);");
|
||||
}
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
@ -4859,8 +4942,15 @@ void CompilerMSL::emit_custom_functions()
|
||||
case SPVFuncImplSubgroupBallotFindLSB:
|
||||
statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
}
|
||||
statement("ballot &= mask;");
|
||||
statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
|
||||
"ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
|
||||
@ -4871,8 +4961,15 @@ void CompilerMSL::emit_custom_functions()
|
||||
case SPVFuncImplSubgroupBallotFindMSB:
|
||||
statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
}
|
||||
statement("ballot &= mask;");
|
||||
statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
|
||||
"(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
|
||||
@ -4889,23 +4986,44 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("");
|
||||
statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
}
|
||||
statement("return spvPopCount4(ballot & mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
|
||||
"uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
|
||||
"uint2(0));");
|
||||
}
|
||||
statement("return spvPopCount4(ballot & mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
|
||||
}
|
||||
statement("return spvPopCount4(ballot & mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
@ -4919,19 +5037,28 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline bool spvSubgroupAllEqual(T value)");
|
||||
begin_scope();
|
||||
statement("return simd_all(all(value == simd_broadcast_first(value)));");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_all(all(value == quad_broadcast_first(value)));");
|
||||
else
|
||||
statement("return simd_all(all(value == simd_broadcast_first(value)));");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupAllEqual(bool value)");
|
||||
begin_scope();
|
||||
statement("return simd_all(value) || !simd_any(value);");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_all(value) || !quad_any(value);");
|
||||
else
|
||||
statement("return simd_all(value) || !simd_any(value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline bool spvSubgroupAllEqual(vec<bool, N> value)");
|
||||
begin_scope();
|
||||
statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_all(all(value == (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value)));");
|
||||
else
|
||||
statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
@ -4940,7 +5067,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffle(T value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_shuffle(value, lane);");
|
||||
else
|
||||
statement("return simd_shuffle(value, lane);");
|
||||
@ -4949,7 +5076,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffle(bool value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return !!quad_shuffle((ushort)value, lane);");
|
||||
else
|
||||
statement("return !!simd_shuffle((ushort)value, lane);");
|
||||
@ -4958,7 +5085,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);");
|
||||
@ -4970,7 +5097,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffleXor(T value, ushort mask)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_shuffle_xor(value, mask);");
|
||||
else
|
||||
statement("return simd_shuffle_xor(value, mask);");
|
||||
@ -4979,7 +5106,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return !!quad_shuffle_xor((ushort)value, mask);");
|
||||
else
|
||||
statement("return !!simd_shuffle_xor((ushort)value, mask);");
|
||||
@ -4988,7 +5115,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);");
|
||||
@ -5000,7 +5127,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffleUp(T value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_shuffle_up(value, delta);");
|
||||
else
|
||||
statement("return simd_shuffle_up(value, delta);");
|
||||
@ -5009,7 +5136,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return !!quad_shuffle_up((ushort)value, delta);");
|
||||
else
|
||||
statement("return !!simd_shuffle_up((ushort)value, delta);");
|
||||
@ -5018,7 +5145,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);");
|
||||
@ -5030,7 +5157,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffleDown(T value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return quad_shuffle_down(value, delta);");
|
||||
else
|
||||
statement("return simd_shuffle_down(value, delta);");
|
||||
@ -5039,7 +5166,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return !!quad_shuffle_down((ushort)value, delta);");
|
||||
else
|
||||
statement("return !!simd_shuffle_down((ushort)value, delta);");
|
||||
@ -5048,7 +5175,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
statement("return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);");
|
||||
@ -7677,6 +7804,10 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
|
||||
// Use the wider of the two scopes (smaller value)
|
||||
exe_scope = min(exe_scope, mem_scope);
|
||||
|
||||
if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem)
|
||||
// In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop.
|
||||
return;
|
||||
|
||||
string bar_stmt;
|
||||
if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2))
|
||||
bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier";
|
||||
@ -9852,9 +9983,11 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
|
||||
case BuiltInPrimitiveId:
|
||||
if (msl_options.multi_patch_workgroup)
|
||||
return "";
|
||||
/* fallthrough */
|
||||
return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
|
||||
case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
|
||||
case BuiltInSubgroupSize: // FIXME: Should work in any stage
|
||||
if (msl_options.emulate_subgroups)
|
||||
return "";
|
||||
return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
|
||||
case BuiltInPatchVertices:
|
||||
return "";
|
||||
@ -10062,15 +10195,18 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
|
||||
{
|
||||
switch (builtin)
|
||||
{
|
||||
case BuiltInNumSubgroups:
|
||||
case BuiltInSubgroupId:
|
||||
case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
|
||||
case BuiltInSubgroupSize: // FIXME: Should work in any stage
|
||||
if (msl_options.emulate_subgroups)
|
||||
break;
|
||||
/* fallthrough */
|
||||
case BuiltInGlobalInvocationId:
|
||||
case BuiltInWorkgroupId:
|
||||
case BuiltInNumWorkgroups:
|
||||
case BuiltInLocalInvocationId:
|
||||
case BuiltInLocalInvocationIndex:
|
||||
case BuiltInNumSubgroups:
|
||||
case BuiltInSubgroupId:
|
||||
case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
|
||||
case BuiltInSubgroupSize: // FIXME: Should work in any stage
|
||||
return string(" [[") + builtin_qualifier(builtin) + "]]";
|
||||
|
||||
default:
|
||||
@ -10336,6 +10472,10 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type)
|
||||
case BuiltInViewIndex:
|
||||
return get_execution_model() == ExecutionModelFragment && msl_options.multiview &&
|
||||
msl_options.multiview_layered_rendering;
|
||||
// Compute function in
|
||||
case BuiltInSubgroupId:
|
||||
case BuiltInNumSubgroups:
|
||||
return !msl_options.emulate_subgroups;
|
||||
// Any stage function in
|
||||
case BuiltInDeviceIndex:
|
||||
case BuiltInSubgroupEqMask:
|
||||
@ -10344,10 +10484,12 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type)
|
||||
case BuiltInSubgroupLeMask:
|
||||
case BuiltInSubgroupLtMask:
|
||||
return false;
|
||||
case BuiltInSubgroupLocalInvocationId:
|
||||
case BuiltInSubgroupSize:
|
||||
return get_execution_model() == ExecutionModelGLCompute ||
|
||||
(get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2));
|
||||
if (msl_options.fixed_subgroup_size != 0)
|
||||
return false;
|
||||
/* fallthrough */
|
||||
case BuiltInSubgroupLocalInvocationId:
|
||||
return !msl_options.emulate_subgroups;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
@ -10958,67 +11100,78 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
||||
entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
|
||||
}
|
||||
break;
|
||||
case BuiltInSubgroupLocalInvocationId:
|
||||
// This is natively supported in compute shaders.
|
||||
if (get_execution_model() == ExecutionModelGLCompute)
|
||||
case BuiltInSubgroupId:
|
||||
if (!msl_options.emulate_subgroups)
|
||||
break;
|
||||
|
||||
// This is natively supported in fragment shaders in MSL 2.2.
|
||||
if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
|
||||
break;
|
||||
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW(
|
||||
"SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
|
||||
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW(
|
||||
"SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
|
||||
|
||||
// Shaders other than compute shaders don't support the SIMD-group
|
||||
// builtins directly, but we can emulate them using the SIMD-group
|
||||
// functions. This might break if some of the subgroup terminated
|
||||
// before reaching the entry point.
|
||||
// For subgroup emulation, this is the same as the local invocation index.
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = simd_prefix_exclusive_sum(1);");
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
|
||||
to_expression(builtin_local_invocation_index_id), ";");
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupSize:
|
||||
// This is natively supported in compute shaders.
|
||||
if (get_execution_model() == ExecutionModelGLCompute)
|
||||
case BuiltInNumSubgroups:
|
||||
if (!msl_options.emulate_subgroups)
|
||||
break;
|
||||
|
||||
// This is natively supported in fragment shaders in MSL 2.2.
|
||||
if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
|
||||
// For subgroup emulation, this is the same as the workgroup size.
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
auto &type = expression_type(builtin_workgroup_size_id);
|
||||
string size_expr = to_expression(builtin_workgroup_size_id);
|
||||
if (type.vecsize >= 3)
|
||||
size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z");
|
||||
else if (type.vecsize == 2)
|
||||
size_expr = join(size_expr, ".x * ", size_expr, ".y");
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";");
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupLocalInvocationId:
|
||||
if (!msl_options.emulate_subgroups)
|
||||
break;
|
||||
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
|
||||
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
|
||||
|
||||
// For subgroup emulation, assume subgroups of size 1.
|
||||
entry_func.fixup_hooks_in.push_back(
|
||||
[=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
|
||||
[=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); });
|
||||
break;
|
||||
case BuiltInSubgroupSize:
|
||||
if (msl_options.emulate_subgroups)
|
||||
{
|
||||
// For subgroup emulation, assume subgroups of size 1.
|
||||
entry_func.fixup_hooks_in.push_back(
|
||||
[=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); });
|
||||
}
|
||||
else if (msl_options.fixed_subgroup_size != 0)
|
||||
{
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
|
||||
msl_options.fixed_subgroup_size, ";");
|
||||
});
|
||||
}
|
||||
break;
|
||||
case BuiltInSubgroupEqMask:
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
|
||||
if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
|
||||
}
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupGeMask:
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
|
||||
if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
if (msl_options.fixed_subgroup_size != 0)
|
||||
add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
// Case where index < 32, size < 32:
|
||||
// mask0 = bfi(0, 0xFFFFFFFF, index, size - index);
|
||||
@ -11034,60 +11187,150 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
||||
// This is further complicated by the fact that if you attempt
|
||||
// to bfi/bfe out-of-bounds on Metal, undefined behavior is the
|
||||
// result.
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
|
||||
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
|
||||
to_expression(builtin_subgroup_size_id), " - (int)max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
|
||||
if (msl_options.fixed_subgroup_size > 32)
|
||||
{
|
||||
// Don't use the subgroup size variable with fixed subgroup sizes,
|
||||
// since the variables could be defined in the wrong order.
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 0)), insert_bits(0u, 0xFFFFFFFF,"
|
||||
" (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ",
|
||||
msl_options.fixed_subgroup_size, " - max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 32u)), uint2(0)) & spvSubgroupBallot(true);");
|
||||
}
|
||||
else if (msl_options.fixed_subgroup_size != 0)
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", ",
|
||||
msl_options.fixed_subgroup_size, " - ",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
"), uint3(0)) & spvSubgroupBallot(true);");
|
||||
}
|
||||
else if (msl_options.is_ios())
|
||||
{
|
||||
// On iOS, the SIMD-group size will currently never exceed 32.
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", ",
|
||||
to_expression(builtin_subgroup_size_id), " - ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
|
||||
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
|
||||
to_expression(builtin_subgroup_size_id), " - (int)max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
|
||||
}
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupGtMask:
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
|
||||
if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
// The same logic applies here, except now the index is one
|
||||
// more than the subgroup invocation ID.
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
|
||||
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
|
||||
to_expression(builtin_subgroup_size_id), " - (int)max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
|
||||
if (msl_options.fixed_subgroup_size > 32)
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ",
|
||||
msl_options.fixed_subgroup_size, " - max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" + 1, 32u)), uint2(0)) & "
|
||||
"spvSubgroupBallot(true);");
|
||||
}
|
||||
else if (msl_options.fixed_subgroup_size != 0)
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
|
||||
msl_options.fixed_subgroup_size, " - ",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" - 1), uint3(0)) & spvSubgroupBallot(true);");
|
||||
}
|
||||
else if (msl_options.is_ios())
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
|
||||
to_expression(builtin_subgroup_size_id), " - ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
|
||||
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
|
||||
to_expression(builtin_subgroup_size_id), " - (int)max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
|
||||
}
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupLeMask:
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
|
||||
if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, 0, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, 0, ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, 0, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));");
|
||||
}
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupLtMask:
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
|
||||
if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, 0, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
|
||||
if (msl_options.is_ios())
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, 0, ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));");
|
||||
}
|
||||
else
|
||||
{
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, 0, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
|
||||
}
|
||||
});
|
||||
break;
|
||||
case BuiltInViewIndex:
|
||||
@ -12376,10 +12619,23 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
|
||||
const uint32_t *ops = stream(i);
|
||||
auto op = static_cast<Op>(i.op);
|
||||
|
||||
// Metal 2.0 is required. iOS only supports quad ops. macOS only supports
|
||||
// broadcast and shuffle on 10.13 (2.0), with full support in 10.14 (2.1).
|
||||
// Note that iOS makes no distinction between a quad-group and a subgroup;
|
||||
// all subgroups are quad-groups there.
|
||||
if (msl_options.emulate_subgroups)
|
||||
{
|
||||
// In this mode, only the GroupNonUniform cap is supported. The only op
|
||||
// we need to handle, then, is OpGroupNonUniformElect.
|
||||
if (op != OpGroupNonUniformElect)
|
||||
SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect.");
|
||||
// In this mode, the subgroup size is assumed to be one, so every invocation
|
||||
// is elected.
|
||||
emit_op(ops[0], ops[1], "true", true);
|
||||
return;
|
||||
}
|
||||
|
||||
// Metal 2.0 is required. iOS only supports quad ops on 11.0 (2.0), with
|
||||
// full support in 13.0 (2.2). macOS only supports broadcast and shuffle on
|
||||
// 10.13 (2.0), with full support in 10.14 (2.1).
|
||||
// Note that Apple GPUs before A13 make no distinction between a quad-group
|
||||
// and a SIMD-group; all SIMD-groups are quad-groups on those.
|
||||
if (!msl_options.supports_msl_version(2))
|
||||
SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up.");
|
||||
|
||||
@ -12388,12 +12644,32 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
|
||||
auto int_type = to_signed_basetype(integer_width);
|
||||
auto uint_type = to_unsigned_basetype(integer_width);
|
||||
|
||||
if (msl_options.is_ios())
|
||||
if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions))
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
default:
|
||||
SPIRV_CROSS_THROW("iOS only supports quad-group operations.");
|
||||
SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up.");
|
||||
case OpGroupNonUniformBroadcastFirst:
|
||||
if (!msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up.");
|
||||
break;
|
||||
case OpGroupNonUniformElect:
|
||||
if (!msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up.");
|
||||
break;
|
||||
case OpGroupNonUniformAny:
|
||||
case OpGroupNonUniformAll:
|
||||
case OpGroupNonUniformAllEqual:
|
||||
case OpGroupNonUniformBallot:
|
||||
case OpGroupNonUniformInverseBallot:
|
||||
case OpGroupNonUniformBallotBitExtract:
|
||||
case OpGroupNonUniformBallotFindLSB:
|
||||
case OpGroupNonUniformBallotFindMSB:
|
||||
case OpGroupNonUniformBallotBitCount:
|
||||
if (!msl_options.supports_msl_version(2, 2))
|
||||
SPIRV_CROSS_THROW("Ballot ops on iOS requires Metal 2.2 and up.");
|
||||
break;
|
||||
case OpGroupNonUniformBroadcast:
|
||||
case OpGroupNonUniformShuffle:
|
||||
case OpGroupNonUniformShuffleXor:
|
||||
@ -12430,7 +12706,10 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
|
||||
switch (op)
|
||||
{
|
||||
case OpGroupNonUniformElect:
|
||||
emit_op(result_type, id, "simd_is_first()", true);
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
emit_op(result_type, id, "quad_is_first()", false);
|
||||
else
|
||||
emit_op(result_type, id, "simd_is_first()", false);
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBroadcast:
|
||||
@ -12501,11 +12780,17 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformAll:
|
||||
emit_unary_func_op(result_type, id, ops[3], "simd_all");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
emit_unary_func_op(result_type, id, ops[3], "quad_all");
|
||||
else
|
||||
emit_unary_func_op(result_type, id, ops[3], "simd_all");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformAny:
|
||||
emit_unary_func_op(result_type, id, ops[3], "simd_any");
|
||||
if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
|
||||
emit_unary_func_op(result_type, id, ops[3], "quad_any");
|
||||
else
|
||||
emit_unary_func_op(result_type, id, ops[3], "simd_any");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformAllEqual:
|
||||
@ -12969,6 +13254,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
|
||||
return "thread_index_in_threadgroup";
|
||||
|
||||
case BuiltInSubgroupSize:
|
||||
if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0)
|
||||
// Shouldn't be reached.
|
||||
SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size??");
|
||||
if (execution.model == ExecutionModelFragment)
|
||||
{
|
||||
if (!msl_options.supports_msl_version(2, 2))
|
||||
@ -12983,16 +13271,25 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
|
||||
}
|
||||
|
||||
case BuiltInNumSubgroups:
|
||||
if (msl_options.emulate_subgroups)
|
||||
// Shouldn't be reached.
|
||||
SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation.");
|
||||
if (!msl_options.supports_msl_version(2))
|
||||
SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
|
||||
return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup";
|
||||
|
||||
case BuiltInSubgroupId:
|
||||
if (msl_options.emulate_subgroups)
|
||||
// Shouldn't be reached.
|
||||
SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation.");
|
||||
if (!msl_options.supports_msl_version(2))
|
||||
SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
|
||||
return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
|
||||
|
||||
case BuiltInSubgroupLocalInvocationId:
|
||||
if (msl_options.emulate_subgroups)
|
||||
// Shouldn't be reached.
|
||||
SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation.");
|
||||
if (execution.model == ExecutionModelFragment)
|
||||
{
|
||||
if (!msl_options.supports_msl_version(2, 2))
|
||||
|
@ -364,6 +364,28 @@ public:
|
||||
// and will be addressed using the current ViewIndex.
|
||||
bool arrayed_subpass_input = false;
|
||||
|
||||
// Whether to use SIMD-group or quadgroup functions to implement group nnon-uniform
|
||||
// operations. Some GPUs on iOS do not support the SIMD-group functions, only the
|
||||
// quadgroup functions.
|
||||
bool ios_use_simdgroup_functions = false;
|
||||
|
||||
// If set, the subgroup size will be assumed to be one, and subgroup-related
|
||||
// builtins and operations will be emitted accordingly. This mode is intended to
|
||||
// be used by MoltenVK on hardware/software configurations which do not provide
|
||||
// sufficient support for subgroups.
|
||||
bool emulate_subgroups = false;
|
||||
|
||||
// If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control,
|
||||
// allows the SIMD-group size (aka thread execution width) to vary depending on
|
||||
// register usage and requirements. In certain circumstances--for example, a pipeline
|
||||
// in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT--
|
||||
// this is undesirable. This fixes the value of the SubgroupSize builtin, instead of
|
||||
// mapping it to the Metal builtin [[thread_execution_width]]. If the thread
|
||||
// execution width is reduced, the extra invocations will appear to be inactive.
|
||||
// If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped
|
||||
// to the Metal [[thread_execution_width]] builtin.
|
||||
uint32_t fixed_subgroup_size = 0;
|
||||
|
||||
enum class IndexType
|
||||
{
|
||||
None = 0,
|
||||
@ -853,6 +875,8 @@ protected:
|
||||
uint32_t builtin_subgroup_size_id = 0;
|
||||
uint32_t builtin_dispatch_base_id = 0;
|
||||
uint32_t builtin_stage_input_size_id = 0;
|
||||
uint32_t builtin_local_invocation_index_id = 0;
|
||||
uint32_t builtin_workgroup_size_id = 0;
|
||||
uint32_t swizzle_buffer_id = 0;
|
||||
uint32_t buffer_size_buffer_id = 0;
|
||||
uint32_t view_mask_buffer_id = 0;
|
||||
|
@ -308,6 +308,14 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
|
||||
msl_args.append('--msl-arrayed-subpass-input')
|
||||
if '.1d-as-2d.' in shader:
|
||||
msl_args.append('--msl-texture-1d-as-2d')
|
||||
if '.simd.' in shader:
|
||||
msl_args.append('--msl-ios-use-simdgroup-functions')
|
||||
if '.emulate-subgroup.' in shader:
|
||||
msl_args.append('--msl-emulate-subgroups')
|
||||
if '.fixed-subgroup.' in shader:
|
||||
# Arbitrary for testing purposes.
|
||||
msl_args.append('--msl-fixed-subgroup-size')
|
||||
msl_args.append('32')
|
||||
|
||||
subprocess.check_call(msl_args)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user