1264e2705e
Metal doesn't support broadcasting or shuffling boolean values, but we can work around that by casting it to `ushort`, then casting it back to `bool`. I used `ushort` instead of `uint` because 16-bit values give better throughput on Apple GPUs.
152 lines
4.3 KiB
Plaintext
152 lines
4.3 KiB
Plaintext
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
struct SSBO
|
|
{
|
|
float FragColor;
|
|
};
|
|
|
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
|
|
|
template<typename T>
|
|
inline T spvSubgroupShuffle(T value, ushort lane)
|
|
{
|
|
return quad_shuffle(value, lane);
|
|
}
|
|
|
|
template<>
|
|
inline bool spvSubgroupShuffle(bool value, ushort lane)
|
|
{
|
|
return !!quad_shuffle((ushort)value, lane);
|
|
}
|
|
|
|
template<uint N>
|
|
inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)
|
|
{
|
|
return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);
|
|
}
|
|
|
|
template<typename T>
|
|
inline T spvSubgroupShuffleXor(T value, ushort mask)
|
|
{
|
|
return quad_shuffle_xor(value, mask);
|
|
}
|
|
|
|
template<>
|
|
inline bool spvSubgroupShuffleXor(bool value, ushort mask)
|
|
{
|
|
return !!quad_shuffle_xor((ushort)value, mask);
|
|
}
|
|
|
|
template<uint N>
|
|
inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)
|
|
{
|
|
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);
|
|
}
|
|
|
|
template<typename T>
|
|
inline T spvSubgroupShuffleUp(T value, ushort delta)
|
|
{
|
|
return quad_shuffle_up(value, delta);
|
|
}
|
|
|
|
template<>
|
|
inline bool spvSubgroupShuffleUp(bool value, ushort delta)
|
|
{
|
|
return !!quad_shuffle_up((ushort)value, delta);
|
|
}
|
|
|
|
template<uint N>
|
|
inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)
|
|
{
|
|
return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);
|
|
}
|
|
|
|
template<typename T>
|
|
inline T spvSubgroupShuffleDown(T value, ushort delta)
|
|
{
|
|
return quad_shuffle_down(value, delta);
|
|
}
|
|
|
|
template<>
|
|
inline bool spvSubgroupShuffleDown(bool value, ushort delta)
|
|
{
|
|
return !!quad_shuffle_down((ushort)value, delta);
|
|
}
|
|
|
|
template<uint N>
|
|
inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)
|
|
{
|
|
return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);
|
|
}
|
|
|
|
template<typename T>
|
|
inline T spvQuadBroadcast(T value, uint lane)
|
|
{
|
|
return quad_broadcast(value, lane);
|
|
}
|
|
|
|
template<>
|
|
inline bool spvQuadBroadcast(bool value, uint lane)
|
|
{
|
|
return !!quad_broadcast((ushort)value, lane);
|
|
}
|
|
|
|
template<uint N>
|
|
inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)
|
|
{
|
|
return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);
|
|
}
|
|
|
|
template<typename T>
|
|
inline T spvQuadSwap(T value, uint dir)
|
|
{
|
|
return quad_shuffle_xor(value, dir + 1);
|
|
}
|
|
|
|
template<>
|
|
inline bool spvQuadSwap(bool value, uint dir)
|
|
{
|
|
return !!quad_shuffle_xor((ushort)value, dir + 1);
|
|
}
|
|
|
|
template<uint N>
|
|
inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)
|
|
{
|
|
return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);
|
|
}
|
|
|
|
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]])
|
|
{
|
|
_9.FragColor = float(gl_NumSubgroups);
|
|
_9.FragColor = float(gl_SubgroupID);
|
|
_9.FragColor = float(gl_SubgroupSize);
|
|
_9.FragColor = float(gl_SubgroupInvocationID);
|
|
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
|
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
|
simdgroup_barrier(mem_flags::mem_device);
|
|
simdgroup_barrier(mem_flags::mem_threadgroup);
|
|
simdgroup_barrier(mem_flags::mem_texture);
|
|
uint shuffled = spvSubgroupShuffle(10u, 8u);
|
|
bool shuffled_bool = spvSubgroupShuffle(true, 9u);
|
|
uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u);
|
|
bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u);
|
|
uint shuffled_up = spvSubgroupShuffleUp(20u, 4u);
|
|
bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u);
|
|
uint shuffled_down = spvSubgroupShuffleDown(20u, 4u);
|
|
bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u);
|
|
float4 swap_horiz = spvQuadSwap(float4(20.0), 0u);
|
|
bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u);
|
|
float4 swap_vertical = spvQuadSwap(float4(20.0), 1u);
|
|
bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u);
|
|
float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u);
|
|
bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u);
|
|
float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u);
|
|
bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u);
|
|
}
|
|
|