#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Thin wrapper around a fixed-size C array that provides an operator[] overload
// for each Metal address space the object itself may live in (thread, device,
// constant, threadgroup). Indexing is forwarded straight to `elements` with no
// bounds checking.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    // `Num ? Num : 1` keeps the member array at least one element long, so a
    // zero-length instantiation still declares a valid array.
    T elements[Num ? Num : 1];

    thread T& operator [] (size_t pos) thread
    {
        return elements[pos];
    }
    constexpr const thread T& operator [] (size_t pos) const thread
    {
        return elements[pos];
    }

    device T& operator [] (size_t pos) device
    {
        return elements[pos];
    }
    constexpr const device T& operator [] (size_t pos) const device
    {
        return elements[pos];
    }

    // Objects in the constant address space are read-only, hence only a const overload.
    constexpr const constant T& operator [] (size_t pos) const constant
    {
        return elements[pos];
    }

    threadgroup T& operator [] (size_t pos) threadgroup
    {
        return elements[pos];
    }
    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
    {
        return elements[pos];
    }
};

// Workgroup size declared for this shader: 8 x 1 x 1.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u);

// Element-wise copy helpers for one-dimensional arrays of A elements.
// One overload exists per (source, destination) address-space pair because the
// address space is part of the reference type; every body is the same loop.

// Copies A elements from a constant-address-space array into a thread-local array.
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
    for (uint i = 0; i < A; i++)
    {
        dst[i] = src[i];
    }
}

// Copies A elements from a constant-address-space array into a threadgroup array.
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
    for (uint i = 0; i < A; i++)
    {
        dst[i] = src[i];
    }
}

// Copies A elements from one thread-local array into another.
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
    for (uint i = 0; i < A; i++)
    {
        dst[i] = src[i];
    }
}

// Copies A elements from a thread-local array into a threadgroup array.
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
    for (uint i = 0; i < A; i++)
    {
        dst[i] = src[i];
    }
}

// Copies A elements from a threadgroup array into a thread-local array.
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
    for (uint i = 0; i < A; i++)
    {
        dst[i] = src[i];
    }
}

// Copies A elements from one threadgroup array into another.
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
    for (uint i = 0; i < A; i++)
    {
        dst[i] = src[i];
    }
}

// Compute entry point.
//
// gl_LocalInvocationIndex is the thread's index within its threadgroup and is
// used directly as the row index into the 8x8 threadgroup arrays below.
// NOTE(review): the rows are only in bounds if the dispatch uses at most 8
// threads per threadgroup, as gl_WorkGroupSize suggests - confirm on the host side.
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
    threadgroup float shared_group[8][8];
    threadgroup float shared_group_alt[8][8];

    // Fill a thread-local array with 0.0 .. 7.0.
    spvUnsafeArray<float, 8> blob;
    for (int i = 0; i < 8; i++)
    {
        blob[i] = float(i);
    }

    // Publish the local array into this thread's row of threadgroup memory.
    spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob.elements);

    // Threadgroup-memory barrier placed before the cross-thread read of shared_group below.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Read back the row selected by flipping the lowest bit of the thread index
    // (the row written by the paired thread).
    spvUnsafeArray<float, 8> copied_blob;
    spvArrayCopyFromThreadGroupToStack1(copied_blob.elements, shared_group[gl_LocalInvocationIndex ^ 1u]);

    // Duplicate this thread's own row into the second threadgroup array.
    spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]);
}