// File viewer metadata: 80 lines, 2.0 KiB, displayed as Plaintext.
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||
|
|
||
|
#include <metal_stdlib>
|
||
|
#include <simd/simd.h>
|
||
|
|
||
|
using namespace metal;
|
||
|
|
||
|
// Constant-address-space uint3 holding (8, 1, 1). Tagged [[maybe_unused]];
// nothing in the visible part of this file reads it.
// NOTE(review): presumably mirrors the GLSL gl_WorkGroupSize built-in (compute
// workgroup dimensions) - confirm against the originating shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u);
|
||
|
|
||
|
// Copies every element of a fixed-length array in the `constant` address space
// into an equally sized array in the `thread` address space.
//   T   - element type
//   A   - number of elements in both arrays
//   dst - destination array (thread address space)
//   src - source array (constant address space)
template<typename T, uint A>
void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
    // Arrays are not assignable as a whole here, so copy one element at a time.
    for (uint elem = 0; elem < A; ++elem)
        dst[elem] = src[elem];
}
|
||
|
|
||
|
// Copies every element of a fixed-length array in the `constant` address space
// into an equally sized array in the `threadgroup` address space.
//   T   - element type
//   A   - number of elements in both arrays
//   dst - destination array (threadgroup address space)
//   src - source array (constant address space)
template<typename T, uint A>
void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
    // Arrays are not assignable as a whole here, so copy one element at a time.
    for (uint elem = 0; elem < A; ++elem)
        dst[elem] = src[elem];
}
|
||
|
|
||
|
// Copies every element of a fixed-length array in the `thread` address space
// into another equally sized array in the `thread` address space.
//   T   - element type
//   A   - number of elements in both arrays
//   dst - destination array (thread address space)
//   src - read-only source array (thread address space)
template<typename T, uint A>
void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
    // Arrays are not assignable as a whole here, so copy one element at a time.
    for (uint elem = 0; elem < A; ++elem)
        dst[elem] = src[elem];
}
|
||
|
|
||
|
// Copies every element of a fixed-length array in the `thread` address space
// into an equally sized array in the `threadgroup` address space.
//   T   - element type
//   A   - number of elements in both arrays
//   dst - destination array (threadgroup address space)
//   src - read-only source array (thread address space)
template<typename T, uint A>
void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
    // Arrays are not assignable as a whole here, so copy one element at a time.
    for (uint elem = 0; elem < A; ++elem)
        dst[elem] = src[elem];
}
|
||
|
|
||
|
// Copies every element of a fixed-length array in the `threadgroup` address
// space into an equally sized array in the `thread` address space.
//   T   - element type
//   A   - number of elements in both arrays
//   dst - destination array (thread address space)
//   src - read-only source array (threadgroup address space)
template<typename T, uint A>
void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
    // Arrays are not assignable as a whole here, so copy one element at a time.
    for (uint elem = 0; elem < A; ++elem)
        dst[elem] = src[elem];
}
|
||
|
|
||
|
// Copies every element of a fixed-length array in the `threadgroup` address
// space into another equally sized array in the `threadgroup` address space.
//   T   - element type
//   A   - number of elements in both arrays
//   dst - destination array (threadgroup address space)
//   src - read-only source array (threadgroup address space)
template<typename T, uint A>
void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
    // Arrays are not assignable as a whole here, so copy one element at a time.
    for (uint elem = 0; elem < A; ++elem)
        dst[elem] = src[elem];
}
|
||
|
|
||
|
// Compute kernel entry point exercising the array-copy helpers between the
// `thread` and `threadgroup` address spaces.
//
// gl_LocalInvocationIndex - this invocation's index within its threadgroup
//                           (bound to [[thread_index_in_threadgroup]]).
//
// Steps, per invocation:
//   1. Fill a private 8-float array with 0.0 .. 7.0.
//   2. Publish it into this invocation's row of `shared_group`.
//   3. Wait at a threadgroup-memory barrier.
//   4. Read the row at index (own index XOR 1) back into a private array.
//   5. Copy this invocation's own row into `shared_group_alt`.
//
// Nothing is written to any buffer visible in this function.
// NOTE(review): row indexing assumes gl_LocalInvocationIndex < 8, which
// presumably follows from the (8, 1, 1) workgroup size - confirm at dispatch.
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
    threadgroup float shared_group[8][8];
    threadgroup float shared_group_alt[8][8];

    // Private ramp 0..7 that this invocation publishes to threadgroup memory.
    float local_values[8];
    for (int slot = 0; slot < 8; ++slot)
        local_values[slot] = float(slot);

    spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], local_values);

    // Every row must be written before any invocation reads a row it did not write.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Row belonging to the paired invocation (lowest index bit flipped).
    const uint partner_row = gl_LocalInvocationIndex ^ 1u;

    float neighbour_values[8];
    spvArrayCopyFromThreadGroupToStack1(neighbour_values, shared_group[partner_row]);
    spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]);
}
|
||
|
|