// Listing captured from a file viewer (reported there as 201 lines, 5.3 KiB, plaintext).
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Layout of the buffer that main0 receives at [[buffer(0)]]: one int followed by two floats.
struct BUF
{
    int a;    // written by main0
    float b;  // read by main0, then overwritten
    float c;  // read by main0
};
|
|
|
|
// Lookup tables in the constant address space.
// Only _21 is referenced by the code visible in this file.
constant float _16[2] = { 1.0, 2.0 };
constant float _19[2] = { 3.0, 4.0 };
constant float _20[2][2] = {
    { 1.0, 2.0 },
    { 3.0, 4.0 }
};
constant float _21[2][2][2] = {
    { { 1.0, 2.0 }, { 3.0, 4.0 } },
    { { 1.0, 2.0 }, { 3.0, 4.0 } }
};
|
|
|
|
// Copies a 1-D array element by element from the constant address space
// into a thread-address-space array of the same length.
//   T   element type
//   A   number of elements
//   dst destination array (thread)
//   src source array (constant)
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
    for (uint idx = 0; idx != A; ++idx)
        dst[idx] = src[idx];
}
|
|
|
|
// Copies a 1-D array element by element from the constant address space
// into a threadgroup-address-space array of the same length.
//   T   element type
//   A   number of elements
//   dst destination array (threadgroup)
//   src source array (constant)
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        ++idx;
    }
}
|
|
|
|
// Copies a 1-D array element by element between two thread-address-space
// arrays of the same length.
//   T   element type
//   A   number of elements
//   dst destination array (thread)
//   src source array (thread, read-only)
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
    for (uint idx = 0; idx != A; ++idx)
        dst[idx] = src[idx];
}
|
|
|
|
// Copies a 1-D array element by element from a thread-address-space array
// into a threadgroup-address-space array of the same length.
//   T   element type
//   A   number of elements
//   dst destination array (threadgroup)
//   src source array (thread, read-only)
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        ++idx;
    }
}
|
|
|
|
// Copies a 1-D array element by element from a threadgroup-address-space array
// into a thread-address-space array of the same length.
//   T   element type
//   A   number of elements
//   dst destination array (thread)
//   src source array (threadgroup, read-only)
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
    for (uint idx = 0; idx != A; ++idx)
        dst[idx] = src[idx];
}
|
|
|
|
// Copies a 1-D array element by element between two threadgroup-address-space
// arrays of the same length.
//   T   element type
//   A   number of elements
//   dst destination array (threadgroup)
//   src source array (threadgroup, read-only)
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        ++idx;
    }
}
|
|
|
|
// Copies a 2-D array (A rows of B elements) from the constant address space
// into a thread-address-space array of the same shape.
// Each row is handed to the 1-D constant-to-thread helper.
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B])
{
    for (uint row = 0; row != A; ++row)
        spvArrayCopyFromConstantToStack1(dst[row], src[row]);
}
|
|
|
|
// Copies a 2-D array (A rows of B elements) from the constant address space
// into a threadgroup-address-space array of the same shape.
// Each row is handed to the 1-D constant-to-threadgroup helper.
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B])
{
    for (uint row = 0; row != A; ++row)
        spvArrayCopyFromConstantToThreadGroup1(dst[row], src[row]);
}
|
|
|
|
// Copies a 2-D array (A rows of B elements) between two thread-address-space
// arrays of the same shape.
// Each row is handed to the 1-D thread-to-thread helper.
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T (&src)[A][B])
{
    for (uint row = 0; row != A; ++row)
        spvArrayCopyFromStackToStack1(dst[row], src[row]);
}
|
|
|
|
// Copies a 2-D array (A rows of B elements) from a thread-address-space array
// into a threadgroup-address-space array of the same shape.
// Each row is handed to the 1-D thread-to-threadgroup helper.
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B])
{
    for (uint row = 0; row != A; ++row)
        spvArrayCopyFromStackToThreadGroup1(dst[row], src[row]);
}
|
|
|
|
// Copies a 2-D array (A rows of B elements) from a threadgroup-address-space array
// into a thread-address-space array of the same shape.
// Each row is handed to the 1-D threadgroup-to-thread helper.
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
    for (uint row = 0; row != A; ++row)
        spvArrayCopyFromThreadGroupToStack1(dst[row], src[row]);
}
|
|
|
|
// Copies a 2-D array (A rows of B elements) between two threadgroup-address-space
// arrays of the same shape.
// Each row is handed to the 1-D threadgroup-to-threadgroup helper.
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
    for (uint row = 0; row != A; ++row)
        spvArrayCopyFromThreadGroupToThreadGroup1(dst[row], src[row]);
}
|
|
|
|
// Copies a 3-D array (A slices of B x C elements) from the constant address space
// into a thread-address-space array of the same shape.
// Each slice is handed to the 2-D constant-to-thread helper.
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
    for (uint slice = 0; slice != A; ++slice)
        spvArrayCopyFromConstantToStack2(dst[slice], src[slice]);
}
|
|
|
|
// Copies a 3-D array (A slices of B x C elements) from the constant address space
// into a threadgroup-address-space array of the same shape.
// Each slice is handed to the 2-D constant-to-threadgroup helper.
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
    for (uint slice = 0; slice != A; ++slice)
        spvArrayCopyFromConstantToThreadGroup2(dst[slice], src[slice]);
}
|
|
|
|
// Copies a 3-D array (A slices of B x C elements) between two thread-address-space
// arrays of the same shape.
// Each slice is handed to the 2-D thread-to-thread helper.
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
    for (uint slice = 0; slice != A; ++slice)
        spvArrayCopyFromStackToStack2(dst[slice], src[slice]);
}
|
|
|
|
// Copies a 3-D array (A slices of B x C elements) from a thread-address-space array
// into a threadgroup-address-space array of the same shape.
// Each slice is handed to the 2-D thread-to-threadgroup helper.
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
    for (uint slice = 0; slice != A; ++slice)
        spvArrayCopyFromStackToThreadGroup2(dst[slice], src[slice]);
}
|
|
|
|
// Copies a 3-D array (A slices of B x C elements) from a threadgroup-address-space array
// into a thread-address-space array of the same shape.
// Each slice is handed to the 2-D threadgroup-to-thread helper.
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
    for (uint slice = 0; slice != A; ++slice)
        spvArrayCopyFromThreadGroupToStack2(dst[slice], src[slice]);
}
|
|
|
|
// Copies a 3-D array (A slices of B x C elements) between two threadgroup-address-space
// arrays of the same shape.
// Each slice is handed to the 2-D threadgroup-to-threadgroup helper.
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
    for (uint slice = 0; slice != A; ++slice)
        spvArrayCopyFromThreadGroupToThreadGroup2(dst[slice], src[slice]);
}
|
|
|
|
// Compute entry point. Round-trips 2x2x2 float arrays through the array-copy
// helpers and stores two results in the buffer bound at index 0:
//   o.a receives the last element of the constant table _21, converted to int;
//   o.b receives element [1][0][1] of a table built from the incoming o.b / o.c.
kernel void main0(device BUF& o [[buffer(0)]])
{
    // Constant table -> thread-local copy, then publish its final element.
    float fromConstant[2][2][2];
    spvArrayCopyFromConstantToStack3(fromConstant, _21);
    o.a = int(fromConstant[1][1][1]);

    // Every element of the runtime table is one of the two float fields,
    // so each field is read once and reused.
    const float fieldB = o.b;
    const float fieldC = o.c;
    float runtimeTable[2][2][2] = {
        { { fieldB, fieldC }, { fieldB, fieldB } },
        { { fieldC, fieldC }, { fieldC, fieldB } }
    };

    // Two successive thread-to-thread copies of the runtime table.
    float firstCopy[2][2][2];
    spvArrayCopyFromStackToStack3(firstCopy, runtimeTable);
    float secondCopy[2][2][2];
    spvArrayCopyFromStackToStack3(secondCopy, firstCopy);

    o.b = secondCopy[1][0][1];
}
|
|
|