d1479f871a
This avoids a lot of huge code changes. Arrays generally cannot be copied in and out of buffers, at least no compiler frontend seems to do it. Also avoids a lot of issues surrounding packed vectors and matrices.
75 lines
1.5 KiB
Plaintext
75 lines
1.5 KiB
Plaintext
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Layout of the device buffer that main0 receives as `buf` ([[buffer(0)]]).
struct foo
{
    // select_buffer and select_buffer_null hand out pointers to a[0];
    // main0 walks this array element by element.
    int a[128];
    // b and c are not referenced by any function visible in this file.
    uint b;
    float2 c;
};
|
|
|
|
// Layout of the constant buffer that main0 receives as `cb` ([[buffer(1)]]).
struct bar
{
    // Selector value: the select_* helpers compare it against zero to decide
    // which pointer they return.
    int d;
};
|
|
|
|
// Layout of the device buffer that main0 receives as `buf2` ([[buffer(2)]]).
struct baz
{
    // select_buffer returns a pointer to e[0] when cb.d is zero.
    int e[128];
};
|
|
|
|
// Picks one of two device arrays based on cb.d.
// Returns a pointer to buf.a[0] when cb.d is non-zero, otherwise a pointer
// to buf2.e[0]. The result is never null.
static inline __attribute__((always_inline))
device int* select_buffer(device foo& buf, device baz& buf2, constant bar& cb)
{
    return (cb.d != 0) ? &buf.a[0u] : &buf2.e[0u];
}
|
|
|
|
// Returns a pointer to buf.a[0] when cb.d is non-zero, otherwise nullptr.
// Callers must check the result before dereferencing it.
static inline __attribute__((always_inline))
device int* select_buffer_null(device foo& buf, constant bar& cb)
{
    return (cb.d != 0) ? &buf.a[0u] : nullptr;
}
|
|
|
|
// Returns a pointer to tgsm[0] when cb.d is non-zero, otherwise nullptr.
// Callers must check the result before dereferencing it.
static inline __attribute__((always_inline))
threadgroup int* select_tgsm(constant bar& cb, threadgroup int (&tgsm)[128])
{
    return (cb.d != 0) ? &tgsm[0u] : nullptr;
}
|
|
|
|
// Kernel entry point.
//
// Obtains pointers through the three select_* helpers, then walks buf.a and
// the selected threadgroup pointer in lock step: while the current device
// element is non-zero, the sum of the device element and the threadgroup
// element is written back to both, and both pointers advance by one element.
// The walk stops at the first device element that reads as zero.
//
// NOTE(review): select_tgsm returns nullptr when cb.d == 0; in that case the
// loop dereferences a null threadgroup pointer if buf.a[0] is non-zero.
// NOTE(review): the loop has no bound tied to the 128-element arrays; it ends
// only when a zero is read through the device pointer.
// NOTE(review): tgsm is not written anywhere in this function before the
// first read through the threadgroup pointer.
kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], device baz& buf2 [[buffer(2)]])
{
    threadgroup int tgsm[128];
    // sbuf and sbuf2 are computed but never read below.
    device int* sbuf = select_buffer(buf, buf2, cb);
    device int* sbuf2 = select_buffer_null(buf, cb);
    threadgroup int* stgsm = select_tgsm(cb, tgsm);
    threadgroup int* cur = stgsm;
    // _73 is the device-side cursor; it always starts at buf.a[0].
    device int* _73;
    _73 = &buf.a[0u];
    threadgroup int* _76;
    int _77;
    for (;;)
    {
        _76 = cur;
        _77 = *_73;
        if (_77 != 0)
        {
            int _81 = *_76;
            int _82 = _77 + _81;
            // Store the sum to both the device and the threadgroup element.
            *_73 = _82;
            *_76 = _82;
            // Advance both cursors by one int.
            cur = &_76[1u];
            _73 = &_73[1u];
            continue;
        }
        else
        {
            break;
        }
    }
}
|
|
|