#include #include using namespace metal; constant uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u); struct SSBO { float in_data[1]; }; struct SSBO2 { float out_data[1]; }; kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { threadgroup float sShared[4]; uint ident = gl_GlobalInvocationID.x; float idata = _22.in_data[ident]; sShared[gl_LocalInvocationIndex] = idata; threadgroup_barrier(mem_flags::mem_threadgroup); _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; }