711300baed
Similar to scalar access chain fix, this causes a read-modify-write on memory we're not supposed to write to.
97 lines
2.7 KiB
Plaintext
97 lines
2.7 KiB
Plaintext
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
#pragma clang diagnostic ignored "-Wmissing-braces"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Fixed-size array wrapper that can be indexed from each Metal address space.
// Every operator[] overload forwards directly to the underlying C array;
// pos is used unchecked, so an out-of-range index is the caller's problem.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    // A Num of 0 still reserves one element, so the member is never zero-length.
    T elements[Num ? Num : 1];

    // Accessors for objects living in the thread address space.
    thread T& operator [] (size_t pos) thread
    {
        return elements[pos];
    }
    constexpr const thread T& operator [] (size_t pos) const thread
    {
        return elements[pos];
    }

    // Accessors for objects living in the device address space.
    device T& operator [] (size_t pos) device
    {
        return elements[pos];
    }
    constexpr const device T& operator [] (size_t pos) const device
    {
        return elements[pos];
    }

    // The constant address space only gets a read-only accessor.
    constexpr const constant T& operator [] (size_t pos) const constant
    {
        return elements[pos];
    }

    // Accessors for objects living in the threadgroup address space.
    threadgroup T& operator [] (size_t pos) threadgroup
    {
        return elements[pos];
    }
    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
    {
        return elements[pos];
    }
};
|
|
|
|
// Working copy of one record, with each member held at its natural width
// (float, float2, float3, float4). main0 fills a local instance of this from
// the buffer-side Sub_1 record, modifies it, and writes it back.
struct Sub
{
    spvUnsafeArray<float, 2> f;
    spvUnsafeArray<float2, 2> f2;
    spvUnsafeArray<float3, 2> f3;
    spvUnsafeArray<float4, 2> f4;
};
|
|
|
|
// Buffer-side layout of one record (element type of SSBO::sub).
// f and f2 are declared as float4 arrays even though main0 only uses their
// .x and .xy components respectively.
// NOTE(review): presumably the wider element type reproduces the array stride
// of the original buffer layout - confirm against the source shader.
struct Sub_1
{
    float4 f[2];   // only .x is read/written by main0
    float4 f2[2];  // only .xy is read/written by main0
    float3 f3[2];
    float4 f4[2];
};
|
|
|
|
// Contents of the device buffer that main0 receives at [[buffer(0)]]:
// two Sub_1 records.
struct SSBO
{
    Sub_1 sub[2];
};
|
|
|
|
// Workgroup size constant: 1 along every axis. Tagged [[maybe_unused]] so that
// leaving it unreferenced does not trigger a warning.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u, 1u, 1u);
|
|
|
|
// Compute kernel: copies the record selected by gl_WorkGroupID.x out of the
// buffer into a thread-local Sub, adds a constant to one element of each
// member (selected by gl_GlobalInvocationID.x), writes the record back, and
// finally bumps two fields of record 0 in place.
//
// _27                    device buffer holding two Sub_1 records
// gl_WorkGroupID         threadgroup position in the grid; .x picks the record
// gl_GlobalInvocationID  thread position in the grid; .x picks the element
//
// Neither index is range-checked here; both index 2-element arrays.
kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
    Sub foo;

    // Load: f and f2 are stored as float4 in the buffer, so take only the
    // components that carry data (.x and .xy).
    foo.f[0] = _27.sub[gl_WorkGroupID.x].f[0].x;
    foo.f[1] = _27.sub[gl_WorkGroupID.x].f[1].x;
    foo.f2[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy;
    foo.f2[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy;
    foo.f3[0] = _27.sub[gl_WorkGroupID.x].f3[0];
    foo.f3[1] = _27.sub[gl_WorkGroupID.x].f3[1];
    foo.f4[0] = _27.sub[gl_WorkGroupID.x].f4[0];
    foo.f4[1] = _27.sub[gl_WorkGroupID.x].f4[1];

    // Modify the thread-local copy.
    foo.f[gl_GlobalInvocationID.x] += 1.0;
    foo.f2[gl_GlobalInvocationID.x] += float2(2.0);
    foo.f3[gl_GlobalInvocationID.x] += float3(3.0);
    foo.f4[gl_GlobalInvocationID.x] += float4(4.0);

    // Store: the float4-typed slots are written through a narrower device
    // reference so the assignment targets a float / float2 rather than the
    // whole float4 element, leaving the remaining components unwritten.
    (device float&)_27.sub[gl_WorkGroupID.x].f[0] = foo.f[0];
    (device float&)_27.sub[gl_WorkGroupID.x].f[1] = foo.f[1];
    (device float2&)_27.sub[gl_WorkGroupID.x].f2[0] = foo.f2[0];
    (device float2&)_27.sub[gl_WorkGroupID.x].f2[1] = foo.f2[1];
    _27.sub[gl_WorkGroupID.x].f3[0] = foo.f3[0];
    _27.sub[gl_WorkGroupID.x].f3[1] = foo.f3[1];
    _27.sub[gl_WorkGroupID.x].f4[0] = foo.f4[0];
    _27.sub[gl_WorkGroupID.x].f4[1] = foo.f4[1];

    // In-place updates of record 0, again storing through the narrow reference.
    (device float&)_27.sub[0].f[0] = _27.sub[0].f[0].x + 5.0;
    (device float2&)_27.sub[0].f2[1] = _27.sub[0].f2[1].xy + float2(5.0);
}
|
|
|