Merge pull request #2240 from KhronosGroup/fix-2239

MSL: Remove special case for threadgroup array wrapper.
This commit is contained in:
Hans-Kristian Arntzen 2023-12-07 14:50:19 +01:00 committed by GitHub
commit f349c91274
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
73 changed files with 1414 additions and 687 deletions

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Foo0
{
float a;
@ -44,7 +85,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u);
kernel void main0(device SSBO& _53 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Foo2 coeffs[64];
threadgroup spvUnsafeArray<Foo2, 64> coeffs;
coeffs[gl_LocalInvocationIndex] = Foo2{ Foo1{ Foo0{ 0.0 } }, 0.0 };
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationIndex == 0u)

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float out_data[1];
@ -12,7 +53,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u);
kernel void main0(device SSBO& _67 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup float foo[4][4];
threadgroup spvUnsafeArray<spvUnsafeArray<float, 4>, 4> foo;
foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex);
threadgroup_barrier(mem_flags::mem_threadgroup);
_67.out_data[gl_GlobalInvocationID.x] = ((foo[gl_LocalInvocationID.x][0] + foo[gl_LocalInvocationID.x][1]) + foo[gl_LocalInvocationID.x][2]) + foo[gl_LocalInvocationID.x][3];

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float in_data[1];
@ -17,7 +58,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
threadgroup float sShared[4];
threadgroup spvUnsafeArray<float, 4> sShared;
sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x];
threadgroup_barrier(mem_flags::mem_threadgroup);
_44.out_data[gl_GlobalInvocationID.x] = sShared[3u - gl_LocalInvocationIndex];

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float4 values[1];
@ -12,7 +53,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup short4 foo[4];
threadgroup spvUnsafeArray<short4, 4> foo;
foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0));
threadgroup_barrier(mem_flags::mem_threadgroup);
_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u]));

View File

@ -44,114 +44,6 @@ struct spvUnsafeArray
}
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
struct main0_out
{
float4 gl_Position;
@ -171,14 +63,14 @@ struct main0_in
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup float4 spvStorageFoo[8][4][2];
threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> spvStorageFoo[8];
threadgroup auto &Foo = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
gl_out[gl_InvocationID].gl_Position = float4(1.0);
spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
Foo[gl_InvocationID] = gl_in[gl_InvocationID].iFoo;
if (gl_InvocationID == 0)
{
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });

View File

@ -44,114 +44,6 @@ struct spvUnsafeArray
}
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
struct main0_out
{
float4 gl_Position;
@ -171,7 +63,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup float4 Foo[4][2];
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> Foo;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])
@ -181,7 +73,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
return;
gl_out[gl_InvocationID].gl_Position = float4(1.0);
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
Foo[gl_InvocationID] = _38;
if (gl_InvocationID == 0)
{
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });

View File

@ -32,7 +32,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup P spvStorage_11[8];
threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup auto &_11 = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
_11.a = 1.0;

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct P
{
float a;
@ -29,7 +70,7 @@ struct main0_patchOut
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup C c[4];
threadgroup spvUnsafeArray<C, 4> c;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
patchOut.m_11_a = 1.0;

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct P
{
float a;
@ -30,8 +71,8 @@ struct main0_patchOut
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup C spvStoragec[8][4];
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<C, 4> spvStoragec[8];
threadgroup auto &c = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct main0_out
{
float4 gl_Position;
@ -16,7 +57,7 @@ struct main0_patchOut
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup float4 v0[4];
threadgroup spvUnsafeArray<float4, 4> v0;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
v0[gl_InvocationID] = float4(1.0);

View File

@ -59,8 +59,8 @@ struct main0_patchOut
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup float4 spvStoragev0[8][4];
threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<float4, 4> spvStoragev0[8];
threadgroup auto &v0 = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -58,7 +58,7 @@ struct main0_patchOut
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup float4 v0[4];
threadgroup spvUnsafeArray<float4, 4> v0;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
v0[gl_InvocationID] = float4(1.0);

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct main0_out
{
float4 v0;
@ -19,8 +60,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup float4 spvStoragev1[8][2];
threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<float4, 2> spvStoragev1[8];
threadgroup auto &v1 = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
gl_out[gl_InvocationID].v0 = float4(1.0);

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct main0_out
{
float4 v0;
@ -17,7 +58,7 @@ struct main0_patchOut
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup float4 v1[2];
threadgroup spvUnsafeArray<float4, 2> v1;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
gl_out[gl_InvocationID].v0 = float4(1.0);

View File

@ -67,8 +67,8 @@ struct main0_patchOut
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -66,7 +66,7 @@ struct main0_patchOut
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
gl_out[gl_InvocationID].v0 = float4(1.0);

View File

@ -67,8 +67,8 @@ struct main0_patchOut
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -66,7 +66,7 @@ struct main0_patchOut
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
gl_out[gl_InvocationID].v0 = float4(1.0);

View File

@ -300,7 +300,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])

View File

@ -342,7 +342,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])

View File

@ -318,7 +318,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])

View File

@ -136,7 +136,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FFlatTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Block
{
uint2 _m0[2];
@ -16,7 +57,7 @@ struct SSBO
kernel void main0(device SSBO& ssbo [[buffer(0)]])
{
threadgroup uint2 _18[2];
threadgroup spvUnsafeArray<uint2, 2> _18;
ssbo._m0[0u] = Block{ { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] }, { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] } };
}

View File

@ -165,7 +165,7 @@ struct SSBO
kernel void main0(device SSBO& ssbo [[buffer(0)]])
{
threadgroup uint2 _18[2];
threadgroup spvUnsafeArray<uint2, 2> _18;
spvUnsafeArray<uint2, 2> _27;
spvArrayCopyFromDeviceToStack1(_27.elements, ssbo._m0[0u]._m1);
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements);

View File

@ -165,15 +165,15 @@ struct SSBO
kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]])
{
threadgroup uint2 _18[2];
threadgroup spvUnsafeArray<uint2, 2> _18;
spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1);
spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1);
spvUnsafeArray<uint2, 2> _23;
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _23.elements);
spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18);
spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1);
spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18.elements);
spvArrayCopyFromDeviceToThreadGroup1(_18.elements, ssbo._m0[0u]._m1);
spvArrayCopyFromDeviceToStack1(_23.elements, ssbo._m0[0u]._m1);
spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1);
spvArrayCopyFromConstantToThreadGroup1(_18.elements, ubo._m0[0u]._m1);
spvArrayCopyFromConstantToStack1(_23.elements, ubo._m0[0u]._m1);
}

View File

@ -0,0 +1,78 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Data
{
spvUnsafeArray<float3, 16> sourceData;
};
kernel void main0(texture2d<float> g_inputTexture [[texture(0)]], texture2d<uint, access::write> g_output [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup spvUnsafeArray<Data, 64> g_data;
uint _49;
_49 = 0u;
for (; _49 < 4u; _49++)
{
for (uint _56 = 0u; _56 < 4u; )
{
int3 _65 = int3(gl_GlobalInvocationID) + int3(int(_56), int(_49), 0);
g_data[gl_GlobalInvocationID.x].sourceData[(_49 * 4u) + _56] = g_inputTexture.read(uint2(_65.xy), _65.z).xyz;
_56++;
continue;
}
}
spvUnsafeArray<float3, 16> _45 = g_data[gl_GlobalInvocationID.x].sourceData;
uint _77;
_77 = 0u;
for (int _80 = 0; _80 < 16; )
{
_77 |= uint(fast::clamp(dot(_45[_80], float3(-1.0)), 0.0, 1.0));
_80++;
continue;
}
g_output.write(uint4(_77), uint2(gl_GlobalInvocationID.xy));
}

View File

@ -1,13 +1,54 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup float2 test[64];
threadgroup spvUnsafeArray<float2, 64> test;
float _21 = float(gl_GlobalInvocationID.x);
float2 _22 = float2(_21);
((&((&test)[0u]))[0u])[1u + 2u] = _22;

View File

@ -0,0 +1,55 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup spvUnsafeArray<float2, 64> test;
float _21 = float(gl_GlobalInvocationID.x);
(true ? &((threadgroup float*)&((&test)[0u])[2u])[0u] : &((threadgroup float*)&((&test)[0u])[2u])[0u])[1u] = _21;
}

View File

@ -1,13 +1,54 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup float2 test[64];
threadgroup spvUnsafeArray<float2, 64> test;
float _21 = float(gl_GlobalInvocationID.x);
((threadgroup float*)&(*(true ? &test[1u] : &test[2u])))[1u] = _21;
}

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct foo
{
int a[128];
@ -35,14 +74,14 @@ device int* select_buffer_null(device foo& buf, constant bar& cb)
}
static inline __attribute__((always_inline))
threadgroup int* select_tgsm(constant bar& cb, threadgroup int (&tgsm)[128])
threadgroup int* select_tgsm(constant bar& cb, threadgroup spvUnsafeArray<int, 128>& tgsm)
{
return (cb.d != 0) ? &tgsm[0u] : nullptr;
}
kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], device baz& buf2 [[buffer(2)]])
{
threadgroup int tgsm[128];
threadgroup spvUnsafeArray<int, 128> tgsm;
device int* sbuf = select_buffer(buf, buf2, cb);
device int* sbuf2 = select_buffer_null(buf, cb);
threadgroup int* stgsm = select_tgsm(cb, tgsm);

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
constant uint _15_tmp [[function_constant(0)]];
constant uint _15 = is_function_constant_defined(_15_tmp) ? _15_tmp : 1u;
constant uint _16_tmp [[function_constant(1)]];
@ -30,8 +71,8 @@ constant uchar4 _137 = {};
kernel void main0(device _6& _25 [[buffer(0)]], constant _8& _29 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup uint _5[256];
threadgroup uchar _10[1024];
threadgroup spvUnsafeArray<uint, 256> _5;
threadgroup spvUnsafeArray<uchar, 1024> _10;
uint3 _20 = gl_WorkGroupSize;
bool _40 = _29._m0._m0 != 0u;
if (_40)

View File

@ -69,7 +69,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
{
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
threadgroup C c[4];
threadgroup spvUnsafeArray<C, 4> c;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
c[gl_InvocationID] = _21[gl_InvocationID];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];

View File

@ -70,8 +70,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup C spvStoragec[8][4];
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<C, 4> spvStoragec[8];
threadgroup auto &c = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
c[gl_GlobalInvocationID.x % 4] = _21[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.p_v = float4(0.0);

View File

@ -72,7 +72,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
gl_out[gl_GlobalInvocationID.x % 4].c_v = _21[gl_GlobalInvocationID.x % 4].v;
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup P spvStoragep[8];
threadgroup P (&p) = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup auto &p = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8];
p = P{ float4(0.0) };
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -83,7 +83,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
spvUnsafeArray<gl_PerVertex, 4> _39 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
gl_out[gl_InvocationID].c_v = _21[gl_InvocationID].v;
gl_out[gl_InvocationID].gl_Position = _39[gl_InvocationID].gl_Position;

View File

@ -88,8 +88,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _39[gl_GlobalInvocationID.x % 4].gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _39[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _39[gl_GlobalInvocationID.x % 4].gl_CullDistance;
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _39[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.p_v = float4(0.0);

View File

@ -83,7 +83,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
spvUnsafeArray<gl_PerVertex, 4> _39 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
gl_out[gl_InvocationID].c_v = _21[gl_InvocationID].v;
gl_out[gl_InvocationID].gl_PointSize = _39[gl_InvocationID].gl_PointSize;

View File

@ -88,8 +88,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _39[gl_GlobalInvocationID.x % 4].gl_PointSize;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _39[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _39[gl_GlobalInvocationID.x % 4].gl_CullDistance;
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _39[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.p_v = float4(0.0);

View File

@ -73,7 +73,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
{
spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _32 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
threadgroup float4 foo[4];
threadgroup spvUnsafeArray<float4, 4> foo;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
foo[gl_InvocationID] = _17[gl_InvocationID];
gl_out[gl_InvocationID].gl_Position = _32[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position;

View File

@ -74,8 +74,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _32 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup float4 spvStoragefoo[8][4];
threadgroup float4 (&foo)[4] = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<float4, 4> spvStoragefoo[8];
threadgroup auto &foo = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];
foo[gl_GlobalInvocationID.x % 4] = _17[gl_GlobalInvocationID.x % 4];
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _32[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _32[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;

View File

@ -80,7 +80,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _32[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance;
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup float4 spvStoragefoo_patch[8];
threadgroup float4 (&foo_patch) = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup auto &foo_patch = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8];
foo_patch = float4(0.0);
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -73,7 +73,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
{
spvUnsafeArray<gl_PerVertex, 4> _32 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
gl_out[gl_InvocationID].foo = _17[gl_InvocationID];
gl_out[gl_InvocationID].gl_Position = _32[gl_InvocationID].gl_Position;

View File

@ -78,8 +78,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _32[gl_GlobalInvocationID.x % 4].gl_Position;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _32[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _32[gl_GlobalInvocationID.x % 4].gl_CullDistance;
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _32[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.foo_patch = float4(0.0);

View File

@ -73,7 +73,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
{
spvUnsafeArray<gl_PerVertex, 4> _32 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
gl_out[gl_InvocationID].foo = _17[gl_InvocationID];
gl_out[gl_InvocationID].gl_PointSize = _32[gl_InvocationID].gl_PointSize;

View File

@ -78,8 +78,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _32[gl_GlobalInvocationID.x % 4].gl_PointSize;
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _32[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _32[gl_GlobalInvocationID.x % 4].gl_CullDistance;
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
gl_out_masked[gl_GlobalInvocationID.x % 4] = _32[gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
patchOut.foo_patch = float4(0.0);

View File

@ -44,129 +44,20 @@ struct spvUnsafeArray
}
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u);
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
threadgroup float shared_group[8][8];
threadgroup float shared_group_alt[8][8];
threadgroup spvUnsafeArray<spvUnsafeArray<float, 8>, 8> shared_group;
threadgroup spvUnsafeArray<spvUnsafeArray<float, 8>, 8> shared_group_alt;
spvUnsafeArray<float, 8> blob;
for (int i = 0; i < 8; i++)
{
blob[i] = float(i);
}
spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob.elements);
shared_group[gl_LocalInvocationIndex] = blob;
threadgroup_barrier(mem_flags::mem_threadgroup);
spvUnsafeArray<float, 8> copied_blob;
spvArrayCopyFromThreadGroupToStack1(copied_blob.elements, shared_group[gl_LocalInvocationIndex ^ 1u]);
spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]);
spvUnsafeArray<float, 8> copied_blob = shared_group[gl_LocalInvocationIndex ^ 1u];
shared_group_alt[gl_LocalInvocationIndex] = shared_group[gl_LocalInvocationIndex];
}

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Foo0
{
float a;
@ -52,7 +91,7 @@ void Zero(thread Foo0& v)
kernel void main0(device SSBO& _53 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Foo2 coeffs[64];
threadgroup spvUnsafeArray<Foo2, 64> coeffs;
Foo2 data;
data.weight = 0.0;
Foo0 param;

View File

@ -1,19 +1,58 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
static inline __attribute__((always_inline))
void myfunc(threadgroup int (&foo)[1337])
void myfunc(threadgroup spvUnsafeArray<int, 1337>& foo)
{
foo[0] = 13;
}
kernel void main0()
{
threadgroup int foo[1337];
threadgroup spvUnsafeArray<int, 1337> foo;
myfunc(foo);
}

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float out_data[1];
@ -13,7 +52,7 @@ struct SSBO
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u);
static inline __attribute__((always_inline))
void work(threadgroup float (&foo)[4][4], thread uint3& gl_LocalInvocationID, thread uint& gl_LocalInvocationIndex, device SSBO& _67, thread uint3& gl_GlobalInvocationID)
void work(threadgroup spvUnsafeArray<spvUnsafeArray<float, 4>, 4>& foo, thread uint3& gl_LocalInvocationID, thread uint& gl_LocalInvocationIndex, device SSBO& _67, thread uint3& gl_GlobalInvocationID)
{
foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex);
threadgroup_barrier(mem_flags::mem_threadgroup);
@ -27,7 +66,7 @@ void work(threadgroup float (&foo)[4][4], thread uint3& gl_LocalInvocationID, th
kernel void main0(device SSBO& _67 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup float foo[4][4];
threadgroup spvUnsafeArray<spvUnsafeArray<float, 4>, 4> foo;
work(foo, gl_LocalInvocationID, gl_LocalInvocationIndex, _67, gl_GlobalInvocationID);
}

View File

@ -1,8 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float in_data[1];
@ -17,7 +58,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
threadgroup float sShared[4];
threadgroup spvUnsafeArray<float, 4> sShared;
uint ident = gl_GlobalInvocationID.x;
float idata = _22.in_data[ident];
sShared[gl_LocalInvocationIndex] = idata;

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float4 values[1];
@ -13,7 +52,7 @@ struct SSBO
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
static inline __attribute__((always_inline))
void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& _23, thread uint3& gl_GlobalInvocationID)
void in_function(threadgroup spvUnsafeArray<short4, 4>& foo, thread uint& gl_LocalInvocationIndex, device SSBO& _23, thread uint3& gl_GlobalInvocationID)
{
foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0));
threadgroup_barrier(mem_flags::mem_threadgroup);
@ -22,7 +61,7 @@ void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIn
kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup short4 foo[4];
threadgroup spvUnsafeArray<short4, 4> foo;
in_function(foo, gl_LocalInvocationIndex, _23, gl_GlobalInvocationID);
}

View File

@ -44,114 +44,6 @@ struct spvUnsafeArray
}
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
struct main0_out
{
float4 gl_Position;
@ -171,14 +63,14 @@ struct main0_in
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup float4 spvStorageFoo[8][4][2];
threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> spvStorageFoo[8];
threadgroup auto &Foo = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
gl_out[gl_InvocationID].gl_Position = float4(1.0);
spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
Foo[gl_InvocationID] = gl_in[gl_InvocationID].iFoo;
if (gl_InvocationID == 0)
{
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });

View File

@ -44,114 +44,6 @@ struct spvUnsafeArray
}
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
struct main0_out
{
float4 gl_Position;
@ -171,7 +63,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup float4 Foo[4][2];
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> Foo;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])
@ -181,7 +73,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
return;
gl_out[gl_InvocationID].gl_Position = float4(1.0);
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
Foo[gl_InvocationID] = _38;
if (gl_InvocationID == 0)
{
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });

View File

@ -44,7 +44,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup P spvStorage_11[8];
threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup auto &_11 = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
write_in_function(_11, patchOut, gl_out, gl_InvocationID);

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct P
{
float a;
@ -30,7 +69,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID)
void write_in_function(device main0_patchOut& patchOut, threadgroup spvUnsafeArray<C, 4>& c, device main0_out* thread & gl_out, thread uint& gl_InvocationID)
{
patchOut.m_11_a = 1.0;
patchOut.m_11_b = 2.0;
@ -41,7 +80,7 @@ void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], d
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup C c[4];
threadgroup spvUnsafeArray<C, 4> c;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_function(patchOut, c, gl_out, gl_InvocationID);

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct P
{
float a;
@ -30,7 +69,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID)
void write_in_function(device main0_patchOut& patchOut, threadgroup spvUnsafeArray<C, 4>& c, device main0_out* thread & gl_out, thread uint& gl_InvocationID)
{
patchOut.m_11_a = 1.0;
patchOut.m_11_b = 2.0;
@ -42,8 +81,8 @@ void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], d
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup C spvStoragec[8][4];
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<C, 4> spvStoragec[8];
threadgroup auto &c = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct main0_out
{
float4 gl_Position;
@ -17,7 +56,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device float4& v1, device main0_out* thread & gl_out)
void write_in_func(threadgroup spvUnsafeArray<float4, 4>& v0, thread uint& gl_InvocationID, device float4& v1, device main0_out* thread & gl_out)
{
v0[gl_InvocationID] = float4(1.0);
v0[gl_InvocationID].x = 2.0;
@ -34,7 +73,7 @@ void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, de
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup float4 v0[4];
threadgroup spvUnsafeArray<float4, 4> v0;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_func(v0, gl_InvocationID, patchOut.v1, gl_out);

View File

@ -57,7 +57,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
void write_in_func(threadgroup spvUnsafeArray<float4, 4>& v0, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
{
v0[gl_InvocationID] = float4(1.0);
v0[gl_InvocationID].z = 3.0;
@ -77,8 +77,8 @@ void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, de
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup float4 spvStoragev0[8][4];
threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<float4, 4> spvStoragev0[8];
threadgroup auto &v0 = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -57,7 +57,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
void write_in_func(threadgroup spvUnsafeArray<float4, 4>& v0, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
{
v0[gl_InvocationID] = float4(1.0);
v0[gl_InvocationID].z = 3.0;
@ -76,7 +76,7 @@ void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, de
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup float4 v0[4];
threadgroup spvUnsafeArray<float4, 4> v0;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out);

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct main0_out
{
float4 v0;
@ -18,7 +57,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3)
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup spvUnsafeArray<float4, 2>& v1, device float4& v3)
{
gl_out[gl_InvocationID].v0 = float4(1.0);
gl_out[gl_InvocationID].v0.z = 3.0;
@ -39,8 +78,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
threadgroup float4 spvStoragev1[8][2];
threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<float4, 2> spvStoragev1[8];
threadgroup auto &v1 = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3);

View File

@ -1,10 +1,49 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct main0_out
{
float4 v0;
@ -18,7 +57,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3)
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup spvUnsafeArray<float4, 2>& v1, device float4& v3)
{
gl_out[gl_InvocationID].v0 = float4(1.0);
gl_out[gl_InvocationID].v0.z = 3.0;
@ -37,7 +76,7 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup float4 v1[2];
threadgroup spvUnsafeArray<float4, 2> v1;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3);

View File

@ -65,7 +65,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
{
gl_out[gl_InvocationID].v0 = float4(1.0);
gl_out[gl_InvocationID].v0.z = 3.0;
@ -85,8 +85,8 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -65,7 +65,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
{
gl_out[gl_InvocationID].v0 = float4(1.0);
gl_out[gl_InvocationID].v0.z = 3.0;
@ -84,7 +84,7 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked);

View File

@ -65,7 +65,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
{
gl_out[gl_InvocationID].v0 = float4(1.0);
gl_out[gl_InvocationID].v0.z = 3.0;
@ -85,8 +85,8 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);

View File

@ -65,7 +65,7 @@ struct main0_patchOut
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
{
gl_out[gl_InvocationID].v0 = float4(1.0);
gl_out[gl_InvocationID].v0.z = 3.0;
@ -84,7 +84,7 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
threadgroup gl_PerVertex gl_out_masked[4];
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked);

View File

@ -300,7 +300,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])

View File

@ -342,7 +342,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])

View File

@ -318,7 +318,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])

View File

@ -136,7 +136,7 @@ struct main0_in
kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3];
threadgroup spvUnsafeArray<FFlatTessellationHSToDS, 3> temp_var_hullMainRetVal;
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;

View File

@ -0,0 +1,137 @@
; SPIR-V
; Version: 1.3
; Generator: Google spiregg; 0
; Bound: 91
; Schema: 0
OpCapability Shader
OpCapability StorageImageReadWithoutFormat
OpExtension "SPV_GOOGLE_hlsl_functionality1"
OpExtension "SPV_GOOGLE_user_type"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %csMain "main" %gl_GlobalInvocationID
OpExecutionMode %csMain LocalSize 8 8 1
OpSource HLSL 500
OpName %Data "Data"
OpMemberName %Data 0 "sourceData"
OpName %g_data "g_data"
OpName %type_2d_image "type.2d.image"
OpName %g_inputTexture "g_inputTexture"
OpName %type_2d_image_0 "type.2d.image"
OpName %g_output "g_output"
OpName %csMain "csMain"
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
OpDecorateString %gl_GlobalInvocationID UserSemantic "SV_DispatchThreadID"
OpDecorate %g_inputTexture DescriptorSet 0
OpDecorate %g_inputTexture Binding 0
OpDecorate %g_output DescriptorSet 0
OpDecorate %g_output Binding 0
OpDecorateString %g_inputTexture UserTypeGOOGLE "texture2d:<float4>"
OpDecorateString %g_output UserTypeGOOGLE "rwtexture2d:<uint>"
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_4 = OpConstant %uint 4
%uint_1 = OpConstant %uint 1
%int_16 = OpConstant %int 16
%float = OpTypeFloat 32
%float_n1 = OpConstant %float -1
%v3float = OpTypeVector %float 3
%20 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1
%float_0 = OpConstant %float 0
%float_1 = OpConstant %float 1
%int_1 = OpConstant %int 1
%uint_64 = OpConstant %uint 64
%uint_16 = OpConstant %uint 16
%_arr_v3float_uint_16 = OpTypeArray %v3float %uint_16
%Data = OpTypeStruct %_arr_v3float_uint_16
%_arr_Data_uint_64 = OpTypeArray %Data %uint_64
%_ptr_Workgroup__arr_Data_uint_64 = OpTypePointer Workgroup %_arr_Data_uint_64
%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown
%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
%type_2d_image_0 = OpTypeImage %uint 2D 2 0 0 2 R32ui
%_ptr_UniformConstant_type_2d_image_0 = OpTypePointer UniformConstant %type_2d_image_0
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%void = OpTypeVoid
%34 = OpTypeFunction %void
%v2uint = OpTypeVector %uint 2
%_ptr_Function__arr_v3float_uint_16 = OpTypePointer Function %_arr_v3float_uint_16
%_ptr_Workgroup__arr_v3float_uint_16 = OpTypePointer Workgroup %_arr_v3float_uint_16
%_ptr_Function_v3float = OpTypePointer Function %v3float
%bool = OpTypeBool
%v3int = OpTypeVector %int 3
%v2int = OpTypeVector %int 2
%v4float = OpTypeVector %float 4
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
%g_data = OpVariable %_ptr_Workgroup__arr_Data_uint_64 Workgroup
%g_inputTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
%g_output = OpVariable %_ptr_UniformConstant_type_2d_image_0 UniformConstant
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
%csMain = OpFunction %void None %34
%44 = OpLabel
%45 = OpVariable %_ptr_Function__arr_v3float_uint_16 Function
%46 = OpLoad %v3uint %gl_GlobalInvocationID
%47 = OpCompositeExtract %uint %46 0
OpBranch %48
%48 = OpLabel
%49 = OpPhi %uint %uint_0 %44 %50 %51
%52 = OpULessThan %bool %49 %uint_4
OpLoopMerge %53 %51 None
OpBranchConditional %52 %54 %53
%54 = OpLabel
OpBranch %55
%55 = OpLabel
%56 = OpPhi %uint %uint_0 %54 %57 %58
%59 = OpULessThan %bool %56 %uint_4
OpLoopMerge %60 %58 None
OpBranchConditional %59 %58 %60
%58 = OpLabel
%61 = OpBitcast %v3int %46
%62 = OpBitcast %int %56
%63 = OpBitcast %int %49
%64 = OpCompositeConstruct %v3int %62 %63 %int_0
%65 = OpIAdd %v3int %61 %64
%66 = OpVectorShuffle %v2int %65 %65 0 1
%67 = OpCompositeExtract %int %65 2
%68 = OpLoad %type_2d_image %g_inputTexture
%69 = OpImageFetch %v4float %68 %66 Lod %67
%70 = OpVectorShuffle %v3float %69 %69 0 1 2
%71 = OpIMul %uint %49 %uint_4
%72 = OpIAdd %uint %71 %56
%73 = OpAccessChain %_ptr_Workgroup_v3float %g_data %47 %int_0 %72
OpStore %73 %70
%57 = OpIAdd %uint %56 %uint_1
OpBranch %55
%60 = OpLabel
OpBranch %51
%51 = OpLabel
%50 = OpIAdd %uint %49 %uint_1
OpBranch %48
%53 = OpLabel
%74 = OpAccessChain %_ptr_Workgroup__arr_v3float_uint_16 %g_data %47 %int_0
%75 = OpLoad %_arr_v3float_uint_16 %74
OpStore %45 %75
OpBranch %76
%76 = OpLabel
%77 = OpPhi %uint %uint_0 %53 %78 %79
%80 = OpPhi %int %int_0 %53 %81 %79
%82 = OpSLessThan %bool %80 %int_16
OpLoopMerge %83 %79 None
OpBranchConditional %82 %79 %83
%79 = OpLabel
%84 = OpAccessChain %_ptr_Function_v3float %45 %80
%85 = OpLoad %v3float %84
%86 = OpDot %float %85 %20
%87 = OpExtInst %float %1 FClamp %86 %float_0 %float_1
%88 = OpConvertFToU %uint %87
%78 = OpBitwiseOr %uint %77 %88
%81 = OpIAdd %int %80 %int_1
OpBranch %76
%83 = OpLabel
%89 = OpVectorShuffle %v2uint %46 %46 0 1
%90 = OpLoad %type_2d_image_0 %g_output
OpImageWrite %90 %89 %77 None
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,60 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 11
; Bound: 26
; Schema: 0
OpCapability Shader
OpCapability VariablePointers
OpExtension "SPV_KHR_variable_pointers"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
OpExecutionMode %main LocalSize 64 1 1
OpSource GLSL 450
OpName %main "main"
OpName %test "test"
OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%bool = OpTypeBool
%true = OpConstantTrue %bool
%v2float = OpTypeVector %float 2
%uint = OpTypeInt 32 0
%uint_64 = OpConstant %uint 64
%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64
%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64
%test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup
%_ptr_Input_uint = OpTypePointer Input %uint
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_3 = OpConstant %uint 3
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1
%main = OpFunction %void None %3
%5 = OpLabel
%14 = OpLoad %uint %gl_LocalInvocationIndex
%19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
%20 = OpLoad %uint %19
%21 = OpConvertUToF %float %20
%22 = OpCompositeConstruct %v2float %21 %21
; Scalar shenanigans.
%ptr6 = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0
%ptr6_alt = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0
%ptr6_sel = OpSelect %_ptr_Workgroup_float %true %ptr6 %ptr6_alt
%ptr7 = OpPtrAccessChain %_ptr_Workgroup_float %ptr6_sel %uint_1
OpStore %ptr7 %21
OpReturn
OpFunctionEnd

View File

@ -1464,7 +1464,7 @@ void CompilerMSL::emit_entry_point_declarations()
{
auto &var = get<SPIRVariable>(var_id);
add_local_variable_name(var_id);
statement(variable_decl(var), ";");
statement(CompilerGLSL::variable_decl(var), ";");
var.deferred_declaration = false;
}
}
@ -3457,9 +3457,6 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo
auto &type = get_variable_data_type(masked_var);
add_local_variable_name(masked_var.self);
bool old_is_builtin = is_using_builtin_array;
is_using_builtin_array = true;
const uint32_t max_control_points_per_patch = 32u;
uint32_t max_num_instances =
(max_control_points_per_patch + get_entry_point().output_vertices - 1u) /
@ -3475,14 +3472,12 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo
// since Metal does not allow that. :(
// FIXME: We will likely need an option to support passing down target workgroup size,
// so we can emit appropriate size here.
statement("threadgroup ", type_to_glsl(type), " ",
"(&", to_name(masked_var.self), ")",
type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[",
statement("threadgroup auto ",
"&", to_name(masked_var.self),
" = spvStorage", to_name(masked_var.self), "[",
"(", to_expression(builtin_invocation_id_id), ".x / ",
get_entry_point().output_vertices, ") % ",
max_num_instances, "];");
is_using_builtin_array = old_is_builtin;
});
}
else
@ -9770,8 +9765,8 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);
bool lhs_is_array_template = lhs_is_thread_storage;
bool rhs_is_array_template = rhs_is_thread_storage;
bool lhs_is_array_template = lhs_is_thread_storage || lhs_storage == StorageClassWorkgroup;
bool rhs_is_array_template = rhs_is_thread_storage || rhs_storage == StorageClassWorkgroup;
// Special considerations for stage IO variables.
// If the variable is actually backed by non-user visible device storage, we use array templates for those.
@ -9786,15 +9781,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
auto *lhs_var = maybe_get_backing_variable(lhs_id);
if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
lhs_is_array_template = true;
else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) &&
type_is_block_like(get<SPIRType>(lhs_var->basetype)))
else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
lhs_is_array_template = false;
auto *rhs_var = maybe_get_backing_variable(rhs_id);
if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
rhs_is_array_template = true;
else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) &&
type_is_block_like(get<SPIRType>(rhs_var->basetype)))
else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
rhs_is_array_template = false;
// If threadgroup storage qualifiers are *not* used:
@ -14399,9 +14392,6 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
bool builtin = has_decoration(var.self, DecorationBuiltIn);
auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn));
if (address_space == "threadgroup")
is_using_builtin_array = true;
if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
decl = join(cv_qualifier, type_to_glsl(type, arg.id));
else if (builtin)
@ -15327,19 +15317,6 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable
}
}
std::string CompilerMSL::variable_decl(const SPIRVariable &variable)
{
bool old_is_using_builtin_array = is_using_builtin_array;
// Threadgroup arrays can't have a wrapper type.
if (variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
is_using_builtin_array = true;
auto expr = CompilerGLSL::variable_decl(variable);
is_using_builtin_array = old_is_using_builtin_array;
return expr;
}
// GCC workaround of lambdas calling protected funcs
std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id)
{

View File

@ -857,9 +857,6 @@ protected:
std::string type_to_array_glsl(const SPIRType &type) override;
std::string constant_op_expression(const SPIRConstantOp &cop) override;
// Threadgroup arrays can't have a wrapper type
std::string variable_decl(const SPIRVariable &variable) override;
bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override;
// GCC workaround of lambdas calling protected functions (for older GCC versions)