Merge pull request #2240 from KhronosGroup/fix-2239
MSL: Remove special case for threadgroup array wrapper.
This commit is contained in:
commit
f349c91274
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct Foo0
|
||||
{
|
||||
float a;
|
||||
@ -44,7 +85,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u);
|
||||
|
||||
kernel void main0(device SSBO& _53 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup Foo2 coeffs[64];
|
||||
threadgroup spvUnsafeArray<Foo2, 64> coeffs;
|
||||
coeffs[gl_LocalInvocationIndex] = Foo2{ Foo1{ Foo0{ 0.0 } }, 0.0 };
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_LocalInvocationIndex == 0u)
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float out_data[1];
|
||||
@ -12,7 +53,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u);
|
||||
|
||||
kernel void main0(device SSBO& _67 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup float foo[4][4];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float, 4>, 4> foo;
|
||||
foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex);
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
_67.out_data[gl_GlobalInvocationID.x] = ((foo[gl_LocalInvocationID.x][0] + foo[gl_LocalInvocationID.x][1]) + foo[gl_LocalInvocationID.x][2]) + foo[gl_LocalInvocationID.x][3];
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float in_data[1];
|
||||
@ -17,7 +58,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
|
||||
|
||||
kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
|
||||
{
|
||||
threadgroup float sShared[4];
|
||||
threadgroup spvUnsafeArray<float, 4> sShared;
|
||||
sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x];
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
_44.out_data[gl_GlobalInvocationID.x] = sShared[3u - gl_LocalInvocationIndex];
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 values[1];
|
||||
@ -12,7 +53,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
|
||||
|
||||
kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup short4 foo[4];
|
||||
threadgroup spvUnsafeArray<short4, 4> foo;
|
||||
foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0));
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u]));
|
||||
|
@ -44,114 +44,6 @@ struct spvUnsafeArray
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
@ -171,14 +63,14 @@ struct main0_in
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStorageFoo[8][4][2];
|
||||
threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> spvStorageFoo[8];
|
||||
threadgroup auto &Foo = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
|
||||
Foo[gl_InvocationID] = gl_in[gl_InvocationID].iFoo;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
|
@ -44,114 +44,6 @@ struct spvUnsafeArray
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
@ -171,7 +63,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup float4 Foo[4][2];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> Foo;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
@ -181,7 +73,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
||||
return;
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
|
||||
spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
|
||||
Foo[gl_InvocationID] = _38;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
|
@ -32,7 +32,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
threadgroup P spvStorage_11[8];
|
||||
threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup auto &_11 = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
_11.a = 1.0;
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct P
|
||||
{
|
||||
float a;
|
||||
@ -29,7 +70,7 @@ struct main0_patchOut
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup C c[4];
|
||||
threadgroup spvUnsafeArray<C, 4> c;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
patchOut.m_11_a = 1.0;
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct P
|
||||
{
|
||||
float a;
|
||||
@ -30,8 +71,8 @@ struct main0_patchOut
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup C spvStoragec[8][4];
|
||||
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<C, 4> spvStoragec[8];
|
||||
threadgroup auto &c = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
@ -16,7 +57,7 @@ struct main0_patchOut
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup float4 v0[4];
|
||||
threadgroup spvUnsafeArray<float4, 4> v0;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
v0[gl_InvocationID] = float4(1.0);
|
||||
|
@ -59,8 +59,8 @@ struct main0_patchOut
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStoragev0[8][4];
|
||||
threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<float4, 4> spvStoragev0[8];
|
||||
threadgroup auto &v0 = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -58,7 +58,7 @@ struct main0_patchOut
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup float4 v0[4];
|
||||
threadgroup spvUnsafeArray<float4, 4> v0;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
v0[gl_InvocationID] = float4(1.0);
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 v0;
|
||||
@ -19,8 +60,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
threadgroup float4 spvStoragev1[8][2];
|
||||
threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<float4, 2> spvStoragev1[8];
|
||||
threadgroup auto &v1 = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 v0;
|
||||
@ -17,7 +58,7 @@ struct main0_patchOut
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup float4 v1[2];
|
||||
threadgroup spvUnsafeArray<float4, 2> v1;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
|
@ -67,8 +67,8 @@ struct main0_patchOut
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -66,7 +66,7 @@ struct main0_patchOut
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
|
@ -67,8 +67,8 @@ struct main0_patchOut
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -66,7 +66,7 @@ struct main0_patchOut
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
|
@ -300,7 +300,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
|
@ -342,7 +342,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
|
@ -318,7 +318,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
|
@ -136,7 +136,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FFlatTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct Block
|
||||
{
|
||||
uint2 _m0[2];
|
||||
@ -16,7 +57,7 @@ struct SSBO
|
||||
|
||||
kernel void main0(device SSBO& ssbo [[buffer(0)]])
|
||||
{
|
||||
threadgroup uint2 _18[2];
|
||||
threadgroup spvUnsafeArray<uint2, 2> _18;
|
||||
ssbo._m0[0u] = Block{ { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] }, { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] } };
|
||||
}
|
||||
|
||||
|
@ -165,7 +165,7 @@ struct SSBO
|
||||
|
||||
kernel void main0(device SSBO& ssbo [[buffer(0)]])
|
||||
{
|
||||
threadgroup uint2 _18[2];
|
||||
threadgroup spvUnsafeArray<uint2, 2> _18;
|
||||
spvUnsafeArray<uint2, 2> _27;
|
||||
spvArrayCopyFromDeviceToStack1(_27.elements, ssbo._m0[0u]._m1);
|
||||
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements);
|
||||
|
@ -165,15 +165,15 @@ struct SSBO
|
||||
|
||||
kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]])
|
||||
{
|
||||
threadgroup uint2 _18[2];
|
||||
threadgroup spvUnsafeArray<uint2, 2> _18;
|
||||
spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1);
|
||||
spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1);
|
||||
spvUnsafeArray<uint2, 2> _23;
|
||||
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _23.elements);
|
||||
spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18);
|
||||
spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1);
|
||||
spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18.elements);
|
||||
spvArrayCopyFromDeviceToThreadGroup1(_18.elements, ssbo._m0[0u]._m1);
|
||||
spvArrayCopyFromDeviceToStack1(_23.elements, ssbo._m0[0u]._m1);
|
||||
spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1);
|
||||
spvArrayCopyFromConstantToThreadGroup1(_18.elements, ubo._m0[0u]._m1);
|
||||
spvArrayCopyFromConstantToStack1(_23.elements, ubo._m0[0u]._m1);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,78 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct Data
|
||||
{
|
||||
spvUnsafeArray<float3, 16> sourceData;
|
||||
};
|
||||
|
||||
kernel void main0(texture2d<float> g_inputTexture [[texture(0)]], texture2d<uint, access::write> g_output [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup spvUnsafeArray<Data, 64> g_data;
|
||||
uint _49;
|
||||
_49 = 0u;
|
||||
for (; _49 < 4u; _49++)
|
||||
{
|
||||
for (uint _56 = 0u; _56 < 4u; )
|
||||
{
|
||||
int3 _65 = int3(gl_GlobalInvocationID) + int3(int(_56), int(_49), 0);
|
||||
g_data[gl_GlobalInvocationID.x].sourceData[(_49 * 4u) + _56] = g_inputTexture.read(uint2(_65.xy), _65.z).xyz;
|
||||
_56++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
spvUnsafeArray<float3, 16> _45 = g_data[gl_GlobalInvocationID.x].sourceData;
|
||||
uint _77;
|
||||
_77 = 0u;
|
||||
for (int _80 = 0; _80 < 16; )
|
||||
{
|
||||
_77 |= uint(fast::clamp(dot(_45[_80], float3(-1.0)), 0.0, 1.0));
|
||||
_80++;
|
||||
continue;
|
||||
}
|
||||
g_output.write(uint4(_77), uint2(gl_GlobalInvocationID.xy));
|
||||
}
|
||||
|
@ -1,13 +1,54 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
|
||||
|
||||
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup float2 test[64];
|
||||
threadgroup spvUnsafeArray<float2, 64> test;
|
||||
float _21 = float(gl_GlobalInvocationID.x);
|
||||
float2 _22 = float2(_21);
|
||||
((&((&test)[0u]))[0u])[1u + 2u] = _22;
|
||||
|
@ -0,0 +1,55 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
|
||||
|
||||
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup spvUnsafeArray<float2, 64> test;
|
||||
float _21 = float(gl_GlobalInvocationID.x);
|
||||
(true ? &((threadgroup float*)&((&test)[0u])[2u])[0u] : &((threadgroup float*)&((&test)[0u])[2u])[0u])[1u] = _21;
|
||||
}
|
||||
|
@ -1,13 +1,54 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
|
||||
|
||||
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup float2 test[64];
|
||||
threadgroup spvUnsafeArray<float2, 64> test;
|
||||
float _21 = float(gl_GlobalInvocationID.x);
|
||||
((threadgroup float*)&(*(true ? &test[1u] : &test[2u])))[1u] = _21;
|
||||
}
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct foo
|
||||
{
|
||||
int a[128];
|
||||
@ -35,14 +74,14 @@ device int* select_buffer_null(device foo& buf, constant bar& cb)
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
threadgroup int* select_tgsm(constant bar& cb, threadgroup int (&tgsm)[128])
|
||||
threadgroup int* select_tgsm(constant bar& cb, threadgroup spvUnsafeArray<int, 128>& tgsm)
|
||||
{
|
||||
return (cb.d != 0) ? &tgsm[0u] : nullptr;
|
||||
}
|
||||
|
||||
kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], device baz& buf2 [[buffer(2)]])
|
||||
{
|
||||
threadgroup int tgsm[128];
|
||||
threadgroup spvUnsafeArray<int, 128> tgsm;
|
||||
device int* sbuf = select_buffer(buf, buf2, cb);
|
||||
device int* sbuf2 = select_buffer_null(buf, cb);
|
||||
threadgroup int* stgsm = select_tgsm(cb, tgsm);
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
constant uint _15_tmp [[function_constant(0)]];
|
||||
constant uint _15 = is_function_constant_defined(_15_tmp) ? _15_tmp : 1u;
|
||||
constant uint _16_tmp [[function_constant(1)]];
|
||||
@ -30,8 +71,8 @@ constant uchar4 _137 = {};
|
||||
|
||||
kernel void main0(device _6& _25 [[buffer(0)]], constant _8& _29 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup uint _5[256];
|
||||
threadgroup uchar _10[1024];
|
||||
threadgroup spvUnsafeArray<uint, 256> _5;
|
||||
threadgroup spvUnsafeArray<uchar, 1024> _10;
|
||||
uint3 _20 = gl_WorkGroupSize;
|
||||
bool _40 = _29._m0._m0 != 0u;
|
||||
if (_40)
|
||||
|
@ -69,7 +69,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
|
||||
{
|
||||
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
|
||||
|
||||
threadgroup C c[4];
|
||||
threadgroup spvUnsafeArray<C, 4> c;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
c[gl_InvocationID] = _21[gl_InvocationID];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
|
@ -70,8 +70,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
|
||||
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup C spvStoragec[8][4];
|
||||
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<C, 4> spvStoragec[8];
|
||||
threadgroup auto &c = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
c[gl_GlobalInvocationID.x % 4] = _21[gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
patchOut.p_v = float4(0.0);
|
||||
|
@ -72,7 +72,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
gl_out[gl_GlobalInvocationID.x % 4].c_v = _21[gl_GlobalInvocationID.x % 4].v;
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
threadgroup P spvStoragep[8];
|
||||
threadgroup P (&p) = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup auto &p = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
p = P{ float4(0.0) };
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -83,7 +83,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
|
||||
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
|
||||
spvUnsafeArray<gl_PerVertex, 4> _39 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
|
||||
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
gl_out[gl_InvocationID].c_v = _21[gl_InvocationID].v;
|
||||
gl_out[gl_InvocationID].gl_Position = _39[gl_InvocationID].gl_Position;
|
||||
|
@ -88,8 +88,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _39[gl_GlobalInvocationID.x % 4].gl_Position;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _39[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _39[gl_GlobalInvocationID.x % 4].gl_CullDistance;
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
gl_out_masked[gl_GlobalInvocationID.x % 4] = _39[gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
patchOut.p_v = float4(0.0);
|
||||
|
@ -83,7 +83,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
|
||||
spvUnsafeArray<C, 4> _21 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
|
||||
spvUnsafeArray<gl_PerVertex, 4> _39 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
|
||||
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
gl_out[gl_InvocationID].c_v = _21[gl_InvocationID].v;
|
||||
gl_out[gl_InvocationID].gl_PointSize = _39[gl_InvocationID].gl_PointSize;
|
||||
|
@ -88,8 +88,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _39[gl_GlobalInvocationID.x % 4].gl_PointSize;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _39[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _39[gl_GlobalInvocationID.x % 4].gl_CullDistance;
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
gl_out_masked[gl_GlobalInvocationID.x % 4] = _39[gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
patchOut.p_v = float4(0.0);
|
||||
|
@ -73,7 +73,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
|
||||
{
|
||||
spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _32 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
|
||||
|
||||
threadgroup float4 foo[4];
|
||||
threadgroup spvUnsafeArray<float4, 4> foo;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
foo[gl_InvocationID] = _17[gl_InvocationID];
|
||||
gl_out[gl_InvocationID].gl_Position = _32[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
|
||||
|
@ -74,8 +74,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _32 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
|
||||
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStoragefoo[8][4];
|
||||
threadgroup float4 (&foo)[4] = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<float4, 4> spvStoragefoo[8];
|
||||
threadgroup auto &foo = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
foo[gl_GlobalInvocationID.x % 4] = _17[gl_GlobalInvocationID.x % 4];
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _32[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _32[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
|
||||
|
@ -80,7 +80,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _32[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance;
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
threadgroup float4 spvStoragefoo_patch[8];
|
||||
threadgroup float4 (&foo_patch) = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup auto &foo_patch = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
foo_patch = float4(0.0);
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -73,7 +73,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
|
||||
{
|
||||
spvUnsafeArray<gl_PerVertex, 4> _32 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
|
||||
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
gl_out[gl_InvocationID].foo = _17[gl_InvocationID];
|
||||
gl_out[gl_InvocationID].gl_Position = _32[gl_InvocationID].gl_Position;
|
||||
|
@ -78,8 +78,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _32[gl_GlobalInvocationID.x % 4].gl_Position;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _32[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _32[gl_GlobalInvocationID.x % 4].gl_CullDistance;
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
gl_out_masked[gl_GlobalInvocationID.x % 4] = _32[gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
patchOut.foo_patch = float4(0.0);
|
||||
|
@ -73,7 +73,7 @@ kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_
|
||||
{
|
||||
spvUnsafeArray<gl_PerVertex, 4> _32 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
|
||||
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
gl_out[gl_InvocationID].foo = _17[gl_InvocationID];
|
||||
gl_out[gl_InvocationID].gl_PointSize = _32[gl_InvocationID].gl_PointSize;
|
||||
|
@ -78,8 +78,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _32[gl_GlobalInvocationID.x % 4].gl_PointSize;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _32[gl_GlobalInvocationID.x % 4].gl_ClipDistance;
|
||||
gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _32[gl_GlobalInvocationID.x % 4].gl_CullDistance;
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
gl_out_masked[gl_GlobalInvocationID.x % 4] = _32[gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
patchOut.foo_patch = float4(0.0);
|
||||
|
@ -44,129 +44,20 @@ struct spvUnsafeArray
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u);
|
||||
|
||||
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
|
||||
{
|
||||
threadgroup float shared_group[8][8];
|
||||
threadgroup float shared_group_alt[8][8];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float, 8>, 8> shared_group;
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float, 8>, 8> shared_group_alt;
|
||||
spvUnsafeArray<float, 8> blob;
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
blob[i] = float(i);
|
||||
}
|
||||
spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob.elements);
|
||||
shared_group[gl_LocalInvocationIndex] = blob;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
spvUnsafeArray<float, 8> copied_blob;
|
||||
spvArrayCopyFromThreadGroupToStack1(copied_blob.elements, shared_group[gl_LocalInvocationIndex ^ 1u]);
|
||||
spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]);
|
||||
spvUnsafeArray<float, 8> copied_blob = shared_group[gl_LocalInvocationIndex ^ 1u];
|
||||
shared_group_alt[gl_LocalInvocationIndex] = shared_group[gl_LocalInvocationIndex];
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct Foo0
|
||||
{
|
||||
float a;
|
||||
@ -52,7 +91,7 @@ void Zero(thread Foo0& v)
|
||||
|
||||
kernel void main0(device SSBO& _53 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup Foo2 coeffs[64];
|
||||
threadgroup spvUnsafeArray<Foo2, 64> coeffs;
|
||||
Foo2 data;
|
||||
data.weight = 0.0;
|
||||
Foo0 param;
|
||||
|
@ -1,19 +1,58 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void myfunc(threadgroup int (&foo)[1337])
|
||||
void myfunc(threadgroup spvUnsafeArray<int, 1337>& foo)
|
||||
{
|
||||
foo[0] = 13;
|
||||
}
|
||||
|
||||
kernel void main0()
|
||||
{
|
||||
threadgroup int foo[1337];
|
||||
threadgroup spvUnsafeArray<int, 1337> foo;
|
||||
myfunc(foo);
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float out_data[1];
|
||||
@ -13,7 +52,7 @@ struct SSBO
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void work(threadgroup float (&foo)[4][4], thread uint3& gl_LocalInvocationID, thread uint& gl_LocalInvocationIndex, device SSBO& _67, thread uint3& gl_GlobalInvocationID)
|
||||
void work(threadgroup spvUnsafeArray<spvUnsafeArray<float, 4>, 4>& foo, thread uint3& gl_LocalInvocationID, thread uint& gl_LocalInvocationIndex, device SSBO& _67, thread uint3& gl_GlobalInvocationID)
|
||||
{
|
||||
foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex);
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
@ -27,7 +66,7 @@ void work(threadgroup float (&foo)[4][4], thread uint3& gl_LocalInvocationID, th
|
||||
|
||||
kernel void main0(device SSBO& _67 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup float foo[4][4];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float, 4>, 4> foo;
|
||||
work(foo, gl_LocalInvocationID, gl_LocalInvocationIndex, _67, gl_GlobalInvocationID);
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float in_data[1];
|
||||
@ -17,7 +58,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
|
||||
|
||||
kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
|
||||
{
|
||||
threadgroup float sShared[4];
|
||||
threadgroup spvUnsafeArray<float, 4> sShared;
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = _22.in_data[ident];
|
||||
sShared[gl_LocalInvocationIndex] = idata;
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 values[1];
|
||||
@ -13,7 +52,7 @@ struct SSBO
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& _23, thread uint3& gl_GlobalInvocationID)
|
||||
void in_function(threadgroup spvUnsafeArray<short4, 4>& foo, thread uint& gl_LocalInvocationIndex, device SSBO& _23, thread uint3& gl_GlobalInvocationID)
|
||||
{
|
||||
foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0));
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
@ -22,7 +61,7 @@ void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIn
|
||||
|
||||
kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup short4 foo[4];
|
||||
threadgroup spvUnsafeArray<short4, 4> foo;
|
||||
in_function(foo, gl_LocalInvocationIndex, _23, gl_GlobalInvocationID);
|
||||
}
|
||||
|
||||
|
@ -44,114 +44,6 @@ struct spvUnsafeArray
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
@ -171,14 +63,14 @@ struct main0_in
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStorageFoo[8][4][2];
|
||||
threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> spvStorageFoo[8];
|
||||
threadgroup auto &Foo = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
|
||||
Foo[gl_InvocationID] = gl_in[gl_InvocationID].iFoo;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
|
@ -44,114 +44,6 @@ struct spvUnsafeArray
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
@ -171,7 +63,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup float4 Foo[4][2];
|
||||
threadgroup spvUnsafeArray<spvUnsafeArray<float4, 2>, 4> Foo;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
@ -181,7 +73,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_
|
||||
return;
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
|
||||
spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
|
||||
Foo[gl_InvocationID] = _38;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
|
@ -44,7 +44,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
threadgroup P spvStorage_11[8];
|
||||
threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup auto &_11 = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
write_in_function(_11, patchOut, gl_out, gl_InvocationID);
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct P
|
||||
{
|
||||
float a;
|
||||
@ -30,7 +69,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID)
|
||||
void write_in_function(device main0_patchOut& patchOut, threadgroup spvUnsafeArray<C, 4>& c, device main0_out* thread & gl_out, thread uint& gl_InvocationID)
|
||||
{
|
||||
patchOut.m_11_a = 1.0;
|
||||
patchOut.m_11_b = 2.0;
|
||||
@ -41,7 +80,7 @@ void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], d
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup C c[4];
|
||||
threadgroup spvUnsafeArray<C, 4> c;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
write_in_function(patchOut, c, gl_out, gl_InvocationID);
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct P
|
||||
{
|
||||
float a;
|
||||
@ -30,7 +69,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID)
|
||||
void write_in_function(device main0_patchOut& patchOut, threadgroup spvUnsafeArray<C, 4>& c, device main0_out* thread & gl_out, thread uint& gl_InvocationID)
|
||||
{
|
||||
patchOut.m_11_a = 1.0;
|
||||
patchOut.m_11_b = 2.0;
|
||||
@ -42,8 +81,8 @@ void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], d
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup C spvStoragec[8][4];
|
||||
threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<C, 4> spvStoragec[8];
|
||||
threadgroup auto &c = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
@ -17,7 +56,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device float4& v1, device main0_out* thread & gl_out)
|
||||
void write_in_func(threadgroup spvUnsafeArray<float4, 4>& v0, thread uint& gl_InvocationID, device float4& v1, device main0_out* thread & gl_out)
|
||||
{
|
||||
v0[gl_InvocationID] = float4(1.0);
|
||||
v0[gl_InvocationID].x = 2.0;
|
||||
@ -34,7 +73,7 @@ void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, de
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup float4 v0[4];
|
||||
threadgroup spvUnsafeArray<float4, 4> v0;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
write_in_func(v0, gl_InvocationID, patchOut.v1, gl_out);
|
||||
|
@ -57,7 +57,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
|
||||
void write_in_func(threadgroup spvUnsafeArray<float4, 4>& v0, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
|
||||
{
|
||||
v0[gl_InvocationID] = float4(1.0);
|
||||
v0[gl_InvocationID].z = 3.0;
|
||||
@ -77,8 +77,8 @@ void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, de
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStoragev0[8][4];
|
||||
threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<float4, 4> spvStoragev0[8];
|
||||
threadgroup auto &v0 = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -57,7 +57,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
|
||||
void write_in_func(threadgroup spvUnsafeArray<float4, 4>& v0, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
|
||||
{
|
||||
v0[gl_InvocationID] = float4(1.0);
|
||||
v0[gl_InvocationID].z = 3.0;
|
||||
@ -76,7 +76,7 @@ void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, de
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup float4 v0[4];
|
||||
threadgroup spvUnsafeArray<float4, 4> v0;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out);
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 v0;
|
||||
@ -18,7 +57,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3)
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup spvUnsafeArray<float4, 2>& v1, device float4& v3)
|
||||
{
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
gl_out[gl_InvocationID].v0.z = 3.0;
|
||||
@ -39,8 +78,8 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
threadgroup float4 spvStoragev1[8][2];
|
||||
threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<float4, 2> spvStoragev1[8];
|
||||
threadgroup auto &v1 = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3);
|
||||
|
@ -1,10 +1,49 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 v0;
|
||||
@ -18,7 +57,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3)
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup spvUnsafeArray<float4, 2>& v1, device float4& v3)
|
||||
{
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
gl_out[gl_InvocationID].v0.z = 3.0;
|
||||
@ -37,7 +76,7 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup float4 v1[2];
|
||||
threadgroup spvUnsafeArray<float4, 2> v1;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3);
|
||||
|
@ -65,7 +65,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
|
||||
{
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
gl_out[gl_InvocationID].v0.z = 3.0;
|
||||
@ -85,8 +85,8 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -65,7 +65,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
|
||||
{
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
gl_out[gl_InvocationID].v0.z = 3.0;
|
||||
@ -84,7 +84,7 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked);
|
||||
|
@ -65,7 +65,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
|
||||
{
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
gl_out[gl_InvocationID].v0.z = 3.0;
|
||||
@ -85,8 +85,8 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
|
||||
threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> spvStoragegl_out_masked[8];
|
||||
threadgroup auto &gl_out_masked = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1);
|
||||
|
@ -65,7 +65,7 @@ struct main0_patchOut
|
||||
};
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
|
||||
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup spvUnsafeArray<gl_PerVertex, 4>& gl_out_masked)
|
||||
{
|
||||
gl_out[gl_InvocationID].v0 = float4(1.0);
|
||||
gl_out[gl_InvocationID].v0.z = 3.0;
|
||||
@ -84,7 +84,7 @@ void write_in_func(device main0_out* thread & gl_out, thread uint& gl_Invocation
|
||||
|
||||
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
|
||||
{
|
||||
threadgroup gl_PerVertex gl_out_masked[4];
|
||||
threadgroup spvUnsafeArray<gl_PerVertex, 4> gl_out_masked;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked);
|
||||
|
@ -300,7 +300,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
|
@ -342,7 +342,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
|
@ -318,7 +318,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FPNTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
|
@ -136,7 +136,7 @@ struct main0_in
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3];
|
||||
threadgroup spvUnsafeArray<FFlatTessellationHSToDS, 3> temp_var_hullMainRetVal;
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
|
@ -0,0 +1,137 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google spiregg; 0
|
||||
; Bound: 91
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability StorageImageReadWithoutFormat
|
||||
OpExtension "SPV_GOOGLE_hlsl_functionality1"
|
||||
OpExtension "SPV_GOOGLE_user_type"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %csMain "main" %gl_GlobalInvocationID
|
||||
OpExecutionMode %csMain LocalSize 8 8 1
|
||||
OpSource HLSL 500
|
||||
OpName %Data "Data"
|
||||
OpMemberName %Data 0 "sourceData"
|
||||
OpName %g_data "g_data"
|
||||
OpName %type_2d_image "type.2d.image"
|
||||
OpName %g_inputTexture "g_inputTexture"
|
||||
OpName %type_2d_image_0 "type.2d.image"
|
||||
OpName %g_output "g_output"
|
||||
OpName %csMain "csMain"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorateString %gl_GlobalInvocationID UserSemantic "SV_DispatchThreadID"
|
||||
OpDecorate %g_inputTexture DescriptorSet 0
|
||||
OpDecorate %g_inputTexture Binding 0
|
||||
OpDecorate %g_output DescriptorSet 0
|
||||
OpDecorate %g_output Binding 0
|
||||
OpDecorateString %g_inputTexture UserTypeGOOGLE "texture2d:<float4>"
|
||||
OpDecorateString %g_output UserTypeGOOGLE "rwtexture2d:<uint>"
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%int_16 = OpConstant %int 16
|
||||
%float = OpTypeFloat 32
|
||||
%float_n1 = OpConstant %float -1
|
||||
%v3float = OpTypeVector %float 3
|
||||
%20 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1
|
||||
%float_0 = OpConstant %float 0
|
||||
%float_1 = OpConstant %float 1
|
||||
%int_1 = OpConstant %int 1
|
||||
%uint_64 = OpConstant %uint 64
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_v3float_uint_16 = OpTypeArray %v3float %uint_16
|
||||
%Data = OpTypeStruct %_arr_v3float_uint_16
|
||||
%_arr_Data_uint_64 = OpTypeArray %Data %uint_64
|
||||
%_ptr_Workgroup__arr_Data_uint_64 = OpTypePointer Workgroup %_arr_Data_uint_64
|
||||
%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown
|
||||
%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
|
||||
%type_2d_image_0 = OpTypeImage %uint 2D 2 0 0 2 R32ui
|
||||
%_ptr_UniformConstant_type_2d_image_0 = OpTypePointer UniformConstant %type_2d_image_0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%34 = OpTypeFunction %void
|
||||
%v2uint = OpTypeVector %uint 2
|
||||
%_ptr_Function__arr_v3float_uint_16 = OpTypePointer Function %_arr_v3float_uint_16
|
||||
%_ptr_Workgroup__arr_v3float_uint_16 = OpTypePointer Workgroup %_arr_v3float_uint_16
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%bool = OpTypeBool
|
||||
%v3int = OpTypeVector %int 3
|
||||
%v2int = OpTypeVector %int 2
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
|
||||
%g_data = OpVariable %_ptr_Workgroup__arr_Data_uint_64 Workgroup
|
||||
%g_inputTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
|
||||
%g_output = OpVariable %_ptr_UniformConstant_type_2d_image_0 UniformConstant
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%csMain = OpFunction %void None %34
|
||||
%44 = OpLabel
|
||||
%45 = OpVariable %_ptr_Function__arr_v3float_uint_16 Function
|
||||
%46 = OpLoad %v3uint %gl_GlobalInvocationID
|
||||
%47 = OpCompositeExtract %uint %46 0
|
||||
OpBranch %48
|
||||
%48 = OpLabel
|
||||
%49 = OpPhi %uint %uint_0 %44 %50 %51
|
||||
%52 = OpULessThan %bool %49 %uint_4
|
||||
OpLoopMerge %53 %51 None
|
||||
OpBranchConditional %52 %54 %53
|
||||
%54 = OpLabel
|
||||
OpBranch %55
|
||||
%55 = OpLabel
|
||||
%56 = OpPhi %uint %uint_0 %54 %57 %58
|
||||
%59 = OpULessThan %bool %56 %uint_4
|
||||
OpLoopMerge %60 %58 None
|
||||
OpBranchConditional %59 %58 %60
|
||||
%58 = OpLabel
|
||||
%61 = OpBitcast %v3int %46
|
||||
%62 = OpBitcast %int %56
|
||||
%63 = OpBitcast %int %49
|
||||
%64 = OpCompositeConstruct %v3int %62 %63 %int_0
|
||||
%65 = OpIAdd %v3int %61 %64
|
||||
%66 = OpVectorShuffle %v2int %65 %65 0 1
|
||||
%67 = OpCompositeExtract %int %65 2
|
||||
%68 = OpLoad %type_2d_image %g_inputTexture
|
||||
%69 = OpImageFetch %v4float %68 %66 Lod %67
|
||||
%70 = OpVectorShuffle %v3float %69 %69 0 1 2
|
||||
%71 = OpIMul %uint %49 %uint_4
|
||||
%72 = OpIAdd %uint %71 %56
|
||||
%73 = OpAccessChain %_ptr_Workgroup_v3float %g_data %47 %int_0 %72
|
||||
OpStore %73 %70
|
||||
%57 = OpIAdd %uint %56 %uint_1
|
||||
OpBranch %55
|
||||
%60 = OpLabel
|
||||
OpBranch %51
|
||||
%51 = OpLabel
|
||||
%50 = OpIAdd %uint %49 %uint_1
|
||||
OpBranch %48
|
||||
%53 = OpLabel
|
||||
%74 = OpAccessChain %_ptr_Workgroup__arr_v3float_uint_16 %g_data %47 %int_0
|
||||
%75 = OpLoad %_arr_v3float_uint_16 %74
|
||||
OpStore %45 %75
|
||||
OpBranch %76
|
||||
%76 = OpLabel
|
||||
%77 = OpPhi %uint %uint_0 %53 %78 %79
|
||||
%80 = OpPhi %int %int_0 %53 %81 %79
|
||||
%82 = OpSLessThan %bool %80 %int_16
|
||||
OpLoopMerge %83 %79 None
|
||||
OpBranchConditional %82 %79 %83
|
||||
%79 = OpLabel
|
||||
%84 = OpAccessChain %_ptr_Function_v3float %45 %80
|
||||
%85 = OpLoad %v3float %84
|
||||
%86 = OpDot %float %85 %20
|
||||
%87 = OpExtInst %float %1 FClamp %86 %float_0 %float_1
|
||||
%88 = OpConvertFToU %uint %87
|
||||
%78 = OpBitwiseOr %uint %77 %88
|
||||
%81 = OpIAdd %int %80 %int_1
|
||||
OpBranch %76
|
||||
%83 = OpLabel
|
||||
%89 = OpVectorShuffle %v2uint %46 %46 0 1
|
||||
%90 = OpLoad %type_2d_image_0 %g_output
|
||||
OpImageWrite %90 %89 %77 None
|
||||
OpReturn
|
||||
OpFunctionEnd
|
60
shaders-msl-no-opt/asm/comp/variable-pointers-3.asm.comp
Normal file
60
shaders-msl-no-opt/asm/comp/variable-pointers-3.asm.comp
Normal file
@ -0,0 +1,60 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 11
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability VariablePointers
|
||||
OpExtension "SPV_KHR_variable_pointers"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
|
||||
OpExecutionMode %main LocalSize 64 1 1
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %test "test"
|
||||
OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
|
||||
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
|
||||
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%bool = OpTypeBool
|
||||
%true = OpConstantTrue %bool
|
||||
%v2float = OpTypeVector %float 2
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_64 = OpConstant %uint 64
|
||||
%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64
|
||||
%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64
|
||||
%test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup
|
||||
%_ptr_Input_uint = OpTypePointer Input %uint
|
||||
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_3 = OpConstant %uint 3
|
||||
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
|
||||
%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
|
||||
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%14 = OpLoad %uint %gl_LocalInvocationIndex
|
||||
%19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
|
||||
%20 = OpLoad %uint %19
|
||||
%21 = OpConvertUToF %float %20
|
||||
%22 = OpCompositeConstruct %v2float %21 %21
|
||||
|
||||
; Scalar shenanigans.
|
||||
%ptr6 = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0
|
||||
%ptr6_alt = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0
|
||||
%ptr6_sel = OpSelect %_ptr_Workgroup_float %true %ptr6 %ptr6_alt
|
||||
%ptr7 = OpPtrAccessChain %_ptr_Workgroup_float %ptr6_sel %uint_1
|
||||
OpStore %ptr7 %21
|
||||
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -1464,7 +1464,7 @@ void CompilerMSL::emit_entry_point_declarations()
|
||||
{
|
||||
auto &var = get<SPIRVariable>(var_id);
|
||||
add_local_variable_name(var_id);
|
||||
statement(variable_decl(var), ";");
|
||||
statement(CompilerGLSL::variable_decl(var), ";");
|
||||
var.deferred_declaration = false;
|
||||
}
|
||||
}
|
||||
@ -3457,9 +3457,6 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo
|
||||
auto &type = get_variable_data_type(masked_var);
|
||||
add_local_variable_name(masked_var.self);
|
||||
|
||||
bool old_is_builtin = is_using_builtin_array;
|
||||
is_using_builtin_array = true;
|
||||
|
||||
const uint32_t max_control_points_per_patch = 32u;
|
||||
uint32_t max_num_instances =
|
||||
(max_control_points_per_patch + get_entry_point().output_vertices - 1u) /
|
||||
@ -3475,14 +3472,12 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo
|
||||
// since Metal does not allow that. :(
|
||||
// FIXME: We will likely need an option to support passing down target workgroup size,
|
||||
// so we can emit appropriate size here.
|
||||
statement("threadgroup ", type_to_glsl(type), " ",
|
||||
"(&", to_name(masked_var.self), ")",
|
||||
type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[",
|
||||
statement("threadgroup auto ",
|
||||
"&", to_name(masked_var.self),
|
||||
" = spvStorage", to_name(masked_var.self), "[",
|
||||
"(", to_expression(builtin_invocation_id_id), ".x / ",
|
||||
get_entry_point().output_vertices, ") % ",
|
||||
max_num_instances, "];");
|
||||
|
||||
is_using_builtin_array = old_is_builtin;
|
||||
});
|
||||
}
|
||||
else
|
||||
@ -9770,8 +9765,8 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
|
||||
bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
|
||||
bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);
|
||||
|
||||
bool lhs_is_array_template = lhs_is_thread_storage;
|
||||
bool rhs_is_array_template = rhs_is_thread_storage;
|
||||
bool lhs_is_array_template = lhs_is_thread_storage || lhs_storage == StorageClassWorkgroup;
|
||||
bool rhs_is_array_template = rhs_is_thread_storage || rhs_storage == StorageClassWorkgroup;
|
||||
|
||||
// Special considerations for stage IO variables.
|
||||
// If the variable is actually backed by non-user visible device storage, we use array templates for those.
|
||||
@ -9786,15 +9781,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
|
||||
auto *lhs_var = maybe_get_backing_variable(lhs_id);
|
||||
if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
|
||||
lhs_is_array_template = true;
|
||||
else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) &&
|
||||
type_is_block_like(get<SPIRType>(lhs_var->basetype)))
|
||||
else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
|
||||
lhs_is_array_template = false;
|
||||
|
||||
auto *rhs_var = maybe_get_backing_variable(rhs_id);
|
||||
if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
|
||||
rhs_is_array_template = true;
|
||||
else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) &&
|
||||
type_is_block_like(get<SPIRType>(rhs_var->basetype)))
|
||||
else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
|
||||
rhs_is_array_template = false;
|
||||
|
||||
// If threadgroup storage qualifiers are *not* used:
|
||||
@ -14399,9 +14392,6 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
|
||||
bool builtin = has_decoration(var.self, DecorationBuiltIn);
|
||||
auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn));
|
||||
|
||||
if (address_space == "threadgroup")
|
||||
is_using_builtin_array = true;
|
||||
|
||||
if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
|
||||
decl = join(cv_qualifier, type_to_glsl(type, arg.id));
|
||||
else if (builtin)
|
||||
@ -15327,19 +15317,6 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable
|
||||
}
|
||||
}
|
||||
|
||||
std::string CompilerMSL::variable_decl(const SPIRVariable &variable)
|
||||
{
|
||||
bool old_is_using_builtin_array = is_using_builtin_array;
|
||||
|
||||
// Threadgroup arrays can't have a wrapper type.
|
||||
if (variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
|
||||
is_using_builtin_array = true;
|
||||
|
||||
auto expr = CompilerGLSL::variable_decl(variable);
|
||||
is_using_builtin_array = old_is_using_builtin_array;
|
||||
return expr;
|
||||
}
|
||||
|
||||
// GCC workaround of lambdas calling protected funcs
|
||||
std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id)
|
||||
{
|
||||
|
@ -857,9 +857,6 @@ protected:
|
||||
std::string type_to_array_glsl(const SPIRType &type) override;
|
||||
std::string constant_op_expression(const SPIRConstantOp &cop) override;
|
||||
|
||||
// Threadgroup arrays can't have a wrapper type
|
||||
std::string variable_decl(const SPIRVariable &variable) override;
|
||||
|
||||
bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override;
|
||||
|
||||
// GCC workaround of lambdas calling protected functions (for older GCC versions)
|
||||
|
Loading…
Reference in New Issue
Block a user