MSL: Fix copy of arrays to/from stage IO variables.
Need to take into account effective storage classes and whether or not we target stage IO blocks since native arrays are conditionally enabled.
This commit is contained in:
parent
986196030d
commit
ae9ca7d73c
@ -0,0 +1,188 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
spvUnsafeArray<float4, 2> iFoo;
|
||||
float4 ipFoo;
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStorageFoo[8][4][2];
|
||||
threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,191 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 iFoo_0 [[attribute(0)]];
|
||||
float4 iFoo_1 [[attribute(1)]];
|
||||
float4 ipFoo [[attribute(2)]];
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup float4 Foo[4][2];
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_InvocationID >= 4)
|
||||
return;
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
|
||||
spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,79 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
spvUnsafeArray<float4, 2> Foo;
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
spvUnsafeArray<float4, 2> iFoo;
|
||||
float4 ipFoo;
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,83 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
spvUnsafeArray<float4, 2> Foo;
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 iFoo_0 [[attribute(0)]];
|
||||
float4 iFoo_1 [[attribute(1)]];
|
||||
float4 ipFoo [[attribute(2)]];
|
||||
};
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_InvocationID >= 4)
|
||||
return;
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
|
||||
gl_out[gl_InvocationID].Foo = _38;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,188 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
spvUnsafeArray<float4, 2> iFoo;
|
||||
float4 ipFoo;
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
threadgroup float4 spvStorageFoo[8][4][2];
|
||||
threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,191 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 iFoo_0 [[attribute(0)]];
|
||||
float4 iFoo_1 [[attribute(1)]];
|
||||
float4 ipFoo [[attribute(2)]];
|
||||
};
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, uint A>
|
||||
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
|
||||
{
|
||||
for (uint i = 0; i < A; i++)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
threadgroup float4 Foo[4][2];
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_InvocationID >= 4)
|
||||
return;
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
|
||||
spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,79 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
spvUnsafeArray<float4, 2> Foo;
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
spvUnsafeArray<float4, 2> iFoo;
|
||||
float4 ipFoo;
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
|
||||
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
|
||||
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
|
||||
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,83 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
spvUnsafeArray<float4, 2> Foo;
|
||||
float4 gl_Position;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
spvUnsafeArray<float4, 2> pFoo;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 iFoo_0 [[attribute(0)]];
|
||||
float4 iFoo_1 [[attribute(1)]];
|
||||
float4 ipFoo [[attribute(2)]];
|
||||
};
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_InvocationID >= 4)
|
||||
return;
|
||||
gl_out[gl_InvocationID].gl_Position = float4(1.0);
|
||||
spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
|
||||
gl_out[gl_InvocationID].Foo = _38;
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
|
||||
patchOut.pFoo = _56;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
#version 450
|
||||
|
||||
layout(vertices = 4) out;
|
||||
layout(location = 0) out vec4 Foo[][2];
|
||||
layout(location = 0) in vec4 iFoo[][2];
|
||||
layout(location = 2) patch out vec4 pFoo[2];
|
||||
layout(location = 2) in vec4 ipFoo[];
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_out[gl_InvocationID].gl_Position = vec4(1.0);
|
||||
Foo[gl_InvocationID] = iFoo[gl_InvocationID];
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
pFoo = vec4[](ipFoo[0], ipFoo[1]);
|
||||
}
|
||||
}
|
17
shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc
Normal file
17
shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc
Normal file
@ -0,0 +1,17 @@
|
||||
#version 450
|
||||
|
||||
layout(vertices = 4) out;
|
||||
layout(location = 0) out vec4 Foo[][2];
|
||||
layout(location = 0) in vec4 iFoo[][2];
|
||||
layout(location = 2) patch out vec4 pFoo[2];
|
||||
layout(location = 2) in vec4 ipFoo[];
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_out[gl_InvocationID].gl_Position = vec4(1.0);
|
||||
Foo[gl_InvocationID] = iFoo[gl_InvocationID];
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
pFoo = vec4[](ipFoo[0], ipFoo[1]);
|
||||
}
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
#version 450
|
||||
|
||||
layout(vertices = 4) out;
|
||||
layout(location = 0) out vec4 Foo[][2];
|
||||
layout(location = 0) in vec4 iFoo[][2];
|
||||
layout(location = 2) patch out vec4 pFoo[2];
|
||||
layout(location = 2) in vec4 ipFoo[];
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_out[gl_InvocationID].gl_Position = vec4(1.0);
|
||||
Foo[gl_InvocationID] = iFoo[gl_InvocationID];
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
pFoo = vec4[](ipFoo[0], ipFoo[1]);
|
||||
}
|
||||
}
|
17
shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc
Normal file
17
shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc
Normal file
@ -0,0 +1,17 @@
|
||||
#version 450
|
||||
|
||||
layout(vertices = 4) out;
|
||||
layout(location = 0) out vec4 Foo[][2];
|
||||
layout(location = 0) in vec4 iFoo[][2];
|
||||
layout(location = 2) patch out vec4 pFoo[2];
|
||||
layout(location = 2) in vec4 ipFoo[];
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_out[gl_InvocationID].gl_Position = vec4(1.0);
|
||||
Foo[gl_InvocationID] = iFoo[gl_InvocationID];
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
pFoo = vec4[](ipFoo[0], ipFoo[1]);
|
||||
}
|
||||
}
|
@ -284,31 +284,6 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
|
||||
return var;
|
||||
}
|
||||
|
||||
StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr)
|
||||
{
|
||||
auto *var = maybe_get_backing_variable(ptr);
|
||||
|
||||
// If the expression has been lowered to a temporary, we need to use the Generic storage class.
|
||||
// We're looking for the effective storage class of a given expression.
|
||||
// An access chain or forwarded OpLoads from such access chains
|
||||
// will generally have the storage class of the underlying variable, but if the load was not forwarded
|
||||
// we have lost any address space qualifiers.
|
||||
bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
|
||||
(forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
|
||||
|
||||
if (var && !forced_temporary)
|
||||
{
|
||||
// Normalize SSBOs to StorageBuffer here.
|
||||
if (var->storage == StorageClassUniform &&
|
||||
has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
|
||||
return StorageClassStorageBuffer;
|
||||
else
|
||||
return var->storage;
|
||||
}
|
||||
else
|
||||
return expression_type(ptr).storage;
|
||||
}
|
||||
|
||||
void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
|
||||
{
|
||||
auto &e = get<SPIRExpression>(expr);
|
||||
|
@ -671,7 +671,6 @@ protected:
|
||||
bool expression_is_lvalue(uint32_t id) const;
|
||||
bool variable_storage_is_aliased(const SPIRVariable &var);
|
||||
SPIRVariable *maybe_get_backing_variable(uint32_t chain);
|
||||
spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
|
||||
|
||||
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
|
||||
void register_write(uint32_t chain);
|
||||
|
@ -8661,12 +8661,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
bool ignore_potential_sliced_writes = false;
|
||||
if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
|
||||
{
|
||||
auto *var = maybe_get_backing_variable(base);
|
||||
if (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
|
||||
effective_storage = StorageClassWorkgroup;
|
||||
else if (var && variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
|
||||
effective_storage = StorageClassStorageBuffer;
|
||||
else if (expression_type(base).pointer)
|
||||
if (expression_type(base).pointer)
|
||||
effective_storage = get_expression_effective_storage_class(base);
|
||||
|
||||
// Special consideration for control points.
|
||||
@ -8674,6 +8669,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
// to consider scalar access chains here.
|
||||
// Cleans up some cases where it's very painful to determine the accurate storage class
|
||||
// since blocks can be partially masked ...
|
||||
auto *var = maybe_get_backing_variable(base);
|
||||
if (var && var->storage == StorageClassOutput &&
|
||||
get_execution_model() == ExecutionModelTessellationControl &&
|
||||
!has_decoration(var->self, DecorationPatch))
|
||||
@ -9911,7 +9907,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
// it is an array, and our backend does not support arrays as value types.
|
||||
// Emit the temporary, and copy it explicitly.
|
||||
e = &emit_uninitialized_temporary_expression(result_type, id);
|
||||
emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
|
||||
emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
|
||||
}
|
||||
else
|
||||
e = &emit_op(result_type, id, expr, forward, !usage_tracking);
|
||||
@ -14794,7 +14790,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
// The backend is responsible for setting this up, and redirection the return values as appropriate.
|
||||
if (ir.ids[block.return_value].get_type() != TypeUndef)
|
||||
{
|
||||
emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
|
||||
emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
|
||||
get_expression_effective_storage_class(block.return_value));
|
||||
}
|
||||
|
||||
@ -15013,7 +15009,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
|
||||
MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
|
||||
void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
|
||||
{
|
||||
statement(lhs, " = ", to_expression(rhs_id), ";");
|
||||
}
|
||||
@ -15923,3 +15919,33 @@ uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var,
|
||||
|
||||
return location;
|
||||
}
|
||||
|
||||
StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
|
||||
{
|
||||
auto *var = maybe_get_backing_variable(ptr);
|
||||
|
||||
// If the expression has been lowered to a temporary, we need to use the Generic storage class.
|
||||
// We're looking for the effective storage class of a given expression.
|
||||
// An access chain or forwarded OpLoads from such access chains
|
||||
// will generally have the storage class of the underlying variable, but if the load was not forwarded
|
||||
// we have lost any address space qualifiers.
|
||||
bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
|
||||
(forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
|
||||
|
||||
if (var && !forced_temporary)
|
||||
{
|
||||
if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
|
||||
return StorageClassWorkgroup;
|
||||
if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
|
||||
return StorageClassStorageBuffer;
|
||||
|
||||
// Normalize SSBOs to StorageBuffer here.
|
||||
if (var->storage == StorageClassUniform &&
|
||||
has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
|
||||
return StorageClassStorageBuffer;
|
||||
else
|
||||
return var->storage;
|
||||
}
|
||||
else
|
||||
return expression_type(ptr).storage;
|
||||
}
|
||||
|
@ -673,6 +673,7 @@ protected:
|
||||
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
|
||||
AccessChainMeta *meta);
|
||||
|
||||
spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
|
||||
virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
|
||||
|
||||
virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
|
||||
@ -739,8 +740,8 @@ protected:
|
||||
std::string layout_for_variable(const SPIRVariable &variable);
|
||||
std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id);
|
||||
virtual bool skip_argument(uint32_t id) const;
|
||||
virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
|
||||
spv::StorageClass rhs_storage);
|
||||
virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
|
||||
spv::StorageClass lhs_storage, spv::StorageClass rhs_storage);
|
||||
virtual void emit_block_hints(const SPIRBlock &block);
|
||||
virtual std::string to_initializer_expression(const SPIRVariable &var);
|
||||
virtual std::string to_zero_initialized_expression(uint32_t type_id);
|
||||
|
@ -8397,20 +8397,46 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
|
||||
flush_all_active_variables();
|
||||
}
|
||||
|
||||
void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage,
|
||||
StorageClass rhs_storage)
|
||||
static bool storage_class_array_is_thread(StorageClass storage)
|
||||
{
|
||||
switch (storage)
|
||||
{
|
||||
case StorageClassInput:
|
||||
case StorageClassOutput:
|
||||
case StorageClassGeneric:
|
||||
case StorageClassFunction:
|
||||
case StorageClassPrivate:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id,
|
||||
StorageClass lhs_storage, StorageClass rhs_storage)
|
||||
{
|
||||
// Allow Metal to use the array<T> template to make arrays a value type.
|
||||
// This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback.
|
||||
bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction ||
|
||||
lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate);
|
||||
bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction ||
|
||||
rhs_storage == StorageClassOutput ||
|
||||
rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate);
|
||||
bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
|
||||
bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);
|
||||
|
||||
bool lhs_is_array_template = lhs_is_thread_storage;
|
||||
bool rhs_is_array_template = rhs_is_thread_storage;
|
||||
|
||||
// Special considerations for stage IO variables.
|
||||
// If the variable is actually backed by non-user visible device storage, we use array templates for those.
|
||||
auto *lhs_var = maybe_get_backing_variable(lhs_id);
|
||||
if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
|
||||
lhs_is_array_template = true;
|
||||
|
||||
auto *rhs_var = maybe_get_backing_variable(rhs_id);
|
||||
if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
|
||||
rhs_is_array_template = true;
|
||||
|
||||
// If threadgroup storage qualifiers are *not* used:
|
||||
// Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
|
||||
if (lhs_thread && rhs_thread && !using_builtin_array())
|
||||
if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array())
|
||||
{
|
||||
statement(lhs, " = ", to_expression(rhs_id), ";");
|
||||
}
|
||||
@ -8452,15 +8478,15 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
|
||||
add_spv_func_and_recompile(SPVFuncImplArrayCopy);
|
||||
|
||||
const char *tag = nullptr;
|
||||
if (lhs_thread && is_constant)
|
||||
if (lhs_is_thread_storage && is_constant)
|
||||
tag = "FromConstantToStack";
|
||||
else if (lhs_storage == StorageClassWorkgroup && is_constant)
|
||||
tag = "FromConstantToThreadGroup";
|
||||
else if (lhs_thread && rhs_thread)
|
||||
else if (lhs_is_thread_storage && rhs_is_thread_storage)
|
||||
tag = "FromStackToStack";
|
||||
else if (lhs_storage == StorageClassWorkgroup && rhs_thread)
|
||||
else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage)
|
||||
tag = "FromStackToThreadGroup";
|
||||
else if (lhs_thread && rhs_storage == StorageClassWorkgroup)
|
||||
else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup)
|
||||
tag = "FromThreadGroupToStack";
|
||||
else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
|
||||
tag = "FromThreadGroupToThreadGroup";
|
||||
@ -8470,19 +8496,21 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
|
||||
tag = "FromConstantToDevice";
|
||||
else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup)
|
||||
tag = "FromThreadGroupToDevice";
|
||||
else if (lhs_storage == StorageClassStorageBuffer && rhs_thread)
|
||||
else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage)
|
||||
tag = "FromStackToDevice";
|
||||
else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer)
|
||||
tag = "FromDeviceToThreadGroup";
|
||||
else if (lhs_thread && rhs_storage == StorageClassStorageBuffer)
|
||||
else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer)
|
||||
tag = "FromDeviceToStack";
|
||||
else
|
||||
SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");
|
||||
|
||||
// Pass internal array of spvUnsafeArray<> into wrapper functions
|
||||
if (lhs_thread && !msl_options.force_native_arrays)
|
||||
if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays)
|
||||
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
|
||||
if (lhs_is_array_template && !msl_options.force_native_arrays)
|
||||
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");");
|
||||
else if (rhs_thread && !msl_options.force_native_arrays)
|
||||
else if (rhs_is_array_template && !msl_options.force_native_arrays)
|
||||
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);");
|
||||
else
|
||||
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
|
||||
@ -8549,8 +8577,9 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
|
||||
if (p_v_lhs)
|
||||
flush_variable_declaration(p_v_lhs->self);
|
||||
|
||||
emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs),
|
||||
get_expression_effective_storage_class(id_rhs));
|
||||
auto lhs_storage = get_expression_effective_storage_class(id_lhs);
|
||||
auto rhs_storage = get_expression_effective_storage_class(id_rhs);
|
||||
emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage);
|
||||
register_write(id_lhs);
|
||||
|
||||
return true;
|
||||
@ -13242,9 +13271,9 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable
|
||||
// This is fine, as there cannot be concurrent writers to that memory anyways,
|
||||
// so we just ignore that case.
|
||||
|
||||
return capture_output_to_buffer &&
|
||||
variable.storage == StorageClassOutput &&
|
||||
!is_stage_output_variable_masked(variable);
|
||||
return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) &&
|
||||
!variable_storage_requires_stage_io(variable.storage) &&
|
||||
(variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -913,8 +913,8 @@ protected:
|
||||
void add_pragma_line(const std::string &line);
|
||||
void add_typedef_line(const std::string &line);
|
||||
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
|
||||
void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
|
||||
spv::StorageClass rhs_storage) override;
|
||||
void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
|
||||
spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
|
||||
void build_implicit_builtins();
|
||||
uint32_t build_constant_uint_array_pointer();
|
||||
void emit_entry_point_declarations() override;
|
||||
|
Loading…
Reference in New Issue
Block a user