SPIRV-Cross/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp
Bill Hollings ec054dad7f MSL: Support synthetic functions in function constants.
Emit synthetic functions before function constants.
Support use of spvQuantizeToF16() in function constants for numerical
behavior consistency with the op code.
Ensure subnormal results from OpQuantizeToF16 are flushed to zero per SPIR-V spec.

Adjust SPIRV-Cross unit test reference shaders to accommodate these changes.
Any MSL reference shader that inclues a synthetic function is affected,
since the location it is emitted has changed.
2021-09-28 19:10:16 -04:00

173 lines
4.1 KiB
Plaintext

#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u);
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
threadgroup float shared_group[8][8];
threadgroup float shared_group_alt[8][8];
spvUnsafeArray<float, 8> blob;
for (int i = 0; i < 8; i++)
{
blob[i] = float(i);
}
spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob.elements);
threadgroup_barrier(mem_flags::mem_threadgroup);
spvUnsafeArray<float, 8> copied_blob;
spvArrayCopyFromThreadGroupToStack1(copied_blob.elements, shared_group[gl_LocalInvocationIndex ^ 1u]);
spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]);
}