Merge pull request #1395 from KhronosGroup/fix-1394

MSL: Deal with array load-store in buffer-block structs
This commit is contained in:
Hans-Kristian Arntzen 2020-06-18 14:02:12 +02:00 committed by GitHub
commit d7976b7b24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 1485 additions and 14 deletions

View File

@ -81,6 +81,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], t
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } };

View File

@ -0,0 +1,22 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Block
{
uint2 _m0[2];
uint2 _m1[2];
};
struct SSBO
{
Block _m0[3];
};
kernel void main0(device SSBO& ssbo [[buffer(0)]])
{
threadgroup uint2 _18[2];
ssbo._m0[0u] = Block{ { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] }, { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] } };
}

View File

@ -0,0 +1,174 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Block
{
uint2 _m0[2];
uint2 _m1[2];
};
struct SSBO
{
Block _m0[3];
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
kernel void main0(device SSBO& ssbo [[buffer(0)]])
{
threadgroup uint2 _18[2];
spvUnsafeArray<uint2, 2> _27;
spvArrayCopyFromDeviceToStack1(_27.elements, ssbo._m0[0u]._m1);
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements);
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements);
}

View File

@ -0,0 +1,135 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Block
{
uint2 _m0[2];
uint2 _m1[2];
};
struct SSBO
{
Block _m0[3];
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
kernel void main0(device SSBO& ssbo [[buffer(0)]])
{
threadgroup uint2 _18[2];
uint2 _27[2];
spvArrayCopyFromDeviceToStack1(_27, ssbo._m0[0u]._m1);
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27);
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27);
}

View File

@ -0,0 +1,179 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Block
{
uint2 _m0[2];
uint2 _m1[2];
};
struct SSBO
{
Block _m0[3];
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]])
{
threadgroup uint2 _18[2];
spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1);
spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1);
spvUnsafeArray<uint2, 2> _24;
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _24.elements);
spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18);
spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1);
spvArrayCopyFromDeviceToStack1(_24.elements, ssbo._m0[0u]._m1);
spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1);
spvArrayCopyFromConstantToStack1(_24.elements, ubo._m0[0u]._m1);
}

View File

@ -0,0 +1,140 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Block
{
uint2 _m0[2];
uint2 _m1[2];
};
struct SSBO
{
Block _m0[3];
};
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]])
{
threadgroup uint2 _18[2];
spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1);
spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1);
uint2 _24[2];
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _24);
spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18);
spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1);
spvArrayCopyFromDeviceToStack1(_24, ssbo._m0[0u]._m1);
spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1);
spvArrayCopyFromConstantToStack1(_24, ubo._m0[0u]._m1);
}

View File

@ -100,6 +100,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], t
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
threadgroup float shared_group[8][8];

View File

@ -72,6 +72,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], t
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
static inline __attribute__((always_inline))
float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
{

View File

@ -83,6 +83,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], t
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
static inline __attribute__((always_inline))
Data combine(thread const Data& a, thread const Data& b)
{

View File

@ -73,6 +73,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], t
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B])
{
@ -127,6 +181,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B]
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromDeviceToDevice2(device T (&dst)[A][B], device const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromDeviceToDevice1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToDevice2(device T (&dst)[A][B], constant T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToDevice1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToDevice2(device T (&dst)[A][B], thread const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToDevice1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToDevice2(device T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToDevice1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromDeviceToStack2(thread T (&dst)[A][B], device const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromDeviceToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromDeviceToThreadGroup2(threadgroup T (&dst)[A][B], device const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromDeviceToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
@ -181,6 +289,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B]
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromDeviceToDevice3(device T (&dst)[A][B][C], device const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromDeviceToDevice2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToDevice3(device T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToDevice2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToDevice3(device T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToDevice2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToDevice3(device T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToDevice2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromDeviceToStack3(thread T (&dst)[A][B][C], device const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromDeviceToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromDeviceToThreadGroup3(threadgroup T (&dst)[A][B][C], device const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromDeviceToThreadGroup2(dst[i], src[i]);
}
}
kernel void main0(device BUF& o [[buffer(0)]])
{
float c[2][2][2];

View File

@ -72,6 +72,60 @@ inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], t
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
static inline __attribute__((always_inline))
void test(thread float4 (&SPIRV_Cross_return_value)[2])
{

View File

@ -0,0 +1,54 @@
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpName %Block "Block"
OpName %SSBO "SSBO"
OpName %SSBO_Var "ssbo"
OpName %UBO_Var "ubo"
OpDecorate %SSBO_Var Binding 0
OpDecorate %SSBO_Var DescriptorSet 0
OpDecorate %UBO_Var Binding 1
OpDecorate %UBO_Var DescriptorSet 0
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %Block 0 Offset 0
OpMemberDecorate %Block 1 Offset 16
OpDecorate %BlockArray ArrayStride 32
OpDecorate %arr_uvec2_2 ArrayStride 8
OpDecorate %SSBO Block
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_3 = OpConstant %uint 3
%uvec2 = OpTypeVector %uint 2
%arr_uvec2_2 = OpTypeArray %uvec2 %uint_2
%arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2
%arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2
%arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2
%arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2
%wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup
%Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2
%Block_ptr = OpTypePointer StorageBuffer %Block
%BlockArray = OpTypeArray %Block %uint_3
%SSBO = OpTypeStruct %BlockArray
%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO
%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer
%UBO_Ptr = OpTypePointer Uniform %SSBO
%UBO_Var = OpVariable %UBO_Ptr Uniform
%void = OpTypeVoid
%func_type = OpTypeFunction %void
%main = OpFunction %void None %func_type
%25 = OpLabel
%func = OpVariable %arr_uvec2_2_ptr_func Function
; Copy device array to temporary.
%ptr = OpAccessChain %Block_ptr %SSBO_Var %uint_0 %uint_0
%ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1
%loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1
%constructed = OpCompositeConstruct %Block %loaded_array %loaded_array
OpStore %ptr %constructed
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,53 @@
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpName %Block "Block"
OpName %SSBO "SSBO"
OpName %SSBO_Var "ssbo"
OpName %UBO_Var "ubo"
OpDecorate %SSBO_Var Binding 0
OpDecorate %SSBO_Var DescriptorSet 0
OpDecorate %UBO_Var Binding 1
OpDecorate %UBO_Var DescriptorSet 0
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %Block 0 Offset 0
OpMemberDecorate %Block 1 Offset 16
OpDecorate %BlockArray ArrayStride 32
OpDecorate %arr_uvec2_2 ArrayStride 8
OpDecorate %SSBO Block
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_3 = OpConstant %uint 3
%uvec2 = OpTypeVector %uint 2
%arr_uvec2_2 = OpTypeArray %uvec2 %uint_2
%arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2
%arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2
%arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2
%arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2
%wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup
%Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2
%BlockArray = OpTypeArray %Block %uint_3
%SSBO = OpTypeStruct %BlockArray
%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO
%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer
%UBO_Ptr = OpTypePointer Uniform %SSBO
%UBO_Var = OpVariable %UBO_Ptr Uniform
%void = OpTypeVoid
%func_type = OpTypeFunction %void
%main = OpFunction %void None %func_type
%25 = OpLabel
%func = OpVariable %arr_uvec2_2_ptr_func Function
; Copy device array to temporary.
%ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0
%ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1
%loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %ptr_arr_0 %loaded_array
OpStore %ptr_arr_0 %loaded_array
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,53 @@
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpName %Block "Block"
OpName %SSBO "SSBO"
OpName %SSBO_Var "ssbo"
OpName %UBO_Var "ubo"
OpDecorate %SSBO_Var Binding 0
OpDecorate %SSBO_Var DescriptorSet 0
OpDecorate %UBO_Var Binding 1
OpDecorate %UBO_Var DescriptorSet 0
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %Block 0 Offset 0
OpMemberDecorate %Block 1 Offset 16
OpDecorate %BlockArray ArrayStride 32
OpDecorate %arr_uvec2_2 ArrayStride 8
OpDecorate %SSBO Block
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_3 = OpConstant %uint 3
%uvec2 = OpTypeVector %uint 2
%arr_uvec2_2 = OpTypeArray %uvec2 %uint_2
%arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2
%arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2
%arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2
%arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2
%wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup
%Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2
%BlockArray = OpTypeArray %Block %uint_3
%SSBO = OpTypeStruct %BlockArray
%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO
%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer
%UBO_Ptr = OpTypePointer Uniform %SSBO
%UBO_Var = OpVariable %UBO_Ptr Uniform
%void = OpTypeVoid
%func_type = OpTypeFunction %void
%main = OpFunction %void None %func_type
%25 = OpLabel
%func = OpVariable %arr_uvec2_2_ptr_func Function
; Copy device array to temporary.
%ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0
%ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1
%loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %ptr_arr_0 %loaded_array
OpStore %ptr_arr_0 %loaded_array
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,81 @@
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpName %Block "Block"
OpName %SSBO "SSBO"
OpName %SSBO_Var "ssbo"
OpName %UBO_Var "ubo"
OpDecorate %SSBO_Var Binding 0
OpDecorate %SSBO_Var DescriptorSet 0
OpDecorate %UBO_Var Binding 1
OpDecorate %UBO_Var DescriptorSet 0
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %Block 0 Offset 0
OpMemberDecorate %Block 1 Offset 16
OpDecorate %BlockArray ArrayStride 32
OpDecorate %arr_uvec2_2 ArrayStride 8
OpDecorate %SSBO Block
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_3 = OpConstant %uint 3
%uvec2 = OpTypeVector %uint 2
%arr_uvec2_2 = OpTypeArray %uvec2 %uint_2
%arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2
%arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2
%arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2
%arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2
%wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup
%Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2
%BlockArray = OpTypeArray %Block %uint_3
%SSBO = OpTypeStruct %BlockArray
%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO
%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer
%UBO_Ptr = OpTypePointer Uniform %SSBO
%UBO_Var = OpVariable %UBO_Ptr Uniform
%void = OpTypeVoid
%func_type = OpTypeFunction %void
%main = OpFunction %void None %func_type
%25 = OpLabel
%func = OpVariable %arr_uvec2_2_ptr_func Function
; DeviceToDevice
%ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0
%ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1
%loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %ptr_arr_0 %loaded_array
; ConstantToDevice
%ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1
%loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const
OpStore %ptr_arr_0 %loaded_array_const
; StackToDevice
%loaded_array_func = OpLoad %arr_uvec2_2 %func
OpStore %ptr_arr_0 %loaded_array_func
; ThreadGroupToDevice
%loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg
OpStore %ptr_arr_0 %loaded_array_workgroup
; DeviceToThreadGroup
%loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %wg %loaded_array_2
; DeviceToStack
%loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %func %loaded_array_3
; ConstantToThreadGroup
%loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const
OpStore %wg %loaded_array_const_2
; ConstantToStack
%loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const
OpStore %func %loaded_array_const_3
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,81 @@
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpName %Block "Block"
OpName %SSBO "SSBO"
OpName %SSBO_Var "ssbo"
OpName %UBO_Var "ubo"
OpDecorate %SSBO_Var Binding 0
OpDecorate %SSBO_Var DescriptorSet 0
OpDecorate %UBO_Var Binding 1
OpDecorate %UBO_Var DescriptorSet 0
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %Block 0 Offset 0
OpMemberDecorate %Block 1 Offset 16
OpDecorate %BlockArray ArrayStride 32
OpDecorate %arr_uvec2_2 ArrayStride 8
OpDecorate %SSBO Block
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_3 = OpConstant %uint 3
%uvec2 = OpTypeVector %uint 2
%arr_uvec2_2 = OpTypeArray %uvec2 %uint_2
%arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2
%arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2
%arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2
%arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2
%wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup
%Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2
%BlockArray = OpTypeArray %Block %uint_3
%SSBO = OpTypeStruct %BlockArray
%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO
%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer
%UBO_Ptr = OpTypePointer Uniform %SSBO
%UBO_Var = OpVariable %UBO_Ptr Uniform
%void = OpTypeVoid
%func_type = OpTypeFunction %void
%main = OpFunction %void None %func_type
%25 = OpLabel
%func = OpVariable %arr_uvec2_2_ptr_func Function
; DeviceToDevice
%ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0
%ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1
%loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %ptr_arr_0 %loaded_array
; ConstantToDevice
%ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1
%loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const
OpStore %ptr_arr_0 %loaded_array_const
; StackToDevice
%loaded_array_func = OpLoad %arr_uvec2_2 %func
OpStore %ptr_arr_0 %loaded_array_func
; ThreadGroupToDevice
%loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg
OpStore %ptr_arr_0 %loaded_array_workgroup
; DeviceToThreadGroup
%loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %wg %loaded_array_2
; DeviceToStack
%loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1
OpStore %func %loaded_array_3
; ConstantToThreadGroup
%loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const
OpStore %wg %loaded_array_const_2
; ConstantToStack
%loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const
OpStore %func %loaded_array_const_3
OpReturn
OpFunctionEnd

View File

@ -273,11 +273,27 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
return var;
}
StorageClass Compiler::get_backing_variable_storage(uint32_t ptr)
StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr)
{
auto *var = maybe_get_backing_variable(ptr);
if (var)
return var->storage;
// If the expression has been lowered to a temporary, we need to use the Generic storage class.
// We're looking for the effective storage class of a given expression.
// An access chain or forwarded OpLoads from such access chains
// will generally have the storage class of the underlying variable, but if the load was not forwarded
// we have lost any address space qualifiers.
bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression &&
!get<SPIRExpression>(ptr).access_chain &&
(forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
if (var && !forced_temporary)
{
// Normalize SSBOs to StorageBuffer here.
if (var->storage == StorageClassUniform && has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
return StorageClassStorageBuffer;
else
return var->storage;
}
else
return expression_type(ptr).storage;
}

View File

@ -611,7 +611,7 @@ protected:
bool expression_is_lvalue(uint32_t id) const;
bool variable_storage_is_aliased(const SPIRVariable &var);
SPIRVariable *maybe_get_backing_variable(uint32_t chain);
spv::StorageClass get_backing_variable_storage(uint32_t ptr);
spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
void register_write(uint32_t chain);

View File

@ -3428,10 +3428,15 @@ string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const
return expr;
}
string CompilerGLSL::to_composite_constructor_expression(uint32_t id)
string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
{
auto &type = expression_type(id);
if (!backend.array_is_value_type && !type.array.empty())
bool reroll_array = !type.array.empty() &&
(!backend.array_is_value_type ||
(uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
if (reroll_array)
{
// For this case, we need to "re-roll" an array initializer from a temporary.
// We cannot simply pass the array directly, since it decays to a pointer and it cannot
@ -5689,6 +5694,25 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
return c->constant_is_null();
}
bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
{
auto &type = expression_type(ptr);
if (type.array.empty())
return false;
if (!backend.array_is_value_type)
return true;
auto *var = maybe_get_backing_variable(ptr);
if (!var)
return false;
auto &backed_type = get<SPIRType>(var->basetype);
return !backend.buffer_offset_array_is_value_type &&
backed_type.basetype == SPIRType::Struct &&
has_member_decoration(backed_type.self, 0, DecorationOffset);
}
// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
// For some subclasses, the function is a method on the specified image.
string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
@ -8410,7 +8434,10 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32
if (i)
op += ", ";
subop = to_composite_constructor_expression(elems[i]);
bool uses_buffer_offset = type.basetype == SPIRType::Struct &&
has_member_decoration(type.self, i, DecorationOffset);
subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
}
base = e ? e->base_expression : ID(0);
@ -8732,13 +8759,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
(type.basetype == SPIRType::Struct || (type.columns > 1));
SPIRExpression *e = nullptr;
if (!backend.array_is_value_type && !type.array.empty() && !forward)
if (!forward && expression_is_non_value_type_array(ptr))
{
// Complicated load case where we need to make a copy of ptr, but we cannot, because
// it is an array, and our backend does not support arrays as value types.
// Emit the temporary, and copy it explicitly.
e = &emit_uninitialized_temporary_expression(result_type, id);
emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr));
emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
}
else
e = &emit_op(result_type, id, expr, forward, !usage_tracking);
@ -13403,7 +13430,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
if (ir.ids[block.return_value].get_type() != TypeUndef)
{
emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction,
get_backing_variable_storage(block.return_value));
get_expression_effective_storage_class(block.return_value));
}
if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||

View File

@ -467,6 +467,7 @@ protected:
bool supports_extensions = false;
bool supports_empty_struct = false;
bool array_is_value_type = true;
bool buffer_offset_array_is_value_type = true;
bool comparison_image_samples_scalar = false;
bool native_pointers = false;
bool support_small_type_sampling_result = false;
@ -585,7 +586,7 @@ protected:
SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
std::string to_expression(uint32_t id, bool register_expression_read = true);
std::string to_composite_constructor_expression(uint32_t id);
std::string to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset);
std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type);
std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);
std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true);
@ -762,6 +763,7 @@ protected:
void disallow_forwarding_in_expression_chain(const SPIRExpression &expr);
bool expression_is_constant_null(uint32_t id) const;
bool expression_is_non_value_type_array(uint32_t ptr);
virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression);
uint32_t get_integer_width_for_instruction(const Instruction &instr) const;

View File

@ -1028,6 +1028,8 @@ string CompilerMSL::compile()
// Allow Metal to use the array<T> template unless we force it off.
backend.can_return_array = !msl_options.force_native_arrays;
backend.array_is_value_type = !msl_options.force_native_arrays;
// Arrays which are part of buffer objects are never considered to be native arrays.
backend.buffer_offset_array_is_value_type = false;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@ -3820,17 +3822,21 @@ void CompilerMSL::emit_custom_functions()
static const char *function_name_tags[] = {
"FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack",
"FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup",
"FromDeviceToDevice", "FromConstantToDevice", "FromStackToDevice",
"FromThreadGroupToDevice", "FromDeviceToStack", "FromDeviceToThreadGroup",
};
static const char *src_address_space[] = {
"constant", "constant", "thread const", "thread const", "threadgroup const", "threadgroup const",
"device const", "constant", "thread const", "threadgroup const", "device const", "device const",
};
static const char *dst_address_space[] = {
"thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup",
"device", "device", "device", "device", "thread", "threadgroup",
};
for (uint32_t variant = 0; variant < 6; variant++)
for (uint32_t variant = 0; variant < 12; variant++)
{
uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase;
string tmp = "template<typename T";
@ -6869,6 +6875,10 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
{
is_constant = true;
}
else if (rhs_storage == StorageClassUniform)
{
is_constant = true;
}
// For the case where we have OpLoad triggering an array copy,
// we cannot easily detect this case ahead of time since it's
@ -6897,6 +6907,18 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
tag = "FromThreadGroupToStack";
else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
tag = "FromThreadGroupToThreadGroup";
else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassStorageBuffer)
tag = "FromDeviceToDevice";
else if (lhs_storage == StorageClassStorageBuffer && is_constant)
tag = "FromConstantToDevice";
else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup)
tag = "FromThreadGroupToDevice";
else if (lhs_storage == StorageClassStorageBuffer && rhs_thread)
tag = "FromStackToDevice";
else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer)
tag = "FromDeviceToThreadGroup";
else if (lhs_thread && rhs_storage == StorageClassStorageBuffer)
tag = "FromDeviceToStack";
else
SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");
@ -6943,8 +6965,8 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
if (p_v_lhs)
flush_variable_declaration(p_v_lhs->self);
emit_array_copy(to_expression(id_lhs), id_rhs, get_backing_variable_storage(id_lhs),
get_backing_variable_storage(id_rhs));
emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs),
get_expression_effective_storage_class(id_rhs));
register_write(id_lhs);
return true;