MSL: Rewrite how IO blocks are emitted in multi-patch mode.

Firstly, never flatten inputs or outputs in multi-patch mode.
The main scenario where we do need to care is Block IO.
In this case, we should only flatten the top-level member, and after
that we use access chains as normal.

Using structs in Input storage class is now possible as well. We don't
need to consider per-location fixups at all here. In Vulkan, IO structs
must match exactly. Only plain vectors can have smaller vector sizes as
a special case.
This commit is contained in:
Hans-Kristian Arntzen 2021-04-08 11:47:35 +02:00
parent 425e968720
commit 46c48ee6b5
16 changed files with 463 additions and 189 deletions

View File

@ -16,8 +16,7 @@ struct main0_out
struct main0_in
{
float3 Boo_a;
uint3 Boo_b;
Boo vInput;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
@ -26,8 +25,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
Boo _26 = Boo{ gl_in[gl_InvocationID].Boo_a, gl_in[gl_InvocationID].Boo_b };
gl_out[gl_InvocationID].vVertex = _26;
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput;
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);

View File

@ -9,8 +9,6 @@ struct main0_out
float4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[2];
float gl_ClipDistance_0;
float gl_ClipDistance_1;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]])
@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3
out.gl_PointSize = 4.0;
out.gl_ClipDistance[0] = 1.0;
out.gl_ClipDistance[1] = 0.5;
out.gl_ClipDistance_0 = out.gl_ClipDistance[0];
out.gl_ClipDistance_1 = out.gl_ClipDistance[1];
}

View File

@ -9,8 +9,6 @@ struct main0_out
float4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[2];
float gl_ClipDistance_0;
float gl_ClipDistance_1;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]])
@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3
out.gl_PointSize = 4.0;
out.gl_ClipDistance[0] = 1.0;
out.gl_ClipDistance[1] = 0.5;
out.gl_ClipDistance_0 = out.gl_ClipDistance[0];
out.gl_ClipDistance_1 = out.gl_ClipDistance[1];
}

View File

@ -9,8 +9,6 @@ struct main0_out
float4 v1;
float4 gl_Position;
float gl_ClipDistance[2];
float gl_ClipDistance_0;
float gl_ClipDistance_1;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]])
@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3
gl_PointSize = 4.0;
out.gl_ClipDistance[0] = 1.0;
out.gl_ClipDistance[1] = 0.5;
out.gl_ClipDistance_0 = out.gl_ClipDistance[0];
out.gl_ClipDistance_1 = out.gl_ClipDistance[1];
}

View File

@ -0,0 +1,107 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Meep
{
float a;
float b;
};
struct Block
{
spvUnsafeArray<float, 2> a;
float b;
float2x2 m;
Meep meep;
spvUnsafeArray<Meep, 2> meeps;
};
struct main0_out
{
float4 gl_Position;
};
struct main0_patchOut
{
spvUnsafeArray<float, 2> a;
float b;
float2x2 m;
Meep meep;
spvUnsafeArray<Meep, 2> meeps;
spvUnsafeArray<float, 2> Block_a;
float Block_b;
float2x2 Block_m;
Meep Block_meep;
spvUnsafeArray<Meep, 2> Block_meeps;
};
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
gl_out[gl_InvocationID].gl_Position = float4(1.0);
patchOut.a[0] = 1.0;
patchOut.a[1] = 2.0;
patchOut.b = 3.0;
patchOut.m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0));
patchOut.meep.a = 4.0;
patchOut.meep.b = 5.0;
patchOut.meeps[0].a = 6.0;
patchOut.meeps[0].b = 7.0;
patchOut.meeps[1].a = 8.0;
patchOut.meeps[1].b = 9.0;
patchOut.Block_a[0] = 1.0;
patchOut.Block_a[1] = 2.0;
patchOut.Block_b = 3.0;
patchOut.Block_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0));
patchOut.Block_meep.a = 4.0;
patchOut.Block_meep.b = 5.0;
patchOut.Block_meeps[0].a = 6.0;
patchOut.Block_meeps[0].b = 7.0;
patchOut.Block_meeps[1].a = 8.0;
patchOut.Block_meeps[1].b = 9.0;
}

View File

@ -58,9 +58,7 @@ struct main0_out
struct main0_in
{
float4x4 VertexData_a;
spvUnsafeArray<float4, 2> VertexData_b;
float4 VertexData_c;
VertexData vInputs;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
@ -69,10 +67,10 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
spvUnsafeArray<VertexData, 32> _19 = spvUnsafeArray<VertexData, 32>({ VertexData{ gl_in[0].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[0].VertexData_b[0], gl_in[0].VertexData_b[1] }), gl_in[0].VertexData_c }, VertexData{ gl_in[1].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[1].VertexData_b[0], gl_in[1].VertexData_b[1] }), gl_in[1].VertexData_c }, VertexData{ gl_in[2].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[2].VertexData_b[0], gl_in[2].VertexData_b[1] }), gl_in[2].VertexData_c }, VertexData{ gl_in[3].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[3].VertexData_b[0], gl_in[3].VertexData_b[1] }), gl_in[3].VertexData_c }, VertexData{ gl_in[4].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[4].VertexData_b[0], gl_in[4].VertexData_b[1] }), gl_in[4].VertexData_c }, VertexData{ gl_in[5].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[5].VertexData_b[0], gl_in[5].VertexData_b[1] }), gl_in[5].VertexData_c }, VertexData{ gl_in[6].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[6].VertexData_b[0], gl_in[6].VertexData_b[1] }), gl_in[6].VertexData_c }, VertexData{ gl_in[7].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[7].VertexData_b[0], gl_in[7].VertexData_b[1] }), gl_in[7].VertexData_c }, VertexData{ gl_in[8].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[8].VertexData_b[0], gl_in[8].VertexData_b[1] }), gl_in[8].VertexData_c }, VertexData{ gl_in[9].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[9].VertexData_b[0], gl_in[9].VertexData_b[1] }), gl_in[9].VertexData_c }, VertexData{ gl_in[10].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[10].VertexData_b[0], gl_in[10].VertexData_b[1] }), gl_in[10].VertexData_c }, VertexData{ gl_in[11].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[11].VertexData_b[0], gl_in[11].VertexData_b[1] }), gl_in[11].VertexData_c }, VertexData{ gl_in[12].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[12].VertexData_b[0], gl_in[12].VertexData_b[1] }), gl_in[12].VertexData_c }, VertexData{ gl_in[13].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[13].VertexData_b[0], gl_in[13].VertexData_b[1] }), gl_in[13].VertexData_c }, VertexData{ gl_in[14].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[14].VertexData_b[0], gl_in[14].VertexData_b[1] }), gl_in[14].VertexData_c }, VertexData{ gl_in[15].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[15].VertexData_b[0], gl_in[15].VertexData_b[1] }), gl_in[15].VertexData_c }, VertexData{ gl_in[16].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[16].VertexData_b[0], gl_in[16].VertexData_b[1] }), gl_in[16].VertexData_c }, VertexData{ gl_in[17].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[17].VertexData_b[0], gl_in[17].VertexData_b[1] }), gl_in[17].VertexData_c }, VertexData{ gl_in[18].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[18].VertexData_b[0], gl_in[18].VertexData_b[1] }), gl_in[18].VertexData_c }, VertexData{ gl_in[19].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[19].VertexData_b[0], gl_in[19].VertexData_b[1] }), gl_in[19].VertexData_c }, VertexData{ gl_in[20].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[20].VertexData_b[0], gl_in[20].VertexData_b[1] }), gl_in[20].VertexData_c }, VertexData{ gl_in[21].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[21].VertexData_b[0], gl_in[21].VertexData_b[1] }), gl_in[21].VertexData_c }, VertexData{ gl_in[22].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[22].VertexData_b[0], gl_in[22].VertexData_b[1] }), gl_in[22].VertexData_c }, VertexData{ gl_in[23].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[23].VertexData_b[0], gl_in[23].VertexData_b[1] }), gl_in[23].VertexData_c }, VertexData{ gl_in[24].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[24].VertexData_b[0], gl_in[24].VertexData_b[1] }), gl_in[24].VertexData_c }, VertexData{ gl_in[25].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[25].VertexData_b[0], gl_in[25].VertexData_b[1] }), gl_in[25].VertexData_c }, VertexData{ gl_in[26].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[26].VertexData_b[0], gl_in[26].VertexData_b[1] }), gl_in[26].VertexData_c }, VertexData{ gl_in[27].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[27].VertexData_b[0], gl_in[27].VertexData_b[1] }), gl_in[27].VertexData_c }, VertexData{ gl_in[28].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[28].VertexData_b[0], gl_in[28].VertexData_b[1] }), gl_in[28].VertexData_c }, VertexData{ gl_in[29].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[29].VertexData_b[0], gl_in[29].VertexData_b[1] }), gl_in[29].VertexData_c }, VertexData{ gl_in[30].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[30].VertexData_b[0], gl_in[30].VertexData_b[1] }), gl_in[30].VertexData_c }, VertexData{ gl_in[31].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[31].VertexData_b[0], gl_in[31].VertexData_b[1] }), gl_in[31].VertexData_c } });
spvUnsafeArray<VertexData, 32> _19 = spvUnsafeArray<VertexData, 32>({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs });
spvUnsafeArray<VertexData, 32> tmp;
tmp = _19;
int _27 = gl_InvocationID ^ 1;
gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + gl_in[_27].VertexData_c;
gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + gl_in[_27].vInputs.c;
}

View File

@ -64,7 +64,7 @@ struct HSConstantOut
struct VertexOutput_1
{
float3 uv;
float2 uv;
};
struct HSOut_1
@ -80,8 +80,8 @@ struct main0_out
struct main0_in
{
float3 VertexOutput_uv;
ushort2 m_172;
VertexOutput_1 p;
ushort2 m_171;
float4 gl_Position;
};
@ -113,11 +113,11 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1]);
spvUnsafeArray<VertexOutput, 3> p;
p[0].pos = gl_in[0].gl_Position;
p[0].uv = gl_in[0].VertexOutput_uv.xy;
p[0].uv = gl_in[0].p.uv;
p[1].pos = gl_in[1].gl_Position;
p[1].uv = gl_in[1].VertexOutput_uv.xy;
p[1].uv = gl_in[1].p.uv;
p[2].pos = gl_in[2].gl_Position;
p[2].uv = gl_in[2].VertexOutput_uv.xy;
p[2].uv = gl_in[2].p.uv;
uint i = gl_InvocationID;
spvUnsafeArray<VertexOutput, 3> param;
param = p;

View File

@ -16,8 +16,7 @@ struct main0_out
struct main0_in
{
float3 Boo_a;
uint3 Boo_b;
Boo vInput;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
@ -26,8 +25,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
Boo _26 = Boo{ gl_in[gl_InvocationID].Boo_a, gl_in[gl_InvocationID].Boo_b };
gl_out[gl_InvocationID].vVertex = _26;
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput;
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);

View File

@ -11,8 +11,6 @@ struct main0_out
float4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[2];
float gl_ClipDistance_0;
float gl_ClipDistance_1;
};
static inline __attribute__((always_inline))
@ -33,7 +31,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
write_in_func(v0, out.v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance);
out.gl_ClipDistance_0 = out.gl_ClipDistance[0];
out.gl_ClipDistance_1 = out.gl_ClipDistance[1];
}

View File

@ -11,8 +11,6 @@ struct main0_out
float4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[2];
float gl_ClipDistance_0;
float gl_ClipDistance_1;
};
static inline __attribute__((always_inline))
@ -33,7 +31,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
write_in_func(out.v0, v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance);
out.gl_ClipDistance_0 = out.gl_ClipDistance[0];
out.gl_ClipDistance_1 = out.gl_ClipDistance[1];
}

View File

@ -11,8 +11,6 @@ struct main0_out
float4 v1;
float4 gl_Position;
float gl_ClipDistance[2];
float gl_ClipDistance_0;
float gl_ClipDistance_1;
};
static inline __attribute__((always_inline))
@ -33,7 +31,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
write_in_func(out.v0, out.v1, out.gl_Position, gl_PointSize, out.gl_ClipDistance);
out.gl_ClipDistance_0 = out.gl_ClipDistance[0];
out.gl_ClipDistance_1 = out.gl_ClipDistance[1];
}

View File

@ -0,0 +1,113 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Meep
{
float a;
float b;
};
struct Block
{
spvUnsafeArray<float, 2> a;
float b;
float2x2 m;
Meep meep;
spvUnsafeArray<Meep, 2> meeps;
};
struct main0_out
{
float4 gl_Position;
};
struct main0_patchOut
{
spvUnsafeArray<float, 2> a;
float b;
float2x2 m;
Meep meep;
spvUnsafeArray<Meep, 2> meeps;
spvUnsafeArray<float, 2> Block_a;
float Block_b;
float2x2 Block_m;
Meep Block_meep;
spvUnsafeArray<Meep, 2> Block_meeps;
};
static inline __attribute__((always_inline))
void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float, 2>& a, device float& b, device float2x2& m, device Meep& meep, device spvUnsafeArray<Meep, 2>& meeps, device main0_patchOut& patchOut)
{
gl_out[gl_InvocationID].gl_Position = float4(1.0);
a[0] = 1.0;
a[1] = 2.0;
b = 3.0;
m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0));
meep.a = 4.0;
meep.b = 5.0;
meeps[0].a = 6.0;
meeps[0].b = 7.0;
meeps[1].a = 8.0;
meeps[1].b = 9.0;
patchOut.Block_a[0] = 1.0;
patchOut.Block_a[1] = 2.0;
patchOut.Block_b = 3.0;
patchOut.Block_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0));
patchOut.Block_meep.a = 4.0;
patchOut.Block_meep.b = 5.0;
patchOut.Block_meeps[0].a = 6.0;
patchOut.Block_meeps[0].b = 7.0;
patchOut.Block_meeps[1].a = 8.0;
patchOut.Block_meeps[1].b = 9.0;
}
kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
write_in_func(gl_out, gl_InvocationID, patchOut.a, patchOut.b, patchOut.m, patchOut.meep, patchOut.meeps, patchOut);
}

View File

@ -58,9 +58,7 @@ struct main0_out
struct main0_in
{
float4x4 VertexData_a;
spvUnsafeArray<float4, 2> VertexData_b;
float4 VertexData_c;
VertexData vInputs;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
@ -69,12 +67,10 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
spvUnsafeArray<VertexData, 32> _19 = spvUnsafeArray<VertexData, 32>({ VertexData{ gl_in[0].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[0].VertexData_b[0], gl_in[0].VertexData_b[1] }), gl_in[0].VertexData_c }, VertexData{ gl_in[1].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[1].VertexData_b[0], gl_in[1].VertexData_b[1] }), gl_in[1].VertexData_c }, VertexData{ gl_in[2].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[2].VertexData_b[0], gl_in[2].VertexData_b[1] }), gl_in[2].VertexData_c }, VertexData{ gl_in[3].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[3].VertexData_b[0], gl_in[3].VertexData_b[1] }), gl_in[3].VertexData_c }, VertexData{ gl_in[4].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[4].VertexData_b[0], gl_in[4].VertexData_b[1] }), gl_in[4].VertexData_c }, VertexData{ gl_in[5].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[5].VertexData_b[0], gl_in[5].VertexData_b[1] }), gl_in[5].VertexData_c }, VertexData{ gl_in[6].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[6].VertexData_b[0], gl_in[6].VertexData_b[1] }), gl_in[6].VertexData_c }, VertexData{ gl_in[7].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[7].VertexData_b[0], gl_in[7].VertexData_b[1] }), gl_in[7].VertexData_c }, VertexData{ gl_in[8].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[8].VertexData_b[0], gl_in[8].VertexData_b[1] }), gl_in[8].VertexData_c }, VertexData{ gl_in[9].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[9].VertexData_b[0], gl_in[9].VertexData_b[1] }), gl_in[9].VertexData_c }, VertexData{ gl_in[10].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[10].VertexData_b[0], gl_in[10].VertexData_b[1] }), gl_in[10].VertexData_c }, VertexData{ gl_in[11].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[11].VertexData_b[0], gl_in[11].VertexData_b[1] }), gl_in[11].VertexData_c }, VertexData{ gl_in[12].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[12].VertexData_b[0], gl_in[12].VertexData_b[1] }), gl_in[12].VertexData_c }, VertexData{ gl_in[13].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[13].VertexData_b[0], gl_in[13].VertexData_b[1] }), gl_in[13].VertexData_c }, VertexData{ gl_in[14].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[14].VertexData_b[0], gl_in[14].VertexData_b[1] }), gl_in[14].VertexData_c }, VertexData{ gl_in[15].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[15].VertexData_b[0], gl_in[15].VertexData_b[1] }), gl_in[15].VertexData_c }, VertexData{ gl_in[16].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[16].VertexData_b[0], gl_in[16].VertexData_b[1] }), gl_in[16].VertexData_c }, VertexData{ gl_in[17].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[17].VertexData_b[0], gl_in[17].VertexData_b[1] }), gl_in[17].VertexData_c }, VertexData{ gl_in[18].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[18].VertexData_b[0], gl_in[18].VertexData_b[1] }), gl_in[18].VertexData_c }, VertexData{ gl_in[19].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[19].VertexData_b[0], gl_in[19].VertexData_b[1] }), gl_in[19].VertexData_c }, VertexData{ gl_in[20].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[20].VertexData_b[0], gl_in[20].VertexData_b[1] }), gl_in[20].VertexData_c }, VertexData{ gl_in[21].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[21].VertexData_b[0], gl_in[21].VertexData_b[1] }), gl_in[21].VertexData_c }, VertexData{ gl_in[22].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[22].VertexData_b[0], gl_in[22].VertexData_b[1] }), gl_in[22].VertexData_c }, VertexData{ gl_in[23].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[23].VertexData_b[0], gl_in[23].VertexData_b[1] }), gl_in[23].VertexData_c }, VertexData{ gl_in[24].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[24].VertexData_b[0], gl_in[24].VertexData_b[1] }), gl_in[24].VertexData_c }, VertexData{ gl_in[25].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[25].VertexData_b[0], gl_in[25].VertexData_b[1] }), gl_in[25].VertexData_c }, VertexData{ gl_in[26].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[26].VertexData_b[0], gl_in[26].VertexData_b[1] }), gl_in[26].VertexData_c }, VertexData{ gl_in[27].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[27].VertexData_b[0], gl_in[27].VertexData_b[1] }), gl_in[27].VertexData_c }, VertexData{ gl_in[28].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[28].VertexData_b[0], gl_in[28].VertexData_b[1] }), gl_in[28].VertexData_c }, VertexData{ gl_in[29].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[29].VertexData_b[0], gl_in[29].VertexData_b[1] }), gl_in[29].VertexData_c }, VertexData{ gl_in[30].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[30].VertexData_b[0], gl_in[30].VertexData_b[1] }), gl_in[30].VertexData_c }, VertexData{ gl_in[31].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[31].VertexData_b[0], gl_in[31].VertexData_b[1] }), gl_in[31].VertexData_c } });
spvUnsafeArray<VertexData, 32> _19 = spvUnsafeArray<VertexData, 32>({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs });
spvUnsafeArray<VertexData, 32> tmp;
tmp = _19;
int _27 = gl_InvocationID ^ 1;
VertexData _30 = VertexData{ gl_in[_27].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[_27].VertexData_b[0], gl_in[_27].VertexData_b[1] }), gl_in[_27].VertexData_c };
VertexData tmp_single = _30;
VertexData tmp_single = gl_in[gl_InvocationID ^ 1].vInputs;
gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + tmp_single.c;
}

View File

@ -0,0 +1,55 @@
#version 450
layout(vertices = 4) out;
struct Meep
{
float a;
float b;
};
layout(location = 0) patch out float a[2];
layout(location = 2) patch out float b;
layout(location = 3) patch out mat2 m;
layout(location = 5) patch out Meep meep;
layout(location = 7) patch out Meep meeps[2];
layout(location = 11) patch out Block
{
float a[2];
float b;
mat2 m;
Meep meep;
Meep meeps[2];
} B;
void write_in_func()
{
gl_out[gl_InvocationID].gl_Position = vec4(1.0);
a[0] = 1.0;
a[1] = 2.0;
b = 3.0;
m = mat2(2.0);
meep.a = 4.0;
meep.b = 5.0;
meeps[0].a = 6.0;
meeps[0].b = 7.0;
meeps[1].a = 8.0;
meeps[1].b = 9.0;
B.a[0] = 1.0;
B.a[1] = 2.0;
B.b = 3.0;
B.m = mat2(4.0);
B.meep.a = 4.0;
B.meep.b = 5.0;
B.meeps[0].a = 6.0;
B.meeps[0].b = 7.0;
B.meeps[1].a = 8.0;
B.meeps[1].b = 9.0;
}
void main()
{
write_in_func();
}

View File

@ -1731,8 +1731,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
// Add the global variables as arguments to the function
if (func_id != ir.default_entry_point)
{
bool added_in = false;
bool added_out = false;
bool control_point_added_in = false;
bool control_point_added_out = false;
bool patch_added_in = false;
bool patch_added_out = false;
for (uint32_t arg_id : added_arg_ids)
{
auto &var = get<SPIRVariable>(arg_id);
@ -1741,16 +1744,19 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn));
bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type);
bool is_block = has_decoration(p_type->self, DecorationBlock);
bool is_control_point_storage =
!is_patch &&
((is_tessellation_shader() && var.storage == StorageClassInput) ||
(get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput));
bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput;
bool is_builtin = is_builtin_variable(var);
bool variable_is_stage_io =
!is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance ||
p_type->basetype == SPIRType::Struct;
bool is_redirected_to_global_stage_io = is_control_point_storage && variable_is_stage_io;
bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) &&
variable_is_stage_io;
// If output is masked it is not considered part of the global stage IO interface.
if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput)
@ -1762,7 +1768,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
// Similarly, tessellation evaluation shaders see per-vertex inputs as arrays.
// We collected them into a structure; we must pass the array of this
// structure to the function.
std::string name = var.storage == StorageClassInput ? "gl_in" : "gl_out";
std::string name;
if (is_patch)
name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name;
else
name = var.storage == StorageClassInput ? "gl_in" : "gl_out";
if (var.storage == StorageClassOutput &&
has_decoration(p_type->self, DecorationBlock) &&
@ -1797,16 +1807,18 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
// structure to the function.
if (var.storage == StorageClassInput)
{
auto &added_in = is_patch ? patch_added_in : control_point_added_in;
if (added_in)
continue;
arg_id = stage_in_ptr_var_id;
arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id;
added_in = true;
}
else if (var.storage == StorageClassOutput)
{
auto &added_out = is_patch ? patch_added_out : control_point_added_out;
if (added_out)
continue;
arg_id = stage_out_ptr_var_id;
arg_id = is_patch ? patch_stage_out_var_id : stage_out_ptr_var_id;
added_out = true;
}
@ -1915,14 +1927,23 @@ void CompilerMSL::mark_as_packable(SPIRType &type)
uint32_t CompilerMSL::type_to_location_count(const SPIRType &type) const
{
// In MSL, we cannot place structs in any context where we need locations.
assert(type.basetype != SPIRType::Struct);
uint32_t count;
if (type.basetype == SPIRType::Struct)
{
uint32_t mbr_count = uint32_t(type.member_types.size());
count = 0;
for (uint32_t i = 0; i < mbr_count; i++)
count += type_to_location_count(get<SPIRType>(type.member_types[i]));
}
else
{
count = type.columns > 1 ? type.columns : 1;
}
uint32_t dim = 1;
for (uint32_t i = 0; i < type.array.size(); i++)
dim *= to_array_size_literal(type, i);
uint32_t dim_count = uint32_t(type.array.size());
for (uint32_t i = 0; i < dim_count; i++)
count *= to_array_size_literal(type, i);
uint32_t count = dim * type.columns;
return count;
}
@ -2911,6 +2932,16 @@ void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_
}
}
bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const
{
if (storage == StorageClassOutput)
return !capture_output_to_buffer;
else if (storage == StorageClassInput)
return !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup);
else
return false;
}
void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type,
SPIRVariable &var, InterfaceBlockMeta &meta)
{
@ -2942,7 +2973,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
if (var_type.basetype == SPIRType::Struct)
{
bool block_requires_flattening = is_block || !capture_output_to_buffer || storage == StorageClassInput;
bool block_requires_flattening = variable_storage_requires_stage_io(storage) || is_block;
bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration;
if (needs_local_declaration)
@ -3009,10 +3040,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type);
bool attribute_load_store =
storage == StorageClassInput && get_execution_model() != ExecutionModelFragment;
bool storage_is_stage_io =
(storage == StorageClassInput && !(get_execution_model() == ExecutionModelTessellationControl &&
msl_options.multi_patch_workgroup)) ||
storage == StorageClassOutput;
bool storage_is_stage_io = variable_storage_requires_stage_io(storage);
// ClipDistance always needs to be declared as user attributes.
if (builtin == BuiltInClipDistance)
@ -3042,10 +3070,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
if (!is_builtin || has_active_builtin(builtin, storage))
{
bool is_composite_type = is_matrix(var_type) || is_array(var_type);
bool storage_is_stage_io =
(storage == StorageClassInput &&
!(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup)) ||
(storage == StorageClassOutput && !capture_output_to_buffer);
bool storage_is_stage_io = variable_storage_requires_stage_io(storage);
bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment;
// ClipDistance always needs to be declared as user attributes.
@ -3088,8 +3113,11 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t
auto &type = get_variable_element_type(var);
bool flatten_composites = variable_storage_requires_stage_io(var.storage);
bool is_block = has_decoration(type.self, DecorationBlock);
uint32_t mbr_idx = uint32_t(-1);
if (type.basetype == SPIRType::Struct)
if (type.basetype == SPIRType::Struct && (flatten_composites || is_block))
mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex);
if (mbr_idx != uint32_t(-1))
@ -3580,6 +3608,10 @@ uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t locat
{
auto &type = get<SPIRType>(type_id);
// Struct types must match exactly.
if (type.basetype == SPIRType::Struct)
return type_id;
auto p_va = inputs_by_location.find(location);
if (p_va == end(inputs_by_location))
{
@ -6525,16 +6557,22 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
if (ptr_type.storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationEvaluation)
return false;
bool multi_patch_tess_ctl = get_execution_model() == ExecutionModelTessellationControl &&
msl_options.multi_patch_workgroup && ptr_type.storage == StorageClassInput;
bool flat_matrix = is_matrix(result_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl;
bool flat_struct = result_type.basetype == SPIRType::Struct && ptr_type.storage == StorageClassInput;
bool flat_data_type = flat_matrix || is_array(result_type) || flat_struct;
if (!flat_data_type)
return false;
if (has_decoration(ptr, DecorationPatch))
return false;
bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable;
bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage);
bool flat_data_type = flattened_io &&
(is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct);
// Edge case, even with multi-patch workgroups, we still need to unroll load
// if we're loading control points directly.
if (ptr_is_io_variable && is_array(result_type))
flat_data_type = true;
if (!flat_data_type)
return false;
// Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out.
// Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup
@ -6543,12 +6581,31 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex);
auto *var = maybe_get_backing_variable(ptr);
bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable;
auto &expr_type = get_pointee_type(ptr_type.self);
const auto &iface_type = expression_type(stage_in_ptr_var_id);
if (result_type.array.size() > 2)
if (!flattened_io)
{
// Simplest case for multi-patch workgroups, just unroll array as-is.
if (interface_index == uint32_t(-1))
return false;
expr += type_to_glsl(result_type) + "({ ";
uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1);
for (uint32_t i = 0; i < num_control_points; i++)
{
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
if (i + 1 < num_control_points)
expr += ", ";
}
expr += " })";
}
else if (result_type.array.size() > 2)
{
SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions.");
}
@ -6558,7 +6615,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable.");
if (interface_index == uint32_t(-1))
SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");
if (result_type.basetype == SPIRType::Struct || flat_matrix)
if (result_type.basetype == SPIRType::Struct || is_matrix(result_type))
SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO.");
expr += type_to_glsl(result_type) + "({ ";
@ -6572,44 +6629,19 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
expr += type_to_glsl(sub_type) + "({ ";
interface_index = base_interface_index;
uint32_t array_size = to_array_size_literal(result_type, 0);
if (multi_patch_tess_ctl)
for (uint32_t j = 0; j < array_size; j++, interface_index++)
{
for (uint32_t j = 0; j < array_size; j++)
{
const uint32_t indices[3] = { i, interface_index, j };
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr +=
access_chain_internal(stage_in_ptr_var_id, indices, 3,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
// If the expression has more vector components than the result type, insert
// a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
// happen if we replace the type of an input variable.
if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct &&
expr_type.vecsize > sub_type.vecsize)
expr += vector_swizzle(sub_type.vecsize, 0);
AccessChainMeta meta;
expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct &&
expr_type.vecsize > sub_type.vecsize)
expr += vector_swizzle(sub_type.vecsize, 0);
if (j + 1 < array_size)
expr += ", ";
}
}
else
{
for (uint32_t j = 0; j < array_size; j++, interface_index++)
{
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr +=
access_chain_internal(stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct &&
expr_type.vecsize > sub_type.vecsize)
expr += vector_swizzle(sub_type.vecsize, 0);
if (j + 1 < array_size)
expr += ", ";
}
if (j + 1 < array_size)
expr += ", ";
}
expr += " })";
if (i + 1 < num_control_points)
@ -6617,7 +6649,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
}
expr += " })";
}
else if (flat_struct)
else if (result_type.basetype == SPIRType::Struct)
{
bool is_array_of_struct = is_array(result_type);
if (is_array_of_struct && !ptr_is_io_variable)
@ -6650,7 +6682,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
const auto &mbr_type = get<SPIRType>(struct_type.member_types[j]);
const auto &expr_mbr_type = get<SPIRType>(expr_type.member_types[j]);
if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl)
if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput)
{
expr += type_to_glsl(mbr_type) + "(";
for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++)
@ -6660,8 +6692,8 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr += access_chain_internal(
stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
}
else
expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
@ -6677,48 +6709,23 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
{
expr += type_to_glsl(mbr_type) + "({ ";
uint32_t array_size = to_array_size_literal(mbr_type, 0);
if (multi_patch_tess_ctl)
for (uint32_t k = 0; k < array_size; k++, interface_index++)
{
for (uint32_t k = 0; k < array_size; k++)
if (is_array_of_struct)
{
if (is_array_of_struct)
{
const uint32_t indices[3] = { i, interface_index, k };
AccessChainMeta meta;
expr += access_chain_internal(
stage_in_ptr_var_id, indices, 3,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
}
else
expr += join(to_expression(ptr), ".", to_member_name(iface_type, interface_index), "[",
k, "]");
if (expr_mbr_type.vecsize > mbr_type.vecsize)
expr += vector_swizzle(mbr_type.vecsize, 0);
if (k + 1 < array_size)
expr += ", ";
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr += access_chain_internal(
stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
}
}
else
{
for (uint32_t k = 0; k < array_size; k++, interface_index++)
{
if (is_array_of_struct)
{
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr += access_chain_internal(
stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
}
else
expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
if (expr_mbr_type.vecsize > mbr_type.vecsize)
expr += vector_swizzle(mbr_type.vecsize, 0);
else
expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
if (expr_mbr_type.vecsize > mbr_type.vecsize)
expr += vector_swizzle(mbr_type.vecsize, 0);
if (k + 1 < array_size)
expr += ", ";
}
if (k + 1 < array_size)
expr += ", ";
}
expr += " })";
}
@ -6748,7 +6755,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
if (is_array_of_struct)
expr += " })";
}
else if (flat_matrix)
else if (is_matrix(result_type))
{
bool is_array_of_matrix = is_array(result_type);
if (is_array_of_matrix && !ptr_is_io_variable)
@ -6774,9 +6781,8 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id
const uint32_t indices[2] = { i, interface_index };
AccessChainMeta meta;
expr +=
access_chain_internal(stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
if (expr_type.vecsize > result_type.vecsize)
expr += vector_swizzle(result_type.vecsize, 0);
if (j + 1 < result_type.columns)
@ -6867,7 +6873,8 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
bool patch = false;
bool flat_data = false;
bool ptr_is_chain = false;
bool multi_patch = get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup;
bool flatten_composites = false;
bool is_block = false;
if (var)
@ -6875,13 +6882,15 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
if (var)
{
flatten_composites = variable_storage_requires_stage_io(var->storage);
patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var));
// Should match strip_array in add_interface_block.
flat_data = var->storage == StorageClassInput ||
(var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl);
if (patch && (!is_block || var->storage != StorageClassOutput))
// Patch inputs are treated as normal block IO variables, so they don't deal with this path at all.
if (patch && (!is_block || var->storage == StorageClassInput))
flat_data = false;
// We might have a chained access chain, where
@ -6943,7 +6952,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
VariableID stage_var_id;
if (patch)
stage_var_id = patch_stage_out_var_id;
stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id;
else
stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id;
@ -6957,8 +6966,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
auto &result_ptr_type = get<SPIRType>(ops[0]);
uint32_t const_mbr_id = next_id++;
uint32_t index = get_extended_decoration(var->self, SPIRVCrossDecorationInterfaceMemberIndex);
if (var->storage == StorageClassInput || is_block)
uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);
if (flatten_composites || is_block)
{
uint32_t i = first_non_array_index;
auto *type = &get_variable_element_type(*var);
@ -6977,9 +6987,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
// In this case, we're poking into flattened structures and arrays, so now we have to
// combine the following indices. If we encounter a non-constant index,
// we're hosed.
for (; i < length; ++i)
for (; flatten_composites && i < length; ++i)
{
if ((multi_patch || (!is_array(*type) && !is_matrix(*type))) && type->basetype != SPIRType::Struct)
if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct)
break;
auto *c = maybe_get<SPIRConstant>(ops[i]);
@ -7007,31 +7017,48 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
type = &get<SPIRType>(type->member_types[c->scalar()]);
}
if ((!multi_patch && (is_matrix(result_ptr_type) || is_array(result_ptr_type))) ||
result_ptr_type.basetype == SPIRType::Struct)
{
// We're not going to emit the actual member name, we let any further OpLoad take care of that.
// Tag the access chain with the member index we're referencing.
set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index);
}
else
// We're not going to emit the actual member name, we let any further OpLoad take care of that.
// Tag the access chain with the member index we're referencing.
bool defer_access_chain = flatten_composites && (is_matrix(result_ptr_type) || is_array(result_ptr_type) ||
result_ptr_type.basetype == SPIRType::Struct);
if (!defer_access_chain)
{
// Access the appropriate member of gl_in/gl_out.
set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
indices.push_back(const_mbr_id);
// Member index is now irrelevant.
index = uint32_t(-1);
// Append any straggling access chain indices.
if (i < length)
indices.insert(indices.end(), ops + i, ops + length);
}
else
{
// We must have consumed the entire access chain if we're deferring it.
assert(i == length);
}
if (index != uint32_t(-1))
set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index);
else
unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex);
}
else
{
assert(index != uint32_t(-1));
set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
indices.push_back(const_mbr_id);
if (index != uint32_t(-1))
{
set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
indices.push_back(const_mbr_id);
}
indices.insert(indices.end(), ops + 4, ops + length);
// Member index is now irrelevant.
index = uint32_t(-1);
unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex);
indices.insert(indices.end(), ops + first_non_array_index, ops + length);
}
// We use the pointer to the base of the input/output array here,
@ -7057,7 +7084,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
// First one is the gl_in/gl_out struct itself, then an index into that array.
// If we have traversed further, we use a normal access chain formulation.
auto *ptr_expr = maybe_get<SPIRExpression>(ptr);
if (ptr_expr && ptr_expr->implied_read_expressions.size() == 2)
if (flatten_composites && ptr_expr && ptr_expr->implied_read_expressions.size() == 2)
{
e = join(to_expression(ptr),
access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()),
@ -10665,12 +10692,16 @@ uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn
auto &mbr_type = get<SPIRType>(get<SPIRType>(type_id).member_types[index]);
uint32_t count = type_to_location_count(mbr_type);
// This should always be 1.
if (count != 1)
return k_unknown_location;
loc = 0;
while (location_inputs_in_use.count(loc) != 0)
const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool {
for (uint32_t i = 0; i < location_count; i++)
if (location_inputs_in_use.count(location + i) != 0)
return true;
return false;
};
while (location_range_in_use(loc, count))
loc++;
set_member_decoration(type_id, index, DecorationLocation, loc);

View File

@ -1077,6 +1077,8 @@ protected:
bool type_is_pointer_to_pointer(const SPIRType &type) const;
bool is_supported_argument_buffer_type(const SPIRType &type) const;
bool variable_storage_requires_stage_io(spv::StorageClass storage) const;
// OpcodeHandler that handles several MSL preprocessing operations.
struct OpCodePreprocessor : OpcodeHandler
{