diff --git a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc index e47d56a2..c5e309ad 100644 --- a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc +++ b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -16,8 +16,7 @@ struct main0_out struct main0_in { - float3 Boo_a; - uint3 Boo_b; + Boo vInput; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) @@ -26,8 +25,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; uint gl_InvocationID = gl_GlobalInvocationID.x % 4; uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); - Boo _26 = Boo{ gl_in[gl_InvocationID].Boo_a, gl_in[gl_InvocationID].Boo_b }; - gl_out[gl_InvocationID].vVertex = _26; + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert index 6c98e774..497cf2d5 100644 --- a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -9,8 +9,6 @@ struct main0_out float4 gl_Position; float gl_PointSize; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) @@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 out.gl_PointSize = 4.0; out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 0.5; - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert index c26f7f2b..aaa41d42 100644 --- a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -9,8 +9,6 @@ struct main0_out float4 gl_Position; float gl_PointSize; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) @@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 out.gl_PointSize = 4.0; out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 0.5; - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert index e6a6ff9e..3142d14c 100644 --- a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -9,8 +9,6 @@ struct main0_out float4 v1; float4 gl_Position; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) @@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_PointSize = 4.0; out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 0.5; - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000..d4a59bb7 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,107 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray Block_a; + float Block_b; + float2x2 Block_m; + Meep Block_meep; + spvUnsafeArray Block_meeps; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + patchOut.a[0] = 1.0; + patchOut.a[1] = 2.0; + patchOut.b = 3.0; + patchOut.m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + patchOut.meep.a = 4.0; + patchOut.meep.b = 5.0; + patchOut.meeps[0].a = 6.0; + patchOut.meeps[0].b = 7.0; + patchOut.meeps[1].a = 8.0; + patchOut.meeps[1].b = 9.0; + patchOut.Block_a[0] = 1.0; + patchOut.Block_a[1] = 2.0; + patchOut.Block_b = 3.0; + patchOut.Block_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0)); + patchOut.Block_meep.a = 4.0; + patchOut.Block_meep.b = 5.0; + patchOut.Block_meeps[0].a = 6.0; + patchOut.Block_meeps[0].b = 7.0; + patchOut.Block_meeps[1].a = 8.0; + patchOut.Block_meeps[1].b = 9.0; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc index 8bd5515b..add59f69 100644 --- a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -58,9 +58,7 @@ struct main0_out struct main0_in { - float4x4 VertexData_a; - spvUnsafeArray VertexData_b; - float4 VertexData_c; + VertexData vInputs; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) @@ -69,10 +67,10 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; uint gl_InvocationID = gl_GlobalInvocationID.x % 4; uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); - spvUnsafeArray _19 = spvUnsafeArray({ VertexData{ gl_in[0].VertexData_a, spvUnsafeArray({ gl_in[0].VertexData_b[0], gl_in[0].VertexData_b[1] }), gl_in[0].VertexData_c }, VertexData{ gl_in[1].VertexData_a, spvUnsafeArray({ gl_in[1].VertexData_b[0], gl_in[1].VertexData_b[1] }), gl_in[1].VertexData_c }, VertexData{ gl_in[2].VertexData_a, spvUnsafeArray({ gl_in[2].VertexData_b[0], gl_in[2].VertexData_b[1] }), gl_in[2].VertexData_c }, VertexData{ gl_in[3].VertexData_a, spvUnsafeArray({ gl_in[3].VertexData_b[0], gl_in[3].VertexData_b[1] }), gl_in[3].VertexData_c }, VertexData{ gl_in[4].VertexData_a, spvUnsafeArray({ gl_in[4].VertexData_b[0], gl_in[4].VertexData_b[1] }), gl_in[4].VertexData_c }, VertexData{ gl_in[5].VertexData_a, spvUnsafeArray({ gl_in[5].VertexData_b[0], gl_in[5].VertexData_b[1] }), gl_in[5].VertexData_c }, VertexData{ gl_in[6].VertexData_a, spvUnsafeArray({ gl_in[6].VertexData_b[0], gl_in[6].VertexData_b[1] }), gl_in[6].VertexData_c }, VertexData{ gl_in[7].VertexData_a, spvUnsafeArray({ gl_in[7].VertexData_b[0], gl_in[7].VertexData_b[1] }), gl_in[7].VertexData_c }, VertexData{ gl_in[8].VertexData_a, spvUnsafeArray({ gl_in[8].VertexData_b[0], gl_in[8].VertexData_b[1] }), gl_in[8].VertexData_c }, VertexData{ gl_in[9].VertexData_a, spvUnsafeArray({ gl_in[9].VertexData_b[0], gl_in[9].VertexData_b[1] }), gl_in[9].VertexData_c }, VertexData{ gl_in[10].VertexData_a, spvUnsafeArray({ gl_in[10].VertexData_b[0], gl_in[10].VertexData_b[1] }), gl_in[10].VertexData_c }, VertexData{ gl_in[11].VertexData_a, spvUnsafeArray({ gl_in[11].VertexData_b[0], gl_in[11].VertexData_b[1] }), gl_in[11].VertexData_c }, VertexData{ gl_in[12].VertexData_a, spvUnsafeArray({ gl_in[12].VertexData_b[0], gl_in[12].VertexData_b[1] }), gl_in[12].VertexData_c }, VertexData{ gl_in[13].VertexData_a, spvUnsafeArray({ gl_in[13].VertexData_b[0], gl_in[13].VertexData_b[1] }), gl_in[13].VertexData_c }, VertexData{ gl_in[14].VertexData_a, spvUnsafeArray({ gl_in[14].VertexData_b[0], gl_in[14].VertexData_b[1] }), gl_in[14].VertexData_c }, VertexData{ gl_in[15].VertexData_a, spvUnsafeArray({ gl_in[15].VertexData_b[0], gl_in[15].VertexData_b[1] }), gl_in[15].VertexData_c }, VertexData{ gl_in[16].VertexData_a, spvUnsafeArray({ gl_in[16].VertexData_b[0], gl_in[16].VertexData_b[1] }), gl_in[16].VertexData_c }, VertexData{ gl_in[17].VertexData_a, spvUnsafeArray({ gl_in[17].VertexData_b[0], gl_in[17].VertexData_b[1] }), gl_in[17].VertexData_c }, VertexData{ gl_in[18].VertexData_a, spvUnsafeArray({ gl_in[18].VertexData_b[0], gl_in[18].VertexData_b[1] }), gl_in[18].VertexData_c }, VertexData{ gl_in[19].VertexData_a, spvUnsafeArray({ gl_in[19].VertexData_b[0], gl_in[19].VertexData_b[1] }), gl_in[19].VertexData_c }, VertexData{ gl_in[20].VertexData_a, spvUnsafeArray({ gl_in[20].VertexData_b[0], gl_in[20].VertexData_b[1] }), gl_in[20].VertexData_c }, VertexData{ gl_in[21].VertexData_a, spvUnsafeArray({ gl_in[21].VertexData_b[0], gl_in[21].VertexData_b[1] }), gl_in[21].VertexData_c }, VertexData{ gl_in[22].VertexData_a, spvUnsafeArray({ gl_in[22].VertexData_b[0], gl_in[22].VertexData_b[1] }), gl_in[22].VertexData_c }, VertexData{ gl_in[23].VertexData_a, spvUnsafeArray({ gl_in[23].VertexData_b[0], gl_in[23].VertexData_b[1] }), gl_in[23].VertexData_c }, VertexData{ gl_in[24].VertexData_a, spvUnsafeArray({ gl_in[24].VertexData_b[0], gl_in[24].VertexData_b[1] }), gl_in[24].VertexData_c }, VertexData{ gl_in[25].VertexData_a, spvUnsafeArray({ gl_in[25].VertexData_b[0], gl_in[25].VertexData_b[1] }), gl_in[25].VertexData_c }, VertexData{ gl_in[26].VertexData_a, spvUnsafeArray({ gl_in[26].VertexData_b[0], gl_in[26].VertexData_b[1] }), gl_in[26].VertexData_c }, VertexData{ gl_in[27].VertexData_a, spvUnsafeArray({ gl_in[27].VertexData_b[0], gl_in[27].VertexData_b[1] }), gl_in[27].VertexData_c }, VertexData{ gl_in[28].VertexData_a, spvUnsafeArray({ gl_in[28].VertexData_b[0], gl_in[28].VertexData_b[1] }), gl_in[28].VertexData_c }, VertexData{ gl_in[29].VertexData_a, spvUnsafeArray({ gl_in[29].VertexData_b[0], gl_in[29].VertexData_b[1] }), gl_in[29].VertexData_c }, VertexData{ gl_in[30].VertexData_a, spvUnsafeArray({ gl_in[30].VertexData_b[0], gl_in[30].VertexData_b[1] }), gl_in[30].VertexData_c }, VertexData{ gl_in[31].VertexData_a, spvUnsafeArray({ gl_in[31].VertexData_b[0], gl_in[31].VertexData_b[1] }), gl_in[31].VertexData_c } }); + spvUnsafeArray _19 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); spvUnsafeArray tmp; tmp = _19; int _27 = gl_InvocationID ^ 1; - gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + gl_in[_27].VertexData_c; + gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + gl_in[_27].vInputs.c; } diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc index f920abf4..e16e0c0b 100644 --- a/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc @@ -64,7 +64,7 @@ struct HSConstantOut struct VertexOutput_1 { - float3 uv; + float2 uv; }; struct HSOut_1 @@ -80,8 +80,8 @@ struct main0_out struct main0_in { - float3 VertexOutput_uv; - ushort2 m_172; + VertexOutput_1 p; + ushort2 m_171; float4 gl_Position; }; @@ -113,11 +113,11 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1]); spvUnsafeArray p; p[0].pos = gl_in[0].gl_Position; - p[0].uv = gl_in[0].VertexOutput_uv.xy; + p[0].uv = gl_in[0].p.uv; p[1].pos = gl_in[1].gl_Position; - p[1].uv = gl_in[1].VertexOutput_uv.xy; + p[1].uv = gl_in[1].p.uv; p[2].pos = gl_in[2].gl_Position; - p[2].uv = gl_in[2].VertexOutput_uv.xy; + p[2].uv = gl_in[2].p.uv; uint i = gl_InvocationID; spvUnsafeArray param; param = p; diff --git a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc index e47d56a2..c5e309ad 100644 --- a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc +++ b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -16,8 +16,7 @@ struct main0_out struct main0_in { - float3 Boo_a; - uint3 Boo_b; + Boo vInput; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) @@ -26,8 +25,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; uint gl_InvocationID = gl_GlobalInvocationID.x % 4; uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); - Boo _26 = Boo{ gl_in[gl_InvocationID].Boo_a, gl_in[gl_InvocationID].Boo_b }; - gl_out[gl_InvocationID].vVertex = _26; + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert index da189e48..6aaefd1e 100644 --- a/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -11,8 +11,6 @@ struct main0_out float4 gl_Position; float gl_PointSize; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; static inline __attribute__((always_inline)) @@ -33,7 +31,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 if (any(gl_GlobalInvocationID >= spvStageInputSize)) return; write_in_func(v0, out.v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert index cf52d617..7f77f945 100644 --- a/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -11,8 +11,6 @@ struct main0_out float4 gl_Position; float gl_PointSize; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; static inline __attribute__((always_inline)) @@ -33,7 +31,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 if (any(gl_GlobalInvocationID >= spvStageInputSize)) return; write_in_func(out.v0, v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert index 65cbdb88..63ebd678 100644 --- a/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -11,8 +11,6 @@ struct main0_out float4 v1; float4 gl_Position; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; static inline __attribute__((always_inline)) @@ -33,7 +31,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 if (any(gl_GlobalInvocationID >= spvStageInputSize)) return; write_in_func(out.v0, out.v1, out.gl_Position, gl_PointSize, out.gl_ClipDistance); - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000..b27f8ad9 --- /dev/null +++ b/reference/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,113 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray Block_a; + float Block_b; + float2x2 Block_m; + Meep Block_meep; + spvUnsafeArray Block_meeps; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& a, device float& b, device float2x2& m, device Meep& meep, device spvUnsafeArray& meeps, device main0_patchOut& patchOut) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + a[0] = 1.0; + a[1] = 2.0; + b = 3.0; + m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + meep.a = 4.0; + meep.b = 5.0; + meeps[0].a = 6.0; + meeps[0].b = 7.0; + meeps[1].a = 8.0; + meeps[1].b = 9.0; + patchOut.Block_a[0] = 1.0; + patchOut.Block_a[1] = 2.0; + patchOut.Block_b = 3.0; + patchOut.Block_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0)); + patchOut.Block_meep.a = 4.0; + patchOut.Block_meep.b = 5.0; + patchOut.Block_meeps[0].a = 6.0; + patchOut.Block_meeps[0].b = 7.0; + patchOut.Block_meeps[1].a = 8.0; + patchOut.Block_meeps[1].b = 9.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.a, patchOut.b, patchOut.m, patchOut.meep, patchOut.meeps, patchOut); +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc index ad23ea7f..a743298b 100644 --- a/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc +++ b/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -58,9 +58,7 @@ struct main0_out struct main0_in { - float4x4 VertexData_a; - spvUnsafeArray VertexData_b; - float4 VertexData_c; + VertexData vInputs; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) @@ -69,12 +67,10 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; uint gl_InvocationID = gl_GlobalInvocationID.x % 4; uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); - spvUnsafeArray _19 = spvUnsafeArray({ VertexData{ gl_in[0].VertexData_a, spvUnsafeArray({ gl_in[0].VertexData_b[0], gl_in[0].VertexData_b[1] }), gl_in[0].VertexData_c }, VertexData{ gl_in[1].VertexData_a, spvUnsafeArray({ gl_in[1].VertexData_b[0], gl_in[1].VertexData_b[1] }), gl_in[1].VertexData_c }, VertexData{ gl_in[2].VertexData_a, spvUnsafeArray({ gl_in[2].VertexData_b[0], gl_in[2].VertexData_b[1] }), gl_in[2].VertexData_c }, VertexData{ gl_in[3].VertexData_a, spvUnsafeArray({ gl_in[3].VertexData_b[0], gl_in[3].VertexData_b[1] }), gl_in[3].VertexData_c }, VertexData{ gl_in[4].VertexData_a, spvUnsafeArray({ gl_in[4].VertexData_b[0], gl_in[4].VertexData_b[1] }), gl_in[4].VertexData_c }, VertexData{ gl_in[5].VertexData_a, spvUnsafeArray({ gl_in[5].VertexData_b[0], gl_in[5].VertexData_b[1] }), gl_in[5].VertexData_c }, VertexData{ gl_in[6].VertexData_a, spvUnsafeArray({ gl_in[6].VertexData_b[0], gl_in[6].VertexData_b[1] }), gl_in[6].VertexData_c }, VertexData{ gl_in[7].VertexData_a, spvUnsafeArray({ gl_in[7].VertexData_b[0], gl_in[7].VertexData_b[1] }), gl_in[7].VertexData_c }, VertexData{ gl_in[8].VertexData_a, spvUnsafeArray({ gl_in[8].VertexData_b[0], gl_in[8].VertexData_b[1] }), gl_in[8].VertexData_c }, VertexData{ gl_in[9].VertexData_a, spvUnsafeArray({ gl_in[9].VertexData_b[0], gl_in[9].VertexData_b[1] }), gl_in[9].VertexData_c }, VertexData{ gl_in[10].VertexData_a, spvUnsafeArray({ gl_in[10].VertexData_b[0], gl_in[10].VertexData_b[1] }), gl_in[10].VertexData_c }, VertexData{ gl_in[11].VertexData_a, spvUnsafeArray({ gl_in[11].VertexData_b[0], gl_in[11].VertexData_b[1] }), gl_in[11].VertexData_c }, VertexData{ gl_in[12].VertexData_a, spvUnsafeArray({ gl_in[12].VertexData_b[0], gl_in[12].VertexData_b[1] }), gl_in[12].VertexData_c }, VertexData{ gl_in[13].VertexData_a, spvUnsafeArray({ gl_in[13].VertexData_b[0], gl_in[13].VertexData_b[1] }), gl_in[13].VertexData_c }, VertexData{ gl_in[14].VertexData_a, spvUnsafeArray({ gl_in[14].VertexData_b[0], gl_in[14].VertexData_b[1] }), gl_in[14].VertexData_c }, VertexData{ gl_in[15].VertexData_a, spvUnsafeArray({ gl_in[15].VertexData_b[0], gl_in[15].VertexData_b[1] }), gl_in[15].VertexData_c }, VertexData{ gl_in[16].VertexData_a, spvUnsafeArray({ gl_in[16].VertexData_b[0], gl_in[16].VertexData_b[1] }), gl_in[16].VertexData_c }, VertexData{ gl_in[17].VertexData_a, spvUnsafeArray({ gl_in[17].VertexData_b[0], gl_in[17].VertexData_b[1] }), gl_in[17].VertexData_c }, VertexData{ gl_in[18].VertexData_a, spvUnsafeArray({ gl_in[18].VertexData_b[0], gl_in[18].VertexData_b[1] }), gl_in[18].VertexData_c }, VertexData{ gl_in[19].VertexData_a, spvUnsafeArray({ gl_in[19].VertexData_b[0], gl_in[19].VertexData_b[1] }), gl_in[19].VertexData_c }, VertexData{ gl_in[20].VertexData_a, spvUnsafeArray({ gl_in[20].VertexData_b[0], gl_in[20].VertexData_b[1] }), gl_in[20].VertexData_c }, VertexData{ gl_in[21].VertexData_a, spvUnsafeArray({ gl_in[21].VertexData_b[0], gl_in[21].VertexData_b[1] }), gl_in[21].VertexData_c }, VertexData{ gl_in[22].VertexData_a, spvUnsafeArray({ gl_in[22].VertexData_b[0], gl_in[22].VertexData_b[1] }), gl_in[22].VertexData_c }, VertexData{ gl_in[23].VertexData_a, spvUnsafeArray({ gl_in[23].VertexData_b[0], gl_in[23].VertexData_b[1] }), gl_in[23].VertexData_c }, VertexData{ gl_in[24].VertexData_a, spvUnsafeArray({ gl_in[24].VertexData_b[0], gl_in[24].VertexData_b[1] }), gl_in[24].VertexData_c }, VertexData{ gl_in[25].VertexData_a, spvUnsafeArray({ gl_in[25].VertexData_b[0], gl_in[25].VertexData_b[1] }), gl_in[25].VertexData_c }, VertexData{ gl_in[26].VertexData_a, spvUnsafeArray({ gl_in[26].VertexData_b[0], gl_in[26].VertexData_b[1] }), gl_in[26].VertexData_c }, VertexData{ gl_in[27].VertexData_a, spvUnsafeArray({ gl_in[27].VertexData_b[0], gl_in[27].VertexData_b[1] }), gl_in[27].VertexData_c }, VertexData{ gl_in[28].VertexData_a, spvUnsafeArray({ gl_in[28].VertexData_b[0], gl_in[28].VertexData_b[1] }), gl_in[28].VertexData_c }, VertexData{ gl_in[29].VertexData_a, spvUnsafeArray({ gl_in[29].VertexData_b[0], gl_in[29].VertexData_b[1] }), gl_in[29].VertexData_c }, VertexData{ gl_in[30].VertexData_a, spvUnsafeArray({ gl_in[30].VertexData_b[0], gl_in[30].VertexData_b[1] }), gl_in[30].VertexData_c }, VertexData{ gl_in[31].VertexData_a, spvUnsafeArray({ gl_in[31].VertexData_b[0], gl_in[31].VertexData_b[1] }), gl_in[31].VertexData_c } }); + spvUnsafeArray _19 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); spvUnsafeArray tmp; tmp = _19; - int _27 = gl_InvocationID ^ 1; - VertexData _30 = VertexData{ gl_in[_27].VertexData_a, spvUnsafeArray({ gl_in[_27].VertexData_b[0], gl_in[_27].VertexData_b[1] }), gl_in[_27].VertexData_c }; - VertexData tmp_single = _30; + VertexData tmp_single = gl_in[gl_InvocationID ^ 1].vInputs; gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + tmp_single.c; } diff --git a/shaders-msl/tesc/complex-patch-out-types.tesc b/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000..fd56ae46 --- /dev/null +++ b/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,55 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) patch out float a[2]; +layout(location = 2) patch out float b; +layout(location = 3) patch out mat2 m; +layout(location = 5) patch out Meep meep; +layout(location = 7) patch out Meep meeps[2]; + +layout(location = 11) patch out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[0] = 1.0; + a[1] = 2.0; + b = 3.0; + m = mat2(2.0); + meep.a = 4.0; + meep.b = 5.0; + meeps[0].a = 6.0; + meeps[0].b = 7.0; + meeps[1].a = 8.0; + meeps[1].b = 9.0; + + B.a[0] = 1.0; + B.a[1] = 2.0; + B.b = 3.0; + B.m = mat2(4.0); + B.meep.a = 4.0; + B.meep.b = 5.0; + B.meeps[0].a = 6.0; + B.meeps[0].b = 7.0; + B.meeps[1].a = 8.0; + B.meeps[1].b = 9.0; +} + +void main() +{ + write_in_func(); +} diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4f6e205e..774e08f2 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1731,8 +1731,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Add the global variables as arguments to the function if (func_id != ir.default_entry_point) { - bool added_in = false; - bool added_out = false; + bool control_point_added_in = false; + bool control_point_added_out = false; + bool patch_added_in = false; + bool patch_added_out = false; + for (uint32_t arg_id : added_arg_ids) { auto &var = get(arg_id); @@ -1741,16 +1744,19 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type); + bool is_block = has_decoration(p_type->self, DecorationBlock); bool is_control_point_storage = !is_patch && ((is_tessellation_shader() && var.storage == StorageClassInput) || (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)); + bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput; bool is_builtin = is_builtin_variable(var); bool variable_is_stage_io = !is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || p_type->basetype == SPIRType::Struct; - bool is_redirected_to_global_stage_io = is_control_point_storage && variable_is_stage_io; + bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) && + variable_is_stage_io; // If output is masked it is not considered part of the global stage IO interface. if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput) @@ -1762,7 +1768,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. // We collected them into a structure; we must pass the array of this // structure to the function. - std::string name = var.storage == StorageClassInput ? "gl_in" : "gl_out"; + std::string name; + if (is_patch) + name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name; + else + name = var.storage == StorageClassInput ? "gl_in" : "gl_out"; if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock) && @@ -1797,16 +1807,18 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // structure to the function. if (var.storage == StorageClassInput) { + auto &added_in = is_patch ? patch_added_in : control_point_added_in; if (added_in) continue; - arg_id = stage_in_ptr_var_id; + arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id; added_in = true; } else if (var.storage == StorageClassOutput) { + auto &added_out = is_patch ? patch_added_out : control_point_added_out; if (added_out) continue; - arg_id = stage_out_ptr_var_id; + arg_id = is_patch ? patch_stage_out_var_id : stage_out_ptr_var_id; added_out = true; } @@ -1915,14 +1927,23 @@ void CompilerMSL::mark_as_packable(SPIRType &type) uint32_t CompilerMSL::type_to_location_count(const SPIRType &type) const { - // In MSL, we cannot place structs in any context where we need locations. - assert(type.basetype != SPIRType::Struct); + uint32_t count; + if (type.basetype == SPIRType::Struct) + { + uint32_t mbr_count = uint32_t(type.member_types.size()); + count = 0; + for (uint32_t i = 0; i < mbr_count; i++) + count += type_to_location_count(get(type.member_types[i])); + } + else + { + count = type.columns > 1 ? type.columns : 1; + } - uint32_t dim = 1; - for (uint32_t i = 0; i < type.array.size(); i++) - dim *= to_array_size_literal(type, i); + uint32_t dim_count = uint32_t(type.array.size()); + for (uint32_t i = 0; i < dim_count; i++) + count *= to_array_size_literal(type, i); - uint32_t count = dim * type.columns; return count; } @@ -2911,6 +2932,16 @@ void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_ } } +bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const +{ + if (storage == StorageClassOutput) + return !capture_output_to_buffer; + else if (storage == StorageClassInput) + return !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup); + else + return false; +} + void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta) { @@ -2942,7 +2973,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st if (var_type.basetype == SPIRType::Struct) { - bool block_requires_flattening = is_block || !capture_output_to_buffer || storage == StorageClassInput; + bool block_requires_flattening = variable_storage_requires_stage_io(storage) || is_block; bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration; if (needs_local_declaration) @@ -3009,10 +3040,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type); bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; - bool storage_is_stage_io = - (storage == StorageClassInput && !(get_execution_model() == ExecutionModelTessellationControl && - msl_options.multi_patch_workgroup)) || - storage == StorageClassOutput; + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); // ClipDistance always needs to be declared as user attributes. if (builtin == BuiltInClipDistance) @@ -3042,10 +3070,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st if (!is_builtin || has_active_builtin(builtin, storage)) { bool is_composite_type = is_matrix(var_type) || is_array(var_type); - bool storage_is_stage_io = - (storage == StorageClassInput && - !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup)) || - (storage == StorageClassOutput && !capture_output_to_buffer); + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; // ClipDistance always needs to be declared as user attributes. @@ -3088,8 +3113,11 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t auto &type = get_variable_element_type(var); + bool flatten_composites = variable_storage_requires_stage_io(var.storage); + bool is_block = has_decoration(type.self, DecorationBlock); + uint32_t mbr_idx = uint32_t(-1); - if (type.basetype == SPIRType::Struct) + if (type.basetype == SPIRType::Struct && (flatten_composites || is_block)) mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); if (mbr_idx != uint32_t(-1)) @@ -3580,6 +3608,10 @@ uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t locat { auto &type = get(type_id); + // Struct types must match exactly. + if (type.basetype == SPIRType::Struct) + return type_id; + auto p_va = inputs_by_location.find(location); if (p_va == end(inputs_by_location)) { @@ -6525,16 +6557,22 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id if (ptr_type.storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationEvaluation) return false; - bool multi_patch_tess_ctl = get_execution_model() == ExecutionModelTessellationControl && - msl_options.multi_patch_workgroup && ptr_type.storage == StorageClassInput; - bool flat_matrix = is_matrix(result_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl; - bool flat_struct = result_type.basetype == SPIRType::Struct && ptr_type.storage == StorageClassInput; - bool flat_data_type = flat_matrix || is_array(result_type) || flat_struct; - if (!flat_data_type) - return false; - if (has_decoration(ptr, DecorationPatch)) return false; + bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; + + bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage); + + bool flat_data_type = flattened_io && + (is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct); + + // Edge case, even with multi-patch workgroups, we still need to unroll load + // if we're loading control points directly. + if (ptr_is_io_variable && is_array(result_type)) + flat_data_type = true; + + if (!flat_data_type) + return false; // Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out. // Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup @@ -6543,12 +6581,31 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex); auto *var = maybe_get_backing_variable(ptr); - bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; auto &expr_type = get_pointee_type(ptr_type.self); const auto &iface_type = expression_type(stage_in_ptr_var_id); - if (result_type.array.size() > 2) + if (!flattened_io) + { + // Simplest case for multi-patch workgroups, just unroll array as-is. + if (interface_index == uint32_t(-1)) + return false; + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1); + + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.array.size() > 2) { SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions."); } @@ -6558,7 +6615,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable."); if (interface_index == uint32_t(-1)) SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); - if (result_type.basetype == SPIRType::Struct || flat_matrix) + if (result_type.basetype == SPIRType::Struct || is_matrix(result_type)) SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO."); expr += type_to_glsl(result_type) + "({ "; @@ -6572,44 +6629,19 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id expr += type_to_glsl(sub_type) + "({ "; interface_index = base_interface_index; uint32_t array_size = to_array_size_literal(result_type, 0); - if (multi_patch_tess_ctl) + for (uint32_t j = 0; j < array_size; j++, interface_index++) { - for (uint32_t j = 0; j < array_size; j++) - { - const uint32_t indices[3] = { i, interface_index, j }; + const uint32_t indices[2] = { i, interface_index }; - AccessChainMeta meta; - expr += - access_chain_internal(stage_in_ptr_var_id, indices, 3, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - // If the expression has more vector components than the result type, insert - // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might - // happen if we replace the type of an input variable. - if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && - expr_type.vecsize > sub_type.vecsize) - expr += vector_swizzle(sub_type.vecsize, 0); + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && + expr_type.vecsize > sub_type.vecsize) + expr += vector_swizzle(sub_type.vecsize, 0); - if (j + 1 < array_size) - expr += ", "; - } - } - else - { - for (uint32_t j = 0; j < array_size; j++, interface_index++) - { - const uint32_t indices[2] = { i, interface_index }; - - AccessChainMeta meta; - expr += - access_chain_internal(stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && - expr_type.vecsize > sub_type.vecsize) - expr += vector_swizzle(sub_type.vecsize, 0); - - if (j + 1 < array_size) - expr += ", "; - } + if (j + 1 < array_size) + expr += ", "; } expr += " })"; if (i + 1 < num_control_points) @@ -6617,7 +6649,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id } expr += " })"; } - else if (flat_struct) + else if (result_type.basetype == SPIRType::Struct) { bool is_array_of_struct = is_array(result_type); if (is_array_of_struct && !ptr_is_io_variable) @@ -6650,7 +6682,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id const auto &mbr_type = get(struct_type.member_types[j]); const auto &expr_mbr_type = get(expr_type.member_types[j]); - if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl) + if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput) { expr += type_to_glsl(mbr_type) + "("; for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++) @@ -6660,8 +6692,8 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id const uint32_t indices[2] = { i, interface_index }; AccessChainMeta meta; expr += access_chain_internal( - stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); } else expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); @@ -6677,48 +6709,23 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id { expr += type_to_glsl(mbr_type) + "({ "; uint32_t array_size = to_array_size_literal(mbr_type, 0); - if (multi_patch_tess_ctl) + for (uint32_t k = 0; k < array_size; k++, interface_index++) { - for (uint32_t k = 0; k < array_size; k++) + if (is_array_of_struct) { - if (is_array_of_struct) - { - const uint32_t indices[3] = { i, interface_index, k }; - AccessChainMeta meta; - expr += access_chain_internal( - stage_in_ptr_var_id, indices, 3, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - } - else - expr += join(to_expression(ptr), ".", to_member_name(iface_type, interface_index), "[", - k, "]"); - if (expr_mbr_type.vecsize > mbr_type.vecsize) - expr += vector_swizzle(mbr_type.vecsize, 0); - - if (k + 1 < array_size) - expr += ", "; + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); } - } - else - { - for (uint32_t k = 0; k < array_size; k++, interface_index++) - { - if (is_array_of_struct) - { - const uint32_t indices[2] = { i, interface_index }; - AccessChainMeta meta; - expr += access_chain_internal( - stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); - } - else - expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); - if (expr_mbr_type.vecsize > mbr_type.vecsize) - expr += vector_swizzle(mbr_type.vecsize, 0); + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); - if (k + 1 < array_size) - expr += ", "; - } + if (k + 1 < array_size) + expr += ", "; } expr += " })"; } @@ -6748,7 +6755,7 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id if (is_array_of_struct) expr += " })"; } - else if (flat_matrix) + else if (is_matrix(result_type)) { bool is_array_of_matrix = is_array(result_type); if (is_array_of_matrix && !ptr_is_io_variable) @@ -6774,9 +6781,8 @@ bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id const uint32_t indices[2] = { i, interface_index }; AccessChainMeta meta; - expr += - access_chain_internal(stage_in_ptr_var_id, indices, 2, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); if (expr_type.vecsize > result_type.vecsize) expr += vector_swizzle(result_type.vecsize, 0); if (j + 1 < result_type.columns) @@ -6867,7 +6873,8 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l bool patch = false; bool flat_data = false; bool ptr_is_chain = false; - bool multi_patch = get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup; + bool flatten_composites = false; + bool is_block = false; if (var) @@ -6875,13 +6882,15 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l if (var) { + flatten_composites = variable_storage_requires_stage_io(var->storage); patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var)); // Should match strip_array in add_interface_block. flat_data = var->storage == StorageClassInput || (var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl); - if (patch && (!is_block || var->storage != StorageClassOutput)) + // Patch inputs are treated as normal block IO variables, so they don't deal with this path at all. + if (patch && (!is_block || var->storage == StorageClassInput)) flat_data = false; // We might have a chained access chain, where @@ -6943,7 +6952,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l VariableID stage_var_id; if (patch) - stage_var_id = patch_stage_out_var_id; + stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id; else stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; @@ -6957,8 +6966,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l auto &result_ptr_type = get(ops[0]); uint32_t const_mbr_id = next_id++; - uint32_t index = get_extended_decoration(var->self, SPIRVCrossDecorationInterfaceMemberIndex); - if (var->storage == StorageClassInput || is_block) + uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); + + if (flatten_composites || is_block) { uint32_t i = first_non_array_index; auto *type = &get_variable_element_type(*var); @@ -6977,9 +6987,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // In this case, we're poking into flattened structures and arrays, so now we have to // combine the following indices. If we encounter a non-constant index, // we're hosed. - for (; i < length; ++i) + for (; flatten_composites && i < length; ++i) { - if ((multi_patch || (!is_array(*type) && !is_matrix(*type))) && type->basetype != SPIRType::Struct) + if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) break; auto *c = maybe_get(ops[i]); @@ -7007,31 +7017,48 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l type = &get(type->member_types[c->scalar()]); } - if ((!multi_patch && (is_matrix(result_ptr_type) || is_array(result_ptr_type))) || - result_ptr_type.basetype == SPIRType::Struct) - { - // We're not going to emit the actual member name, we let any further OpLoad take care of that. - // Tag the access chain with the member index we're referencing. - set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); - } - else + // We're not going to emit the actual member name, we let any further OpLoad take care of that. + // Tag the access chain with the member index we're referencing. + bool defer_access_chain = flatten_composites && (is_matrix(result_ptr_type) || is_array(result_ptr_type) || + result_ptr_type.basetype == SPIRType::Struct); + + if (!defer_access_chain) { // Access the appropriate member of gl_in/gl_out. set(const_mbr_id, get_uint_type_id(), index, false); indices.push_back(const_mbr_id); + // Member index is now irrelevant. + index = uint32_t(-1); + // Append any straggling access chain indices. if (i < length) indices.insert(indices.end(), ops + i, ops + length); } + else + { + // We must have consumed the entire access chain if we're deferring it. + assert(i == length); + } + + if (index != uint32_t(-1)) + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); + else + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); } else { - assert(index != uint32_t(-1)); - set(const_mbr_id, get_uint_type_id(), index, false); - indices.push_back(const_mbr_id); + if (index != uint32_t(-1)) + { + set(const_mbr_id, get_uint_type_id(), index, false); + indices.push_back(const_mbr_id); + } - indices.insert(indices.end(), ops + 4, ops + length); + // Member index is now irrelevant. + index = uint32_t(-1); + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); + + indices.insert(indices.end(), ops + first_non_array_index, ops + length); } // We use the pointer to the base of the input/output array here, @@ -7057,7 +7084,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // First one is the gl_in/gl_out struct itself, then an index into that array. // If we have traversed further, we use a normal access chain formulation. auto *ptr_expr = maybe_get(ptr); - if (ptr_expr && ptr_expr->implied_read_expressions.size() == 2) + if (flatten_composites && ptr_expr && ptr_expr->implied_read_expressions.size() == 2) { e = join(to_expression(ptr), access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()), @@ -10665,12 +10692,16 @@ uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn auto &mbr_type = get(get(type_id).member_types[index]); uint32_t count = type_to_location_count(mbr_type); - // This should always be 1. - if (count != 1) - return k_unknown_location; - loc = 0; - while (location_inputs_in_use.count(loc) != 0) + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_inputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) loc++; set_member_decoration(type_id, index, DecorationLocation, loc); diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 8bffcdbf..a7533b8c 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1077,6 +1077,8 @@ protected: bool type_is_pointer_to_pointer(const SPIRType &type) const; bool is_supported_argument_buffer_type(const SPIRType &type) const; + bool variable_storage_requires_stage_io(spv::StorageClass storage) const; + // OpcodeHandler that handles several MSL preprocessing operations. struct OpCodePreprocessor : OpcodeHandler {