MSL: Add support for tessellation control shaders.

These are transpiled to kernel functions that write the output of the shader to three buffers: one for per-vertex varyings, one for per-patch varyings, and one for the tessellation levels. This structure is mandated by the way Metal works, where the tessellation factors are supplied to the draw method in their own buffer, while the per-patch and per-vertex varyings are supplied as though they were vertex attributes; since they have different step rates, they must be in separate buffers. The kernel is expected to be run in a workgroup whose size is the greater of the number of input or output control points. It uses Metal's support for vertex-style stage input to a compute shader to get the input values; therefore, at least one instance must run per input point. Meanwhile, Vulkan mandates that it run at least once per output point. Overrunning the output array is a concern, but any values written should either be discarded or overwritten by subsequent patches. I'm probably going to put some slop space in the buffer when I integrate this into MoltenVK to be on the safe side.
2024-11-12 15:10:30 +00:00 · 2019-02-03 23:58:46 -06:00 · 2019-02-03 23:58:46 -06:00 · eb89c3a428
commit eb89c3a428
parent d9ed3dcc7a
21 changed files with 1745 additions and 100 deletions
--- a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
+++ b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
@ -0,0 +1,71 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct VertexOutput
+{
+    float4 pos;
+    float2 uv;
+};
+
+struct VertexOutput_1
+{
+    float2 uv;
+};
+
+struct HSOut
+{
+    float2 uv;
+};
+
+struct main0_out
+{
+    HSOut _entryPointOutput;
+    float4 gl_Position;
+};
+
+struct main0_in
+{
+    float2 VertexOutput_uv [[attribute(0)]];
+    float4 gl_Position [[attribute(1)]];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+template<typename T, uint N>
+void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } };
+    VertexOutput param[3];
+    spvArrayCopyFromStack1(param, _223);
+    gl_out[gl_InvocationID].gl_Position = param[gl_InvocationID].pos;
+    gl_out[gl_InvocationID]._entryPointOutput.uv = param[gl_InvocationID].uv;
+    threadgroup_barrier(mem_flags::mem_device);
+    if (int(gl_InvocationID) == 0)
+    {
+        float2 _174 = float2(1.0) + gl_in[0].VertexOutput_uv;
+        float _175 = _174.x;
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_175);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_175);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_175);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_175);
+    }
+}
+
--- a/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc
+++ b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc
@ -0,0 +1,37 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float3 vFoo;
+};
+
+struct main0_in
+{
+    float4 gl_Position [[attribute(0)]];
+};
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625);
+    patchOut.vFoo = float3(1.0);
+    gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position;
+}
+
--- a/reference/opt/shaders-msl/tesc/basic.tesc
+++ b/reference/opt/shaders-msl/tesc/basic.tesc
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_patchOut
+{
+    float3 vFoo;
+};
+
+kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625);
+    patchOut.vFoo = float3(1.0);
+}
+
--- a/reference/opt/shaders-msl/tesc/water_tess.tesc
+++ b/reference/opt/shaders-msl/tesc/water_tess.tesc
@ -0,0 +1,91 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4 uScale;
+    float3 uCamPos;
+    float2 uPatchSize;
+    float2 uMaxTessLevel;
+    float uDistanceMod;
+    float4 uFrustum[6];
+};
+
+struct main0_patchOut
+{
+    float2 vOutPatchPosBase;
+    float4 vPatchLods;
+};
+
+struct main0_in
+{
+    float2 vPatchPosBase [[attribute(0)]];
+};
+
+kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    float2 _430 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy;
+    float2 _440 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy;
+    float3 _445 = float3(_430.x, -10.0, _430.y);
+    float3 _450 = float3(_440.x, 10.0, _440.y);
+    float4 _466 = float4((_445 + _450) * 0.5, 1.0);
+    float3 _513 = float3(length(_450 - _445) * (-0.5));
+    bool _515 = any(float3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)) <= _513);
+    bool _525;
+    if (!_515)
+    {
+        _525 = any(float3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)) <= _513);
+    }
+    else
+    {
+        _525 = _515;
+    }
+    if (!(!_525))
+    {
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0);
+    }
+    else
+    {
+        patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase;
+        float2 _678 = (gl_in[0].vPatchPosBase + (float2(-0.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float2 _706 = (gl_in[0].vPatchPosBase + (float2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float _725 = fast::clamp(log2((length(_41.uCamPos - float3(_706.x, 0.0, _706.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x);
+        float2 _734 = (gl_in[0].vPatchPosBase + (float2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float2 _762 = (gl_in[0].vPatchPosBase + (float2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float _781 = fast::clamp(log2((length(_41.uCamPos - float3(_762.x, 0.0, _762.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x);
+        float2 _790 = (gl_in[0].vPatchPosBase + (float2(0.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float _809 = fast::clamp(log2((length(_41.uCamPos - float3(_790.x, 0.0, _790.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x);
+        float2 _818 = (gl_in[0].vPatchPosBase + (float2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float _837 = fast::clamp(log2((length(_41.uCamPos - float3(_818.x, 0.0, _818.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x);
+        float2 _846 = (gl_in[0].vPatchPosBase + (float2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float2 _874 = (gl_in[0].vPatchPosBase + (float2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float _893 = fast::clamp(log2((length(_41.uCamPos - float3(_874.x, 0.0, _874.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x);
+        float2 _902 = (gl_in[0].vPatchPosBase + (float2(1.5) * _41.uPatchSize)) * _41.uScale.xy;
+        float _612 = dot(float4(_781, _809, fast::clamp(log2((length(_41.uCamPos - float3(_846.x, 0.0, _846.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _893), float4(0.25));
+        float _618 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_678.x, 0.0, _678.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _725, _781, _809), float4(0.25));
+        float _624 = dot(float4(_725, fast::clamp(log2((length(_41.uCamPos - float3(_734.x, 0.0, _734.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _809, _837), float4(0.25));
+        float _630 = dot(float4(_809, _837, _893, fast::clamp(log2((length(_41.uCamPos - float3(_902.x, 0.0, _902.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25));
+        float4 _631 = float4(_612, _618, _624, _630);
+        patchOut.vPatchLods = _631;
+        float4 _928 = exp2(-fast::min(_631, _631.yzwx)) * _41.uMaxTessLevel.y;
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_928.x);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_928.y);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_928.z);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_928.w);
+        float _935 = _41.uMaxTessLevel.y * exp2(-fast::min(fast::min(fast::min(_612, _618), fast::min(_624, _630)), _809));
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_935);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_935);
+    }
+}
+
--- a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert
+++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert
@ -31,7 +31,7 @@ void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])
    for (uint i = 0; i < N; dst[i] = src[i], i++);
 }

-float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
+float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], int Index1, int Index2)
 {
    float4 indexable[4];
    spvArrayCopyFromStack1(indexable, positions);
@ -40,7 +40,7 @@ float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread cons
    return indexable[Index1] + indexable_1[Index2];
 }

-float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
+float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], int Index1, int Index2)
 {
    return consume_constant_arrays2(positions, positions2, Index1, Index2);
 }
--- a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
+++ b/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
@ -0,0 +1,111 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct VertexOutput
+{
+    float4 pos;
+    float2 uv;
+};
+
+struct HSOut
+{
+    float4 pos;
+    float2 uv;
+};
+
+struct HSConstantOut
+{
+    float EdgeTess[3];
+    float InsideTess;
+};
+
+struct VertexOutput_1
+{
+    float2 uv;
+};
+
+struct HSOut_1
+{
+    float2 uv;
+};
+
+struct main0_out
+{
+    HSOut_1 _entryPointOutput;
+    float4 gl_Position;
+};
+
+struct main0_in
+{
+    float2 VertexOutput_uv [[attribute(0)]];
+    float4 gl_Position [[attribute(1)]];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+template<typename T, uint N>
+void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i)
+{
+    HSOut _output;
+    _output.pos = p[i].pos;
+    _output.uv = p[i].uv;
+    return _output;
+}
+
+HSConstantOut PatchHS(thread const VertexOutput (&_patch)[3])
+{
+    HSConstantOut _output;
+    _output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x;
+    _output.EdgeTess[1] = (float2(1.0) + _patch[0].uv).x;
+    _output.EdgeTess[2] = (float2(1.0) + _patch[0].uv).x;
+    _output.InsideTess = (float2(1.0) + _patch[0].uv).x;
+    return _output;
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    VertexOutput p[3];
+    p[0].pos = gl_in[0].gl_Position;
+    p[0].uv = gl_in[0].VertexOutput_uv;
+    p[1].pos = gl_in[1].gl_Position;
+    p[1].uv = gl_in[1].VertexOutput_uv;
+    p[2].pos = gl_in[2].gl_Position;
+    p[2].uv = gl_in[2].VertexOutput_uv;
+    uint i = gl_InvocationID;
+    VertexOutput param[3];
+    spvArrayCopyFromStack1(param, p);
+    uint param_1 = i;
+    HSOut flattenTemp = _hs_main(param, param_1);
+    gl_out[gl_InvocationID].gl_Position = flattenTemp.pos;
+    gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv;
+    threadgroup_barrier(mem_flags::mem_device);
+    if (int(gl_InvocationID) == 0)
+    {
+        VertexOutput param_2[3];
+        spvArrayCopyFromStack1(param_2, p);
+        HSConstantOut _patchConstantResult = PatchHS(param_2);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_patchConstantResult.EdgeTess[1]);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_patchConstantResult.EdgeTess[2]);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_patchConstantResult.InsideTess);
+    }
+}
+
--- a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc
+++ b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc
@ -0,0 +1,44 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float3 vFoo;
+};
+
+struct main0_in
+{
+    float4 gl_Position [[attribute(0)]];
+};
+
+void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in)
+{
+    gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position;
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625);
+    patchOut.vFoo = float3(1.0);
+    set_position(gl_out, gl_InvocationID, gl_in);
+}
+
--- a/reference/shaders-msl/tesc/basic.tesc
+++ b/reference/shaders-msl/tesc/basic.tesc
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_patchOut
+{
+    float3 vFoo;
+};
+
+kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625);
+    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625);
+    patchOut.vFoo = float3(1.0);
+}
+
--- a/reference/shaders-msl/tesc/water_tess.tesc
+++ b/reference/shaders-msl/tesc/water_tess.tesc
@ -0,0 +1,132 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4 uScale;
+    float3 uCamPos;
+    float2 uPatchSize;
+    float2 uMaxTessLevel;
+    float uDistanceMod;
+    float4 uFrustum[6];
+};
+
+struct main0_patchOut
+{
+    float2 vOutPatchPosBase;
+    float4 vPatchLods;
+};
+
+struct main0_in
+{
+    float2 vPatchPosBase [[attribute(0)]];
+};
+
+bool frustum_cull(thread const float2& p0, constant UBO& v_41)
+{
+    float2 min_xz = (p0 - float2(10.0)) * v_41.uScale.xy;
+    float2 max_xz = ((p0 + v_41.uPatchSize) + float2(10.0)) * v_41.uScale.xy;
+    float3 bb_min = float3(min_xz.x, -10.0, min_xz.y);
+    float3 bb_max = float3(max_xz.x, 10.0, max_xz.y);
+    float3 center = (bb_min + bb_max) * 0.5;
+    float radius = 0.5 * length(bb_max - bb_min);
+    float3 f0 = float3(dot(v_41.uFrustum[0], float4(center, 1.0)), dot(v_41.uFrustum[1], float4(center, 1.0)), dot(v_41.uFrustum[2], float4(center, 1.0)));
+    float3 f1 = float3(dot(v_41.uFrustum[3], float4(center, 1.0)), dot(v_41.uFrustum[4], float4(center, 1.0)), dot(v_41.uFrustum[5], float4(center, 1.0)));
+    float3 _199 = f0;
+    float _200 = radius;
+    bool _205 = any(_199 <= float3(-_200));
+    bool _215;
+    if (!_205)
+    {
+        _215 = any(f1 <= float3(-radius));
+    }
+    else
+    {
+        _215 = _205;
+    }
+    return !_215;
+}
+
+float lod_factor(thread const float2& pos_, constant UBO& v_41)
+{
+    float2 pos = pos_ * v_41.uScale.xy;
+    float3 dist_to_cam = v_41.uCamPos - float3(pos.x, 0.0, pos.y);
+    float level = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod);
+    return fast::clamp(level, 0.0, v_41.uMaxTessLevel.x);
+}
+
+float4 tess_level(thread const float4& lod, constant UBO& v_41)
+{
+    return exp2(-lod) * v_41.uMaxTessLevel.y;
+}
+
+float tess_level(thread const float& lod, constant UBO& v_41)
+{
+    return v_41.uMaxTessLevel.y * exp2(-lod);
+}
+
+void compute_tess_levels(thread const float2& p0, constant UBO& v_41, device float2& vOutPatchPosBase, device float4& vPatchLods, device half (&gl_TessLevelOuter)[4], device half (&gl_TessLevelInner)[2])
+{
+    vOutPatchPosBase = p0;
+    float2 param = p0 + (float2(-0.5) * v_41.uPatchSize);
+    float l00 = lod_factor(param, v_41);
+    float2 param_1 = p0 + (float2(0.5, -0.5) * v_41.uPatchSize);
+    float l10 = lod_factor(param_1, v_41);
+    float2 param_2 = p0 + (float2(1.5, -0.5) * v_41.uPatchSize);
+    float l20 = lod_factor(param_2, v_41);
+    float2 param_3 = p0 + (float2(-0.5, 0.5) * v_41.uPatchSize);
+    float l01 = lod_factor(param_3, v_41);
+    float2 param_4 = p0 + (float2(0.5) * v_41.uPatchSize);
+    float l11 = lod_factor(param_4, v_41);
+    float2 param_5 = p0 + (float2(1.5, 0.5) * v_41.uPatchSize);
+    float l21 = lod_factor(param_5, v_41);
+    float2 param_6 = p0 + (float2(-0.5, 1.5) * v_41.uPatchSize);
+    float l02 = lod_factor(param_6, v_41);
+    float2 param_7 = p0 + (float2(0.5, 1.5) * v_41.uPatchSize);
+    float l12 = lod_factor(param_7, v_41);
+    float2 param_8 = p0 + (float2(1.5) * v_41.uPatchSize);
+    float l22 = lod_factor(param_8, v_41);
+    float4 lods = float4(dot(float4(l01, l11, l02, l12), float4(0.25)), dot(float4(l00, l10, l01, l11), float4(0.25)), dot(float4(l10, l20, l11, l21), float4(0.25)), dot(float4(l11, l21, l12, l22), float4(0.25)));
+    vPatchLods = lods;
+    float4 outer_lods = fast::min(lods, lods.yzwx);
+    float4 param_9 = outer_lods;
+    float4 levels = tess_level(param_9, v_41);
+    gl_TessLevelOuter[0] = half(levels.x);
+    gl_TessLevelOuter[1] = half(levels.y);
+    gl_TessLevelOuter[2] = half(levels.z);
+    gl_TessLevelOuter[3] = half(levels.w);
+    float min_lod = fast::min(fast::min(lods.x, lods.y), fast::min(lods.z, lods.w));
+    float param_10 = fast::min(min_lod, l11);
+    float inner = tess_level(param_10, v_41);
+    gl_TessLevelInner[0] = half(inner);
+    gl_TessLevelInner[1] = half(inner);
+}
+
+kernel void main0(main0_in in [[stage_in]], constant UBO& v_41 [[buffer(0)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    float2 p0 = gl_in[0].vPatchPosBase;
+    float2 param = p0;
+    if (!frustum_cull(param, v_41))
+    {
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0);
+        spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0);
+    }
+    else
+    {
+        float2 param_1 = p0;
+        compute_tess_levels(param_1, v_41, patchOut.vOutPatchPosBase, patchOut.vPatchLods, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, spvTessLevel[gl_PrimitiveID].insideTessellationFactor);
+    }
+}
+
--- a/reference/shaders-msl/vert/in_out_array_mat.vert
+++ b/reference/shaders-msl/vert/in_out_array_mat.vert
@ -44,7 +44,7 @@ void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo,
    color = colors[2];
 }

-void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], thread float3& inNormal)
+void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], float3 inNormal)
 {
    outTransModel[2] = float4(inNormal, 1.0);
    write_deeper_in_function(outTransModel, ubo, color, colors);
--- a/reference/shaders-msl/vert/leaf-function.capture.vert
+++ b/reference/shaders-msl/vert/leaf-function.capture.vert
@ -22,7 +22,7 @@ struct main0_in
    float3 aNormal [[attribute(1)]];
 };

-void set_output(device float4& gl_Position, constant UBO& v_18, thread float4& aVertex, device float3& vNormal, thread float3& aNormal)
+void set_output(device float4& gl_Position, constant UBO& v_18, float4 aVertex, device float3& vNormal, float3 aNormal)
 {
    gl_Position = v_18.uMVP * aVertex;
    vNormal = aNormal;
--- a/reference/shaders-msl/vert/return-array.vert
+++ b/reference/shaders-msl/vert/return-array.vert
@ -36,7 +36,7 @@ void test(thread float4 (&SPIRV_Cross_return_value)[2])
    spvArrayCopyFromConstant1(SPIRV_Cross_return_value, _20);
 }

-void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1)
+void test2(thread float4 (&SPIRV_Cross_return_value)[2], float4 vInput0, float4 vInput1)
 {
    float4 foobar[2];
    foobar[0] = vInput0;
--- a/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
+++ b/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
@ -0,0 +1,248 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 2
+; Bound: 162
+; Schema: 0
+               OpCapability Tessellation
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %hs_main "main" %p_pos %p_1 %i_1 %_entryPointOutput_pos %_entryPointOutput %_patchConstantOutput_EdgeTess %_patchConstantOutput_InsideTess
+               OpExecutionMode %hs_main OutputVertices 3
+               OpExecutionMode %hs_main Triangles
+               OpExecutionMode %hs_main SpacingFractionalOdd
+               OpExecutionMode %hs_main VertexOrderCw
+               OpSource HLSL 500
+               OpName %hs_main "hs_main"
+               OpName %VertexOutput "VertexOutput"
+               OpMemberName %VertexOutput 0 "pos"
+               OpMemberName %VertexOutput 1 "uv"
+               OpName %HSOut "HSOut"
+               OpMemberName %HSOut 0 "pos"
+               OpMemberName %HSOut 1 "uv"
+               OpName %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ "@hs_main(struct-VertexOutput-vf4-vf21[3];u1;"
+               OpName %p "p"
+               OpName %i "i"
+               OpName %HSConstantOut "HSConstantOut"
+               OpMemberName %HSConstantOut 0 "EdgeTess"
+               OpMemberName %HSConstantOut 1 "InsideTess"
+               OpName %PatchHS_struct_VertexOutput_vf4_vf21_3__ "PatchHS(struct-VertexOutput-vf4-vf21[3];"
+               OpName %patch "patch"
+               OpName %output "output"
+               OpName %p_0 "p"
+               OpName %p_pos "p.pos"
+               OpName %VertexOutput_0 "VertexOutput"
+               OpMemberName %VertexOutput_0 0 "uv"
+               OpName %p_1 "p"
+               OpName %i_0 "i"
+               OpName %i_1 "i"
+               OpName %flattenTemp "flattenTemp"
+               OpName %param "param"
+               OpName %param_0 "param"
+               OpName %_entryPointOutput_pos "@entryPointOutput.pos"
+               OpName %HSOut_0 "HSOut"
+               OpMemberName %HSOut_0 0 "uv"
+               OpName %_entryPointOutput "@entryPointOutput"
+               OpName %_patchConstantResult "@patchConstantResult"
+               OpName %param_1 "param"
+               OpName %_patchConstantOutput_EdgeTess "@patchConstantOutput.EdgeTess"
+               OpName %_patchConstantOutput_InsideTess "@patchConstantOutput.InsideTess"
+               OpName %output_0 "output"
+               OpDecorate %p_pos BuiltIn Position
+               OpDecorate %p_1 Location 0
+               OpDecorate %i_1 BuiltIn InvocationId
+               OpDecorate %_entryPointOutput_pos BuiltIn Position
+               OpDecorate %_entryPointOutput Location 0
+               OpDecorate %_patchConstantOutput_EdgeTess Patch
+               OpDecorate %_patchConstantOutput_EdgeTess BuiltIn TessLevelOuter
+               OpDecorate %_patchConstantOutput_InsideTess Patch
+               OpDecorate %_patchConstantOutput_InsideTess BuiltIn TessLevelInner
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+    %v2float = OpTypeVector %float 2
+%VertexOutput = OpTypeStruct %v4float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_3 = OpConstant %uint 3
+%_arr_VertexOutput_uint_3 = OpTypeArray %VertexOutput %uint_3
+%_ptr_Function__arr_VertexOutput_uint_3 = OpTypePointer Function %_arr_VertexOutput_uint_3
+%_ptr_Function_uint = OpTypePointer Function %uint
+      %HSOut = OpTypeStruct %v4float %v2float
+         %16 = OpTypeFunction %HSOut %_ptr_Function__arr_VertexOutput_uint_3 %_ptr_Function_uint
+%_arr_float_uint_3 = OpTypeArray %float %uint_3
+%HSConstantOut = OpTypeStruct %_arr_float_uint_3 %float
+         %23 = OpTypeFunction %HSConstantOut %_ptr_Function__arr_VertexOutput_uint_3
+%_ptr_Function_HSOut = OpTypePointer Function %HSOut
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3
+%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3
+      %p_pos = OpVariable %_ptr_Input__arr_v4float_uint_3 Input
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%VertexOutput_0 = OpTypeStruct %v2float
+%_arr_VertexOutput_0_uint_3 = OpTypeArray %VertexOutput_0 %uint_3
+%_ptr_Input__arr_VertexOutput_0_uint_3 = OpTypePointer Input %_arr_VertexOutput_0_uint_3
+        %p_1 = OpVariable %_ptr_Input__arr_VertexOutput_0_uint_3 Input
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+      %int_2 = OpConstant %int 2
+%_ptr_Input_uint = OpTypePointer Input %uint
+        %i_1 = OpVariable %_ptr_Input_uint Input
+%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3
+%_entryPointOutput_pos = OpVariable %_ptr_Output__arr_v4float_uint_3 Output
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+    %HSOut_0 = OpTypeStruct %v2float
+%_arr_HSOut_0_uint_3 = OpTypeArray %HSOut_0 %uint_3
+%_ptr_Output__arr_HSOut_0_uint_3 = OpTypePointer Output %_arr_HSOut_0_uint_3
+%_entryPointOutput = OpVariable %_ptr_Output__arr_HSOut_0_uint_3 Output
+%_ptr_Output_v2float = OpTypePointer Output %v2float
+     %uint_2 = OpConstant %uint 2
+     %uint_1 = OpConstant %uint 1
+     %uint_0 = OpConstant %uint 0
+       %bool = OpTypeBool
+%_ptr_Function_HSConstantOut = OpTypePointer Function %HSConstantOut
+     %uint_4 = OpConstant %uint 4
+%_arr_float_uint_4 = OpTypeArray %float %uint_4
+%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4
+%_patchConstantOutput_EdgeTess = OpVariable %_ptr_Output__arr_float_uint_4 Output
+%_ptr_Function_float = OpTypePointer Function %float
+%_ptr_Output_float = OpTypePointer Output %float
+%_arr_float_uint_2 = OpTypeArray %float %uint_2
+%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2
+%_patchConstantOutput_InsideTess = OpVariable %_ptr_Output__arr_float_uint_2 Output
+    %float_1 = OpConstant %float 1
+    %hs_main = OpFunction %void None %3
+          %5 = OpLabel
+        %p_0 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function
+        %i_0 = OpVariable %_ptr_Function_uint Function
+%flattenTemp = OpVariable %_ptr_Function_HSOut Function
+      %param = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function
+    %param_0 = OpVariable %_ptr_Function_uint Function
+%_patchConstantResult = OpVariable %_ptr_Function_HSConstantOut Function
+    %param_1 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function
+         %50 = OpAccessChain %_ptr_Input_v4float %p_pos %int_0
+         %51 = OpLoad %v4float %50
+         %52 = OpAccessChain %_ptr_Function_v4float %p_0 %int_0 %int_0
+               OpStore %52 %51
+         %58 = OpAccessChain %_ptr_Input_v2float %p_1 %int_0 %int_0
+         %59 = OpLoad %v2float %58
+         %60 = OpAccessChain %_ptr_Function_v2float %p_0 %int_0 %int_1
+               OpStore %60 %59
+         %61 = OpAccessChain %_ptr_Input_v4float %p_pos %int_1
+         %62 = OpLoad %v4float %61
+         %63 = OpAccessChain %_ptr_Function_v4float %p_0 %int_1 %int_0
+               OpStore %63 %62
+         %64 = OpAccessChain %_ptr_Input_v2float %p_1 %int_1 %int_0
+         %65 = OpLoad %v2float %64
+         %66 = OpAccessChain %_ptr_Function_v2float %p_0 %int_1 %int_1
+               OpStore %66 %65
+         %68 = OpAccessChain %_ptr_Input_v4float %p_pos %int_2
+         %69 = OpLoad %v4float %68
+         %70 = OpAccessChain %_ptr_Function_v4float %p_0 %int_2 %int_0
+               OpStore %70 %69
+         %71 = OpAccessChain %_ptr_Input_v2float %p_1 %int_2 %int_0
+         %72 = OpLoad %v2float %71
+         %73 = OpAccessChain %_ptr_Function_v2float %p_0 %int_2 %int_1
+               OpStore %73 %72
+         %77 = OpLoad %uint %i_1
+               OpStore %i_0 %77
+         %80 = OpLoad %_arr_VertexOutput_uint_3 %p_0
+               OpStore %param %80
+         %82 = OpLoad %uint %i_0
+               OpStore %param_0 %82
+         %83 = OpFunctionCall %HSOut %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ %param %param_0
+               OpStore %flattenTemp %83
+         %86 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0
+         %87 = OpLoad %v4float %86
+         %94 = OpLoad %uint %i_1
+         %89 = OpAccessChain %_ptr_Output_v4float %_entryPointOutput_pos %94
+               OpStore %89 %87
+         %95 = OpAccessChain %_ptr_Function_v2float %flattenTemp %int_1
+         %96 = OpLoad %v2float %95
+         %98 = OpAccessChain %_ptr_Output_v2float %_entryPointOutput %94 %int_0
+               OpStore %98 %96
+               OpControlBarrier %uint_2 %uint_1 %uint_0
+        %102 = OpLoad %uint %i_1
+        %104 = OpIEqual %bool %102 %int_0
+               OpSelectionMerge %106 None
+               OpBranchConditional %104 %105 %106
+        %105 = OpLabel
+        %110 = OpLoad %_arr_VertexOutput_uint_3 %p_0
+               OpStore %param_1 %110
+        %111 = OpFunctionCall %HSConstantOut %PatchHS_struct_VertexOutput_vf4_vf21_3__ %param_1
+               OpStore %_patchConstantResult %111
+        %117 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_0
+        %118 = OpLoad %float %117
+        %120 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_0
+               OpStore %120 %118
+        %121 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_1
+        %122 = OpLoad %float %121
+        %123 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_1
+               OpStore %123 %122
+        %124 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_2
+        %125 = OpLoad %float %124
+        %126 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_2
+               OpStore %126 %125
+        %130 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_1
+        %131 = OpLoad %float %130
+        %132 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_InsideTess %int_0
+               OpStore %132 %131
+               OpBranch %106
+        %106 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ = OpFunction %HSOut None %16
+          %p = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3
+          %i = OpFunctionParameter %_ptr_Function_uint
+         %20 = OpLabel
+     %output = OpVariable %_ptr_Function_HSOut Function
+         %31 = OpLoad %uint %i
+         %33 = OpAccessChain %_ptr_Function_v4float %p %31 %int_0
+         %34 = OpLoad %v4float %33
+         %35 = OpAccessChain %_ptr_Function_v4float %output %int_0
+               OpStore %35 %34
+         %37 = OpLoad %uint %i
+         %39 = OpAccessChain %_ptr_Function_v2float %p %37 %int_1
+         %40 = OpLoad %v2float %39
+         %41 = OpAccessChain %_ptr_Function_v2float %output %int_1
+               OpStore %41 %40
+         %42 = OpLoad %HSOut %output
+               OpReturnValue %42
+               OpFunctionEnd
+%PatchHS_struct_VertexOutput_vf4_vf21_3__ = OpFunction %HSConstantOut None %23
+      %patch = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3
+         %26 = OpLabel
+   %output_0 = OpVariable %_ptr_Function_HSConstantOut Function
+        %135 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1
+        %136 = OpLoad %v2float %135
+        %137 = OpCompositeConstruct %v2float %float_1 %float_1
+        %138 = OpFAdd %v2float %137 %136
+        %139 = OpCompositeExtract %float %138 0
+        %140 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_0
+               OpStore %140 %139
+        %141 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1
+        %142 = OpLoad %v2float %141
+        %143 = OpCompositeConstruct %v2float %float_1 %float_1
+        %144 = OpFAdd %v2float %143 %142
+        %145 = OpCompositeExtract %float %144 0
+        %146 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_1
+               OpStore %146 %145
+        %147 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1
+        %148 = OpLoad %v2float %147
+        %149 = OpCompositeConstruct %v2float %float_1 %float_1
+        %150 = OpFAdd %v2float %149 %148
+        %151 = OpCompositeExtract %float %150 0
+        %152 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_2
+               OpStore %152 %151
+        %153 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1
+        %154 = OpLoad %v2float %153
+        %155 = OpCompositeConstruct %v2float %float_1 %float_1
+        %156 = OpFAdd %v2float %155 %154
+        %157 = OpCompositeExtract %float %156 0
+        %158 = OpAccessChain %_ptr_Function_float %output_0 %int_1
+               OpStore %158 %157
+        %159 = OpLoad %HSConstantOut %output_0
+               OpReturnValue %159
+               OpFunctionEnd
--- a/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc
+++ b/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc
@ -0,0 +1,32 @@
+#version 450
+layout(vertices = 1) out;
+
+in gl_PerVertex
+{
+   vec4 gl_Position;
+} gl_in[gl_MaxPatchVertices];
+
+out gl_PerVertex
+{
+   vec4 gl_Position;
+} gl_out[1];
+
+layout(location = 0) patch out vec3 vFoo;
+
+void set_position()
+{
+    gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position;
+}
+
+void main()
+{
+    gl_TessLevelInner[0] = 8.9;
+    gl_TessLevelInner[1] = 6.9;
+    gl_TessLevelOuter[0] = 8.9;
+    gl_TessLevelOuter[1] = 6.9;
+    gl_TessLevelOuter[2] = 3.9;
+    gl_TessLevelOuter[3] = 4.9;
+    vFoo = vec3(1.0);
+
+    set_position();
+}
--- a/shaders-msl/tesc/basic.tesc
+++ b/shaders-msl/tesc/basic.tesc
@ -0,0 +1,17 @@
+#version 310 es
+#extension GL_EXT_tessellation_shader : require
+
+layout(location = 0) patch out vec3 vFoo;
+
+layout(vertices = 1) out;
+
+void main()
+{
+    gl_TessLevelInner[0] = 8.9;
+    gl_TessLevelInner[1] = 6.9;
+    gl_TessLevelOuter[0] = 8.9;
+    gl_TessLevelOuter[1] = 6.9;
+    gl_TessLevelOuter[2] = 3.9;
+    gl_TessLevelOuter[3] = 4.9;
+    vFoo = vec3(1.0);
+}
--- a/shaders-msl/tesc/water_tess.tesc
+++ b/shaders-msl/tesc/water_tess.tesc
@ -0,0 +1,115 @@
+#version 310 es
+#extension GL_EXT_tessellation_shader : require
+
+layout(vertices = 1) out;
+layout(location = 0) in vec2 vPatchPosBase[];
+
+layout(std140) uniform UBO
+{
+    vec4 uScale;
+    highp vec3 uCamPos;
+    vec2 uPatchSize;
+    vec2 uMaxTessLevel;
+    float uDistanceMod;
+    vec4 uFrustum[6];
+};
+
+layout(location = 1) patch out vec2 vOutPatchPosBase;
+layout(location = 2) patch out vec4 vPatchLods;
+
+float lod_factor(vec2 pos_)
+{
+    vec2 pos = pos_ * uScale.xy;
+    vec3 dist_to_cam = uCamPos - vec3(pos.x, 0.0, pos.y);
+    float level = log2((length(dist_to_cam) + 0.0001) * uDistanceMod);
+    return clamp(level, 0.0, uMaxTessLevel.x);
+}
+
+float tess_level(float lod)
+{
+    return uMaxTessLevel.y * exp2(-lod);
+}
+
+vec4 tess_level(vec4 lod)
+{
+    return uMaxTessLevel.y * exp2(-lod);
+}
+
+// Guard band for vertex displacement.
+#define GUARD_BAND 10.0
+bool frustum_cull(vec2 p0)
+{
+    vec2 min_xz = (p0 - GUARD_BAND) * uScale.xy;
+    vec2 max_xz = (p0 + uPatchSize + GUARD_BAND) * uScale.xy;
+
+    vec3 bb_min = vec3(min_xz.x, -GUARD_BAND, min_xz.y);
+    vec3 bb_max = vec3(max_xz.x, +GUARD_BAND, max_xz.y);
+    vec3 center = 0.5 * (bb_min + bb_max);
+    float radius = 0.5 * length(bb_max - bb_min);
+
+    vec3 f0 = vec3(
+        dot(uFrustum[0], vec4(center, 1.0)),
+        dot(uFrustum[1], vec4(center, 1.0)),
+        dot(uFrustum[2], vec4(center, 1.0)));
+
+    vec3 f1 = vec3(
+        dot(uFrustum[3], vec4(center, 1.0)),
+        dot(uFrustum[4], vec4(center, 1.0)),
+        dot(uFrustum[5], vec4(center, 1.0)));
+
+    return !(any(lessThanEqual(f0, vec3(-radius))) || any(lessThanEqual(f1, vec3(-radius))));
+}
+
+void compute_tess_levels(vec2 p0)
+{
+    vOutPatchPosBase = p0;
+
+    float l00 = lod_factor(p0 + vec2(-0.5, -0.5) * uPatchSize);
+    float l10 = lod_factor(p0 + vec2(+0.5, -0.5) * uPatchSize);
+    float l20 = lod_factor(p0 + vec2(+1.5, -0.5) * uPatchSize);
+    float l01 = lod_factor(p0 + vec2(-0.5, +0.5) * uPatchSize);
+    float l11 = lod_factor(p0 + vec2(+0.5, +0.5) * uPatchSize);
+    float l21 = lod_factor(p0 + vec2(+1.5, +0.5) * uPatchSize);
+    float l02 = lod_factor(p0 + vec2(-0.5, +1.5) * uPatchSize);
+    float l12 = lod_factor(p0 + vec2(+0.5, +1.5) * uPatchSize);
+    float l22 = lod_factor(p0 + vec2(+1.5, +1.5) * uPatchSize);
+
+    vec4 lods = vec4(
+        dot(vec4(l01, l11, l02, l12), vec4(0.25)),
+        dot(vec4(l00, l10, l01, l11), vec4(0.25)),
+        dot(vec4(l10, l20, l11, l21), vec4(0.25)),
+        dot(vec4(l11, l21, l12, l22), vec4(0.25)));
+
+    vPatchLods = lods;
+
+    vec4 outer_lods = min(lods.xyzw, lods.yzwx);
+    vec4 levels = tess_level(outer_lods);
+    gl_TessLevelOuter[0] = levels.x;
+    gl_TessLevelOuter[1] = levels.y;
+    gl_TessLevelOuter[2] = levels.z;
+    gl_TessLevelOuter[3] = levels.w;
+
+    float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w));
+    float inner = tess_level(min(min_lod, l11));
+    gl_TessLevelInner[0] = inner;
+    gl_TessLevelInner[1] = inner;
+}
+
+void main()
+{
+    vec2 p0 = vPatchPosBase[0];
+    if (!frustum_cull(p0))
+    {
+        gl_TessLevelOuter[0] = -1.0;
+        gl_TessLevelOuter[1] = -1.0;
+        gl_TessLevelOuter[2] = -1.0;
+        gl_TessLevelOuter[3] = -1.0;
+        gl_TessLevelInner[0] = -1.0;
+        gl_TessLevelInner[1] = -1.0;
+    }
+    else
+    {
+        compute_tess_levels(p0);
+    }
+}
+
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -1381,6 +1381,8 @@ struct Meta
 		{
 			uint32_t packed_type = 0;
 			bool packed = false;
+			uint32_t ib_member_index = -1;
+			uint32_t ib_orig_id = 0;
 		} extended;
 	};

--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@ -1094,6 +1094,22 @@ const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
 	return get<SPIRType>(get_variable_data_type_id(var));
 }

+SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
+{
+	SPIRType *type = &get_variable_data_type(var);
+	if (is_array(*type))
+		type = &get<SPIRType>(type->parent_type);
+	return *type;
+}
+
+const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
+{
+	const SPIRType *type = &get_variable_data_type(var);
+	if (is_array(*type))
+		type = &get<SPIRType>(type->parent_type);
+	return *type;
+}
+
 bool Compiler::is_sampled_image_type(const SPIRType &type)
 {
 	return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
@ -1183,6 +1199,14 @@ void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decorati
 	case SPIRVCrossDecorationPackedType:
 		dec.extended.packed_type = value;
 		break;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		dec.extended.ib_member_index = value;
+		break;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		dec.extended.ib_orig_id = value;
+		break;
 	}
 }

@ -1201,6 +1225,14 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext
 	case SPIRVCrossDecorationPackedType:
 		dec.extended.packed_type = value;
 		break;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		dec.extended.ib_member_index = value;
+		break;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		dec.extended.ib_orig_id = value;
+		break;
 	}
 }

@ -1218,6 +1250,12 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco

 	case SPIRVCrossDecorationPackedType:
 		return dec.extended.packed_type;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		return dec.extended.ib_member_index;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		return dec.extended.ib_orig_id;
 	}

 	return 0;
@ -1240,6 +1278,12 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index,

 	case SPIRVCrossDecorationPackedType:
 		return dec.extended.packed_type;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		return dec.extended.ib_member_index;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		return dec.extended.ib_orig_id;
 	}

 	return 0;
@ -1259,6 +1303,12 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati

 	case SPIRVCrossDecorationPackedType:
 		return dec.extended.packed_type != 0;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		return dec.extended.ib_member_index != (uint32_t)-1;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		return dec.extended.ib_orig_id != 0;
 	}

 	return false;
@ -1281,6 +1331,12 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext

 	case SPIRVCrossDecorationPackedType:
 		return dec.extended.packed_type != 0;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		return dec.extended.ib_member_index != (uint32_t)-1;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		return dec.extended.ib_orig_id != 0;
 	}

 	return false;
@ -1298,6 +1354,14 @@ void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decora
 	case SPIRVCrossDecorationPackedType:
 		dec.extended.packed_type = 0;
 		break;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		dec.extended.ib_member_index = -1;
+		break;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		dec.extended.ib_orig_id = 0;
+		break;
 	}
 }

@ -1315,6 +1379,14 @@ void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, E
 	case SPIRVCrossDecorationPackedType:
 		dec.extended.packed_type = 0;
 		break;
+
+	case SPIRVCrossDecorationInterfaceMemberIndex:
+		dec.extended.ib_member_index = -1;
+		break;
+
+	case SPIRVCrossDecorationInterfaceOrigID:
+		dec.extended.ib_orig_id = 0;
+		break;
 	}
 }

@ -3527,7 +3599,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 		auto *type = &compiler.get_variable_data_type(*var);

 		auto &flags =
-		    type->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
+		    var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;

 		uint32_t count = length - 3;
 		args += 3;
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@ -117,7 +117,9 @@ struct EntryPoint
 enum ExtendedDecorations
 {
 	SPIRVCrossDecorationPacked,
-	SPIRVCrossDecorationPackedType
+	SPIRVCrossDecorationPackedType,
+	SPIRVCrossDecorationInterfaceMemberIndex,
+	SPIRVCrossDecorationInterfaceOrigID,
 };

 class Compiler
@ -201,6 +203,12 @@ public:
 	// Gets the SPIR-V type underlying a variable.
 	const SPIRType &get_variable_data_type(const SPIRVariable &var) const;

+	// Gets the SPIR-V element type underlying an array variable.
+	SPIRType &get_variable_element_type(const SPIRVariable &var);
+
+	// Gets the SPIR-V element type underlying an array variable.
+	const SPIRType &get_variable_element_type(const SPIRVariable &var) const;
+
 	// Returns if the given type refers to a sampled image.
 	bool is_sampled_image_type(const SPIRType &type);

--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@ -170,6 +170,9 @@ public:
 		uint32_t aux_buffer_index = 30;
 		uint32_t indirect_params_buffer_index = 29;
 		uint32_t shader_output_buffer_index = 28;
+		uint32_t shader_patch_output_buffer_index = 27;
+		uint32_t shader_tess_factor_buffer_index = 26;
+		uint32_t shader_input_wg_index = 0;
 		bool enable_point_size_builtin = true;
 		bool disable_rasterization = false;
 		bool capture_output_to_buffer = false;
@ -231,7 +234,8 @@ public:
 	// rasterization if vertex shader requires rasterization to be disabled.
 	bool get_is_rasterization_disabled() const
 	{
-		return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex);
+		return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex ||
+		                                     get_entry_point().model == spv::ExecutionModelTessellationControl);
 	}

 	// Provide feedback to calling API to allow it to pass an auxiliary
@ -248,6 +252,20 @@ public:
 		return capture_output_to_buffer && stage_out_var_id != 0;
 	}

+	// Provide feedback to calling API to allow it to pass a patch output
+	// buffer if the shader needs it.
+	bool needs_patch_output_buffer() const
+	{
+		return capture_output_to_buffer && patch_stage_out_var_id != 0;
+	}
+
+	// Provide feedback to calling API to allow it to pass an input threadgroup
+	// buffer if the shader needs it.
+	bool needs_input_threadgroup_mem() const
+	{
+		return capture_output_to_buffer && stage_in_var_id != 0;
+	}
+
 	// An enum of SPIR-V functions that are implemented in additional
 	// source code that is added to the shader if necessary.
 	enum SPVFuncImpl
@ -384,19 +402,24 @@ protected:
 	void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
 	                                            std::unordered_set<uint32_t> &global_var_ids,
 	                                            std::unordered_set<uint32_t> &processed_func_ids);
-	uint32_t add_interface_block(spv::StorageClass storage);
+	uint32_t add_interface_block(spv::StorageClass storage, bool patch = false);
+	uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage);

 	void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type,
-	                                     SPIRVariable &var);
+	                                     SPIRVariable &var, bool strip_array);
 	void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
-	                                               SPIRType &ib_type, SPIRVariable &var);
+	                                               SPIRType &ib_type, SPIRVariable &var, bool strip_array);
 	void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
-	                                           SPIRType &ib_type, SPIRVariable &var);
+	                                           SPIRType &ib_type, SPIRVariable &var, bool strip_array);
 	void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
-	                                                  SPIRType &ib_type, SPIRVariable &var, uint32_t index);
+	                                                  SPIRType &ib_type, SPIRVariable &var, uint32_t index,
+	                                                  bool strip_array);
 	void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
-	                                                      SPIRType &ib_type, SPIRVariable &var, uint32_t index);
-	uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx);
+	                                                      SPIRType &ib_type, SPIRVariable &var, uint32_t index,
+	                                                      bool strip_array);
+	uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array);
+
+	void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id);

 	void mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage);
 	uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
@ -431,7 +454,10 @@ protected:
 	MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
 	std::string get_argument_address_space(const SPIRVariable &argument);
 	std::string get_type_address_space(const SPIRType &type);
+	SPIRType &get_stage_in_struct_type();
 	SPIRType &get_stage_out_struct_type();
+	SPIRType &get_patch_stage_out_struct_type();
+	std::string get_tess_factor_struct_name();
 	void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
 	                         uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
 	                         bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
@ -448,6 +474,8 @@ protected:
 	uint32_t builtin_base_vertex_id = 0;
 	uint32_t builtin_instance_idx_id = 0;
 	uint32_t builtin_base_instance_id = 0;
+	uint32_t builtin_invocation_id_id = 0;
+	uint32_t builtin_primitive_id_id = 0;
 	uint32_t aux_buffer_id = 0;

 	void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
@ -468,6 +496,9 @@ protected:
 	MSLResourceBinding next_metal_resource_index;
 	uint32_t stage_in_var_id = 0;
 	uint32_t stage_out_var_id = 0;
+	uint32_t patch_stage_out_var_id = 0;
+	uint32_t stage_in_ptr_var_id = 0;
+	uint32_t stage_out_ptr_var_id = 0;
 	bool has_sampled_images = false;
 	bool needs_vertex_idx_arg = false;
 	bool needs_instance_idx_arg = false;
@ -478,9 +509,13 @@ protected:
 	std::string qual_pos_var_name;
 	std::string stage_in_var_name = "in";
 	std::string stage_out_var_name = "out";
+	std::string patch_stage_out_var_name = "patchOut";
 	std::string sampler_name_suffix = "Smplr";
 	std::string swizzle_name_suffix = "Swzl";
+	std::string input_wg_var_name = "gl_in";
 	std::string output_buffer_var_name = "spvOut";
+	std::string patch_output_buffer_var_name = "spvPatchOut";
+	std::string tess_factor_buffer_var_name = "spvTessLevel";
 	spv::Op previous_instruction_opcode = spv::OpNop;

 	std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers;