diff --git a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc new file mode 100644 index 00000000..e256409d --- /dev/null +++ b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -0,0 +1,71 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertexOutput +{ + float4 pos; + float2 uv; +}; + +struct VertexOutput_1 +{ + float2 uv; +}; + +struct HSOut +{ + float2 uv; +}; + +struct main0_out +{ + HSOut _entryPointOutput; + float4 gl_Position; +}; + +struct main0_in +{ + float2 VertexOutput_uv [[attribute(0)]]; + float4 gl_Position [[attribute(1)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +template +void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } }; + VertexOutput param[3]; + spvArrayCopyFromStack1(param, _223); + gl_out[gl_InvocationID].gl_Position = param[gl_InvocationID].pos; + gl_out[gl_InvocationID]._entryPointOutput.uv = param[gl_InvocationID].uv; + threadgroup_barrier(mem_flags::mem_device); + if (int(gl_InvocationID) == 0) + { + float2 _174 = float2(1.0) + gl_in[0].VertexOutput_uv; + float _175 = _174.x; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_175); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_175); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_175); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_175); + } +} + diff --git a/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc new file mode 100644 index 00000000..4d932d58 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + float3 vFoo; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + diff --git a/reference/opt/shaders-msl/tesc/basic.tesc b/reference/opt/shaders-msl/tesc/basic.tesc new file mode 100644 index 00000000..832582f8 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/basic.tesc @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_patchOut +{ + float3 vFoo; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/water_tess.tesc b/reference/opt/shaders-msl/tesc/water_tess.tesc new file mode 100644 index 00000000..e89e6570 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/water_tess.tesc @@ -0,0 +1,91 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 uScale; + float3 uCamPos; + float2 uPatchSize; + float2 uMaxTessLevel; + float uDistanceMod; + float4 uFrustum[6]; +}; + +struct main0_patchOut +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +struct main0_in +{ + float2 vPatchPosBase [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + float2 _430 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy; + float2 _440 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; + float3 _445 = float3(_430.x, -10.0, _430.y); + float3 _450 = float3(_440.x, 10.0, _440.y); + float4 _466 = float4((_445 + _450) * 0.5, 1.0); + float3 _513 = float3(length(_450 - _445) * (-0.5)); + bool _515 = any(float3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)) <= _513); + bool _525; + if (!_515) + { + _525 = any(float3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)) <= _513); + } + else + { + _525 = _515; + } + if (!(!_525)) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0); + } + else + { + patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase; + float2 _678 = (gl_in[0].vPatchPosBase + (float2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _706 = (gl_in[0].vPatchPosBase + (float2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; + float _725 = fast::clamp(log2((length(_41.uCamPos - float3(_706.x, 0.0, _706.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _734 = (gl_in[0].vPatchPosBase + (float2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _762 = (gl_in[0].vPatchPosBase + (float2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + float _781 = fast::clamp(log2((length(_41.uCamPos - float3(_762.x, 0.0, _762.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _790 = (gl_in[0].vPatchPosBase + (float2(0.5) * _41.uPatchSize)) * _41.uScale.xy; + float _809 = fast::clamp(log2((length(_41.uCamPos - float3(_790.x, 0.0, _790.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _818 = (gl_in[0].vPatchPosBase + (float2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + float _837 = fast::clamp(log2((length(_41.uCamPos - float3(_818.x, 0.0, _818.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _846 = (gl_in[0].vPatchPosBase + (float2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _874 = (gl_in[0].vPatchPosBase + (float2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; + float _893 = fast::clamp(log2((length(_41.uCamPos - float3(_874.x, 0.0, _874.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _902 = (gl_in[0].vPatchPosBase + (float2(1.5) * _41.uPatchSize)) * _41.uScale.xy; + float _612 = dot(float4(_781, _809, fast::clamp(log2((length(_41.uCamPos - float3(_846.x, 0.0, _846.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _893), float4(0.25)); + float _618 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_678.x, 0.0, _678.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _725, _781, _809), float4(0.25)); + float _624 = dot(float4(_725, fast::clamp(log2((length(_41.uCamPos - float3(_734.x, 0.0, _734.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _809, _837), float4(0.25)); + float _630 = dot(float4(_809, _837, _893, fast::clamp(log2((length(_41.uCamPos - float3(_902.x, 0.0, _902.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); + float4 _631 = float4(_612, _618, _624, _630); + patchOut.vPatchLods = _631; + float4 _928 = exp2(-fast::min(_631, _631.yzwx)) * _41.uMaxTessLevel.y; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_928.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_928.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_928.z); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_928.w); + float _935 = _41.uMaxTessLevel.y * exp2(-fast::min(fast::min(fast::min(_612, _618), fast::min(_624, _630)), _809)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_935); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_935); + } +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert index ab563136..02c3401d 100644 --- a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert @@ -31,7 +31,7 @@ void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) for (uint i = 0; i < N; dst[i] = src[i], i++); } -float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], int Index1, int Index2) { float4 indexable[4]; spvArrayCopyFromStack1(indexable, positions); @@ -40,7 +40,7 @@ float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread cons return indexable[Index1] + indexable_1[Index2]; } -float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], int Index1, int Index2) { return consume_constant_arrays2(positions, positions2, Index1, Index2); } diff --git a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc new file mode 100644 index 00000000..ef9ef7cc --- /dev/null +++ b/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -0,0 +1,111 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertexOutput +{ + float4 pos; + float2 uv; +}; + +struct HSOut +{ + float4 pos; + float2 uv; +}; + +struct HSConstantOut +{ + float EdgeTess[3]; + float InsideTess; +}; + +struct VertexOutput_1 +{ + float2 uv; +}; + +struct HSOut_1 +{ + float2 uv; +}; + +struct main0_out +{ + HSOut_1 _entryPointOutput; + float4 gl_Position; +}; + +struct main0_in +{ + float2 VertexOutput_uv [[attribute(0)]]; + float4 gl_Position [[attribute(1)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +template +void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i) +{ + HSOut _output; + _output.pos = p[i].pos; + _output.uv = p[i].uv; + return _output; +} + +HSConstantOut PatchHS(thread const VertexOutput (&_patch)[3]) +{ + HSConstantOut _output; + _output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x; + _output.EdgeTess[1] = (float2(1.0) + _patch[0].uv).x; + _output.EdgeTess[2] = (float2(1.0) + _patch[0].uv).x; + _output.InsideTess = (float2(1.0) + _patch[0].uv).x; + return _output; +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + VertexOutput p[3]; + p[0].pos = gl_in[0].gl_Position; + p[0].uv = gl_in[0].VertexOutput_uv; + p[1].pos = gl_in[1].gl_Position; + p[1].uv = gl_in[1].VertexOutput_uv; + p[2].pos = gl_in[2].gl_Position; + p[2].uv = gl_in[2].VertexOutput_uv; + uint i = gl_InvocationID; + VertexOutput param[3]; + spvArrayCopyFromStack1(param, p); + uint param_1 = i; + HSOut flattenTemp = _hs_main(param, param_1); + gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; + gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv; + threadgroup_barrier(mem_flags::mem_device); + if (int(gl_InvocationID) == 0) + { + VertexOutput param_2[3]; + spvArrayCopyFromStack1(param_2, p); + HSConstantOut _patchConstantResult = PatchHS(param_2); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_patchConstantResult.EdgeTess[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_patchConstantResult.EdgeTess[2]); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_patchConstantResult.InsideTess); + } +} + diff --git a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc new file mode 100644 index 00000000..9c4e3b3a --- /dev/null +++ b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc @@ -0,0 +1,44 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + float3 vFoo; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; +}; + +void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); + set_position(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/tesc/basic.tesc b/reference/shaders-msl/tesc/basic.tesc new file mode 100644 index 00000000..832582f8 --- /dev/null +++ b/reference/shaders-msl/tesc/basic.tesc @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_patchOut +{ + float3 vFoo; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); +} + diff --git a/reference/shaders-msl/tesc/water_tess.tesc b/reference/shaders-msl/tesc/water_tess.tesc new file mode 100644 index 00000000..48b3566f --- /dev/null +++ b/reference/shaders-msl/tesc/water_tess.tesc @@ -0,0 +1,132 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4 uScale; + float3 uCamPos; + float2 uPatchSize; + float2 uMaxTessLevel; + float uDistanceMod; + float4 uFrustum[6]; +}; + +struct main0_patchOut +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +struct main0_in +{ + float2 vPatchPosBase [[attribute(0)]]; +}; + +bool frustum_cull(thread const float2& p0, constant UBO& v_41) +{ + float2 min_xz = (p0 - float2(10.0)) * v_41.uScale.xy; + float2 max_xz = ((p0 + v_41.uPatchSize) + float2(10.0)) * v_41.uScale.xy; + float3 bb_min = float3(min_xz.x, -10.0, min_xz.y); + float3 bb_max = float3(max_xz.x, 10.0, max_xz.y); + float3 center = (bb_min + bb_max) * 0.5; + float radius = 0.5 * length(bb_max - bb_min); + float3 f0 = float3(dot(v_41.uFrustum[0], float4(center, 1.0)), dot(v_41.uFrustum[1], float4(center, 1.0)), dot(v_41.uFrustum[2], float4(center, 1.0))); + float3 f1 = float3(dot(v_41.uFrustum[3], float4(center, 1.0)), dot(v_41.uFrustum[4], float4(center, 1.0)), dot(v_41.uFrustum[5], float4(center, 1.0))); + float3 _199 = f0; + float _200 = radius; + bool _205 = any(_199 <= float3(-_200)); + bool _215; + if (!_205) + { + _215 = any(f1 <= float3(-radius)); + } + else + { + _215 = _205; + } + return !_215; +} + +float lod_factor(thread const float2& pos_, constant UBO& v_41) +{ + float2 pos = pos_ * v_41.uScale.xy; + float3 dist_to_cam = v_41.uCamPos - float3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); + return fast::clamp(level, 0.0, v_41.uMaxTessLevel.x); +} + +float4 tess_level(thread const float4& lod, constant UBO& v_41) +{ + return exp2(-lod) * v_41.uMaxTessLevel.y; +} + +float tess_level(thread const float& lod, constant UBO& v_41) +{ + return v_41.uMaxTessLevel.y * exp2(-lod); +} + +void compute_tess_levels(thread const float2& p0, constant UBO& v_41, device float2& vOutPatchPosBase, device float4& vPatchLods, device half (&gl_TessLevelOuter)[4], device half (&gl_TessLevelInner)[2]) +{ + vOutPatchPosBase = p0; + float2 param = p0 + (float2(-0.5) * v_41.uPatchSize); + float l00 = lod_factor(param, v_41); + float2 param_1 = p0 + (float2(0.5, -0.5) * v_41.uPatchSize); + float l10 = lod_factor(param_1, v_41); + float2 param_2 = p0 + (float2(1.5, -0.5) * v_41.uPatchSize); + float l20 = lod_factor(param_2, v_41); + float2 param_3 = p0 + (float2(-0.5, 0.5) * v_41.uPatchSize); + float l01 = lod_factor(param_3, v_41); + float2 param_4 = p0 + (float2(0.5) * v_41.uPatchSize); + float l11 = lod_factor(param_4, v_41); + float2 param_5 = p0 + (float2(1.5, 0.5) * v_41.uPatchSize); + float l21 = lod_factor(param_5, v_41); + float2 param_6 = p0 + (float2(-0.5, 1.5) * v_41.uPatchSize); + float l02 = lod_factor(param_6, v_41); + float2 param_7 = p0 + (float2(0.5, 1.5) * v_41.uPatchSize); + float l12 = lod_factor(param_7, v_41); + float2 param_8 = p0 + (float2(1.5) * v_41.uPatchSize); + float l22 = lod_factor(param_8, v_41); + float4 lods = float4(dot(float4(l01, l11, l02, l12), float4(0.25)), dot(float4(l00, l10, l01, l11), float4(0.25)), dot(float4(l10, l20, l11, l21), float4(0.25)), dot(float4(l11, l21, l12, l22), float4(0.25))); + vPatchLods = lods; + float4 outer_lods = fast::min(lods, lods.yzwx); + float4 param_9 = outer_lods; + float4 levels = tess_level(param_9, v_41); + gl_TessLevelOuter[0] = half(levels.x); + gl_TessLevelOuter[1] = half(levels.y); + gl_TessLevelOuter[2] = half(levels.z); + gl_TessLevelOuter[3] = half(levels.w); + float min_lod = fast::min(fast::min(lods.x, lods.y), fast::min(lods.z, lods.w)); + float param_10 = fast::min(min_lod, l11); + float inner = tess_level(param_10, v_41); + gl_TessLevelInner[0] = half(inner); + gl_TessLevelInner[1] = half(inner); +} + +kernel void main0(main0_in in [[stage_in]], constant UBO& v_41 [[buffer(0)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + float2 p0 = gl_in[0].vPatchPosBase; + float2 param = p0; + if (!frustum_cull(param, v_41)) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0); + } + else + { + float2 param_1 = p0; + compute_tess_levels(param_1, v_41, patchOut.vOutPatchPosBase, patchOut.vPatchLods, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, spvTessLevel[gl_PrimitiveID].insideTessellationFactor); + } +} + diff --git a/reference/shaders-msl/vert/in_out_array_mat.vert b/reference/shaders-msl/vert/in_out_array_mat.vert index 95be574a..ec0a3b77 100644 --- a/reference/shaders-msl/vert/in_out_array_mat.vert +++ b/reference/shaders-msl/vert/in_out_array_mat.vert @@ -44,7 +44,7 @@ void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo, color = colors[2]; } -void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], thread float3& inNormal) +void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], float3 inNormal) { outTransModel[2] = float4(inNormal, 1.0); write_deeper_in_function(outTransModel, ubo, color, colors); diff --git a/reference/shaders-msl/vert/leaf-function.capture.vert b/reference/shaders-msl/vert/leaf-function.capture.vert index 5a8469d1..664e65a5 100644 --- a/reference/shaders-msl/vert/leaf-function.capture.vert +++ b/reference/shaders-msl/vert/leaf-function.capture.vert @@ -22,7 +22,7 @@ struct main0_in float3 aNormal [[attribute(1)]]; }; -void set_output(device float4& gl_Position, constant UBO& v_18, thread float4& aVertex, device float3& vNormal, thread float3& aNormal) +void set_output(device float4& gl_Position, constant UBO& v_18, float4 aVertex, device float3& vNormal, float3 aNormal) { gl_Position = v_18.uMVP * aVertex; vNormal = aNormal; diff --git a/reference/shaders-msl/vert/return-array.vert b/reference/shaders-msl/vert/return-array.vert index cd06fdda..4265f710 100644 --- a/reference/shaders-msl/vert/return-array.vert +++ b/reference/shaders-msl/vert/return-array.vert @@ -36,7 +36,7 @@ void test(thread float4 (&SPIRV_Cross_return_value)[2]) spvArrayCopyFromConstant1(SPIRV_Cross_return_value, _20); } -void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1) +void test2(thread float4 (&SPIRV_Cross_return_value)[2], float4 vInput0, float4 vInput1) { float4 foobar[2]; foobar[0] = vInput0; diff --git a/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc new file mode 100644 index 00000000..0fd4dce2 --- /dev/null +++ b/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -0,0 +1,248 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 2 +; Bound: 162 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %hs_main "main" %p_pos %p_1 %i_1 %_entryPointOutput_pos %_entryPointOutput %_patchConstantOutput_EdgeTess %_patchConstantOutput_InsideTess + OpExecutionMode %hs_main OutputVertices 3 + OpExecutionMode %hs_main Triangles + OpExecutionMode %hs_main SpacingFractionalOdd + OpExecutionMode %hs_main VertexOrderCw + OpSource HLSL 500 + OpName %hs_main "hs_main" + OpName %VertexOutput "VertexOutput" + OpMemberName %VertexOutput 0 "pos" + OpMemberName %VertexOutput 1 "uv" + OpName %HSOut "HSOut" + OpMemberName %HSOut 0 "pos" + OpMemberName %HSOut 1 "uv" + OpName %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ "@hs_main(struct-VertexOutput-vf4-vf21[3];u1;" + OpName %p "p" + OpName %i "i" + OpName %HSConstantOut "HSConstantOut" + OpMemberName %HSConstantOut 0 "EdgeTess" + OpMemberName %HSConstantOut 1 "InsideTess" + OpName %PatchHS_struct_VertexOutput_vf4_vf21_3__ "PatchHS(struct-VertexOutput-vf4-vf21[3];" + OpName %patch "patch" + OpName %output "output" + OpName %p_0 "p" + OpName %p_pos "p.pos" + OpName %VertexOutput_0 "VertexOutput" + OpMemberName %VertexOutput_0 0 "uv" + OpName %p_1 "p" + OpName %i_0 "i" + OpName %i_1 "i" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %param_0 "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %HSOut_0 "HSOut" + OpMemberName %HSOut_0 0 "uv" + OpName %_entryPointOutput "@entryPointOutput" + OpName %_patchConstantResult "@patchConstantResult" + OpName %param_1 "param" + OpName %_patchConstantOutput_EdgeTess "@patchConstantOutput.EdgeTess" + OpName %_patchConstantOutput_InsideTess "@patchConstantOutput.InsideTess" + OpName %output_0 "output" + OpDecorate %p_pos BuiltIn Position + OpDecorate %p_1 Location 0 + OpDecorate %i_1 BuiltIn InvocationId + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput Location 0 + OpDecorate %_patchConstantOutput_EdgeTess Patch + OpDecorate %_patchConstantOutput_EdgeTess BuiltIn TessLevelOuter + OpDecorate %_patchConstantOutput_InsideTess Patch + OpDecorate %_patchConstantOutput_InsideTess BuiltIn TessLevelInner + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 +%VertexOutput = OpTypeStruct %v4float %v2float + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 +%_arr_VertexOutput_uint_3 = OpTypeArray %VertexOutput %uint_3 +%_ptr_Function__arr_VertexOutput_uint_3 = OpTypePointer Function %_arr_VertexOutput_uint_3 +%_ptr_Function_uint = OpTypePointer Function %uint + %HSOut = OpTypeStruct %v4float %v2float + %16 = OpTypeFunction %HSOut %_ptr_Function__arr_VertexOutput_uint_3 %_ptr_Function_uint +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%HSConstantOut = OpTypeStruct %_arr_float_uint_3 %float + %23 = OpTypeFunction %HSConstantOut %_ptr_Function__arr_VertexOutput_uint_3 +%_ptr_Function_HSOut = OpTypePointer Function %HSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 + %p_pos = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%_ptr_Input_v4float = OpTypePointer Input %v4float +%VertexOutput_0 = OpTypeStruct %v2float +%_arr_VertexOutput_0_uint_3 = OpTypeArray %VertexOutput_0 %uint_3 +%_ptr_Input__arr_VertexOutput_0_uint_3 = OpTypePointer Input %_arr_VertexOutput_0_uint_3 + %p_1 = OpVariable %_ptr_Input__arr_VertexOutput_0_uint_3 Input +%_ptr_Input_v2float = OpTypePointer Input %v2float + %int_2 = OpConstant %int 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %i_1 = OpVariable %_ptr_Input_uint Input +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_entryPointOutput_pos = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %HSOut_0 = OpTypeStruct %v2float +%_arr_HSOut_0_uint_3 = OpTypeArray %HSOut_0 %uint_3 +%_ptr_Output__arr_HSOut_0_uint_3 = OpTypePointer Output %_arr_HSOut_0_uint_3 +%_entryPointOutput = OpVariable %_ptr_Output__arr_HSOut_0_uint_3 Output +%_ptr_Output_v2float = OpTypePointer Output %v2float + %uint_2 = OpConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_ptr_Function_HSConstantOut = OpTypePointer Function %HSConstantOut + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_patchConstantOutput_EdgeTess = OpVariable %_ptr_Output__arr_float_uint_4 Output +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_patchConstantOutput_InsideTess = OpVariable %_ptr_Output__arr_float_uint_2 Output + %float_1 = OpConstant %float 1 + %hs_main = OpFunction %void None %3 + %5 = OpLabel + %p_0 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %i_0 = OpVariable %_ptr_Function_uint Function +%flattenTemp = OpVariable %_ptr_Function_HSOut Function + %param = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %param_0 = OpVariable %_ptr_Function_uint Function +%_patchConstantResult = OpVariable %_ptr_Function_HSConstantOut Function + %param_1 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %50 = OpAccessChain %_ptr_Input_v4float %p_pos %int_0 + %51 = OpLoad %v4float %50 + %52 = OpAccessChain %_ptr_Function_v4float %p_0 %int_0 %int_0 + OpStore %52 %51 + %58 = OpAccessChain %_ptr_Input_v2float %p_1 %int_0 %int_0 + %59 = OpLoad %v2float %58 + %60 = OpAccessChain %_ptr_Function_v2float %p_0 %int_0 %int_1 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Input_v4float %p_pos %int_1 + %62 = OpLoad %v4float %61 + %63 = OpAccessChain %_ptr_Function_v4float %p_0 %int_1 %int_0 + OpStore %63 %62 + %64 = OpAccessChain %_ptr_Input_v2float %p_1 %int_1 %int_0 + %65 = OpLoad %v2float %64 + %66 = OpAccessChain %_ptr_Function_v2float %p_0 %int_1 %int_1 + OpStore %66 %65 + %68 = OpAccessChain %_ptr_Input_v4float %p_pos %int_2 + %69 = OpLoad %v4float %68 + %70 = OpAccessChain %_ptr_Function_v4float %p_0 %int_2 %int_0 + OpStore %70 %69 + %71 = OpAccessChain %_ptr_Input_v2float %p_1 %int_2 %int_0 + %72 = OpLoad %v2float %71 + %73 = OpAccessChain %_ptr_Function_v2float %p_0 %int_2 %int_1 + OpStore %73 %72 + %77 = OpLoad %uint %i_1 + OpStore %i_0 %77 + %80 = OpLoad %_arr_VertexOutput_uint_3 %p_0 + OpStore %param %80 + %82 = OpLoad %uint %i_0 + OpStore %param_0 %82 + %83 = OpFunctionCall %HSOut %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ %param %param_0 + OpStore %flattenTemp %83 + %86 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %87 = OpLoad %v4float %86 + %94 = OpLoad %uint %i_1 + %89 = OpAccessChain %_ptr_Output_v4float %_entryPointOutput_pos %94 + OpStore %89 %87 + %95 = OpAccessChain %_ptr_Function_v2float %flattenTemp %int_1 + %96 = OpLoad %v2float %95 + %98 = OpAccessChain %_ptr_Output_v2float %_entryPointOutput %94 %int_0 + OpStore %98 %96 + OpControlBarrier %uint_2 %uint_1 %uint_0 + %102 = OpLoad %uint %i_1 + %104 = OpIEqual %bool %102 %int_0 + OpSelectionMerge %106 None + OpBranchConditional %104 %105 %106 + %105 = OpLabel + %110 = OpLoad %_arr_VertexOutput_uint_3 %p_0 + OpStore %param_1 %110 + %111 = OpFunctionCall %HSConstantOut %PatchHS_struct_VertexOutput_vf4_vf21_3__ %param_1 + OpStore %_patchConstantResult %111 + %117 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_0 + %118 = OpLoad %float %117 + %120 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_0 + OpStore %120 %118 + %121 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_1 + %122 = OpLoad %float %121 + %123 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_1 + OpStore %123 %122 + %124 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_2 + %125 = OpLoad %float %124 + %126 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_2 + OpStore %126 %125 + %130 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_1 + %131 = OpLoad %float %130 + %132 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_InsideTess %int_0 + OpStore %132 %131 + OpBranch %106 + %106 = OpLabel + OpReturn + OpFunctionEnd +%_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ = OpFunction %HSOut None %16 + %p = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3 + %i = OpFunctionParameter %_ptr_Function_uint + %20 = OpLabel + %output = OpVariable %_ptr_Function_HSOut Function + %31 = OpLoad %uint %i + %33 = OpAccessChain %_ptr_Function_v4float %p %31 %int_0 + %34 = OpLoad %v4float %33 + %35 = OpAccessChain %_ptr_Function_v4float %output %int_0 + OpStore %35 %34 + %37 = OpLoad %uint %i + %39 = OpAccessChain %_ptr_Function_v2float %p %37 %int_1 + %40 = OpLoad %v2float %39 + %41 = OpAccessChain %_ptr_Function_v2float %output %int_1 + OpStore %41 %40 + %42 = OpLoad %HSOut %output + OpReturnValue %42 + OpFunctionEnd +%PatchHS_struct_VertexOutput_vf4_vf21_3__ = OpFunction %HSConstantOut None %23 + %patch = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3 + %26 = OpLabel + %output_0 = OpVariable %_ptr_Function_HSConstantOut Function + %135 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %136 = OpLoad %v2float %135 + %137 = OpCompositeConstruct %v2float %float_1 %float_1 + %138 = OpFAdd %v2float %137 %136 + %139 = OpCompositeExtract %float %138 0 + %140 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_0 + OpStore %140 %139 + %141 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %142 = OpLoad %v2float %141 + %143 = OpCompositeConstruct %v2float %float_1 %float_1 + %144 = OpFAdd %v2float %143 %142 + %145 = OpCompositeExtract %float %144 0 + %146 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_1 + OpStore %146 %145 + %147 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %148 = OpLoad %v2float %147 + %149 = OpCompositeConstruct %v2float %float_1 %float_1 + %150 = OpFAdd %v2float %149 %148 + %151 = OpCompositeExtract %float %150 0 + %152 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_2 + OpStore %152 %151 + %153 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %154 = OpLoad %v2float %153 + %155 = OpCompositeConstruct %v2float %float_1 %float_1 + %156 = OpFAdd %v2float %155 %154 + %157 = OpCompositeExtract %float %156 0 + %158 = OpAccessChain %_ptr_Function_float %output_0 %int_1 + OpStore %158 %157 + %159 = OpLoad %HSConstantOut %output_0 + OpReturnValue %159 + OpFunctionEnd diff --git a/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc b/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc new file mode 100644 index 00000000..a258afb3 --- /dev/null +++ b/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc @@ -0,0 +1,32 @@ +#version 450 +layout(vertices = 1) out; + +in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[gl_MaxPatchVertices]; + +out gl_PerVertex +{ + vec4 gl_Position; +} gl_out[1]; + +layout(location = 0) patch out vec3 vFoo; + +void set_position() +{ + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); + + set_position(); +} diff --git a/shaders-msl/tesc/basic.tesc b/shaders-msl/tesc/basic.tesc new file mode 100644 index 00000000..0a41f98c --- /dev/null +++ b/shaders-msl/tesc/basic.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(location = 0) patch out vec3 vFoo; + +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); +} diff --git a/shaders-msl/tesc/water_tess.tesc b/shaders-msl/tesc/water_tess.tesc new file mode 100644 index 00000000..3ecdc3d1 --- /dev/null +++ b/shaders-msl/tesc/water_tess.tesc @@ -0,0 +1,115 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 1) out; +layout(location = 0) in vec2 vPatchPosBase[]; + +layout(std140) uniform UBO +{ + vec4 uScale; + highp vec3 uCamPos; + vec2 uPatchSize; + vec2 uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +}; + +layout(location = 1) patch out vec2 vOutPatchPosBase; +layout(location = 2) patch out vec4 vPatchLods; + +float lod_factor(vec2 pos_) +{ + vec2 pos = pos_ * uScale.xy; + vec3 dist_to_cam = uCamPos - vec3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 0.0001) * uDistanceMod); + return clamp(level, 0.0, uMaxTessLevel.x); +} + +float tess_level(float lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +vec4 tess_level(vec4 lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +// Guard band for vertex displacement. +#define GUARD_BAND 10.0 +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = (p0 - GUARD_BAND) * uScale.xy; + vec2 max_xz = (p0 + uPatchSize + GUARD_BAND) * uScale.xy; + + vec3 bb_min = vec3(min_xz.x, -GUARD_BAND, min_xz.y); + vec3 bb_max = vec3(max_xz.x, +GUARD_BAND, max_xz.y); + vec3 center = 0.5 * (bb_min + bb_max); + float radius = 0.5 * length(bb_max - bb_min); + + vec3 f0 = vec3( + dot(uFrustum[0], vec4(center, 1.0)), + dot(uFrustum[1], vec4(center, 1.0)), + dot(uFrustum[2], vec4(center, 1.0))); + + vec3 f1 = vec3( + dot(uFrustum[3], vec4(center, 1.0)), + dot(uFrustum[4], vec4(center, 1.0)), + dot(uFrustum[5], vec4(center, 1.0))); + + return !(any(lessThanEqual(f0, vec3(-radius))) || any(lessThanEqual(f1, vec3(-radius)))); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + + float l00 = lod_factor(p0 + vec2(-0.5, -0.5) * uPatchSize); + float l10 = lod_factor(p0 + vec2(+0.5, -0.5) * uPatchSize); + float l20 = lod_factor(p0 + vec2(+1.5, -0.5) * uPatchSize); + float l01 = lod_factor(p0 + vec2(-0.5, +0.5) * uPatchSize); + float l11 = lod_factor(p0 + vec2(+0.5, +0.5) * uPatchSize); + float l21 = lod_factor(p0 + vec2(+1.5, +0.5) * uPatchSize); + float l02 = lod_factor(p0 + vec2(-0.5, +1.5) * uPatchSize); + float l12 = lod_factor(p0 + vec2(+0.5, +1.5) * uPatchSize); + float l22 = lod_factor(p0 + vec2(+1.5, +1.5) * uPatchSize); + + vec4 lods = vec4( + dot(vec4(l01, l11, l02, l12), vec4(0.25)), + dot(vec4(l00, l10, l01, l11), vec4(0.25)), + dot(vec4(l10, l20, l11, l21), vec4(0.25)), + dot(vec4(l11, l21, l12, l22), vec4(0.25))); + + vPatchLods = lods; + + vec4 outer_lods = min(lods.xyzw, lods.yzwx); + vec4 levels = tess_level(outer_lods); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float inner = tess_level(min(min_lod, l11)); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + if (!frustum_cull(p0)) + { + gl_TessLevelOuter[0] = -1.0; + gl_TessLevelOuter[1] = -1.0; + gl_TessLevelOuter[2] = -1.0; + gl_TessLevelOuter[3] = -1.0; + gl_TessLevelInner[0] = -1.0; + gl_TessLevelInner[1] = -1.0; + } + else + { + compute_tess_levels(p0); + } +} + diff --git a/spirv_common.hpp b/spirv_common.hpp index aa32142d..e3246e83 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -1381,6 +1381,8 @@ struct Meta { uint32_t packed_type = 0; bool packed = false; + uint32_t ib_member_index = -1; + uint32_t ib_orig_id = 0; } extended; }; diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 676fb277..4ef8d9be 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -1094,6 +1094,22 @@ const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const return get(get_variable_data_type_id(var)); } +SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) +{ + SPIRType *type = &get_variable_data_type(var); + if (is_array(*type)) + type = &get(type->parent_type); + return *type; +} + +const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const +{ + const SPIRType *type = &get_variable_data_type(var); + if (is_array(*type)) + type = &get(type->parent_type); + return *type; +} + bool Compiler::is_sampled_image_type(const SPIRType &type) { return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 && @@ -1183,6 +1199,14 @@ void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decorati case SPIRVCrossDecorationPackedType: dec.extended.packed_type = value; break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = value; + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = value; + break; } } @@ -1201,6 +1225,14 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext case SPIRVCrossDecorationPackedType: dec.extended.packed_type = value; break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = value; + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = value; + break; } } @@ -1218,6 +1250,12 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco case SPIRVCrossDecorationPackedType: return dec.extended.packed_type; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index; + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id; } return 0; @@ -1240,6 +1278,12 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, case SPIRVCrossDecorationPackedType: return dec.extended.packed_type; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index; + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id; } return 0; @@ -1259,6 +1303,12 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati case SPIRVCrossDecorationPackedType: return dec.extended.packed_type != 0; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index != (uint32_t)-1; + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id != 0; } return false; @@ -1281,6 +1331,12 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext case SPIRVCrossDecorationPackedType: return dec.extended.packed_type != 0; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index != (uint32_t)-1; + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id != 0; } return false; @@ -1298,6 +1354,14 @@ void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decora case SPIRVCrossDecorationPackedType: dec.extended.packed_type = 0; break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = -1; + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = 0; + break; } } @@ -1315,6 +1379,14 @@ void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, E case SPIRVCrossDecorationPackedType: dec.extended.packed_type = 0; break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = -1; + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = 0; + break; } } @@ -3527,7 +3599,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args auto *type = &compiler.get_variable_data_type(*var); auto &flags = - type->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins; + var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins; uint32_t count = length - 3; args += 3; diff --git a/spirv_cross.hpp b/spirv_cross.hpp index 377f48ea..3a46ae2a 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -117,7 +117,9 @@ struct EntryPoint enum ExtendedDecorations { SPIRVCrossDecorationPacked, - SPIRVCrossDecorationPackedType + SPIRVCrossDecorationPackedType, + SPIRVCrossDecorationInterfaceMemberIndex, + SPIRVCrossDecorationInterfaceOrigID, }; class Compiler @@ -201,6 +203,12 @@ public: // Gets the SPIR-V type underlying a variable. const SPIRType &get_variable_data_type(const SPIRVariable &var) const; + // Gets the SPIR-V element type underlying an array variable. + SPIRType &get_variable_element_type(const SPIRVariable &var); + + // Gets the SPIR-V element type underlying an array variable. + const SPIRType &get_variable_element_type(const SPIRVariable &var) const; + // Returns if the given type refers to a sampled image. bool is_sampled_image_type(const SPIRType &type); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 06a3e427..36dd8de0 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -90,7 +90,9 @@ void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t com void CompilerMSL::build_implicit_builtins() { bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); - if (need_subpass_input || need_sample_pos || capture_output_to_buffer) + bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex; + bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl; + if (need_subpass_input || need_sample_pos || need_vertex_params || need_tesc_params) { bool has_frag_coord = false; bool has_sample_id = false; @@ -98,6 +100,8 @@ void CompilerMSL::build_implicit_builtins() bool has_base_vertex = false; bool has_instance_idx = false; bool has_base_instance = false; + bool has_invocation_id = false; + bool has_primitive_id = false; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) @@ -115,7 +119,7 @@ void CompilerMSL::build_implicit_builtins() has_sample_id = true; } - if (capture_output_to_buffer) + if (need_vertex_params) { switch (ir.meta[var.self].decoration.builtin_type) { @@ -139,6 +143,23 @@ void CompilerMSL::build_implicit_builtins() break; } } + + if (need_tesc_params) + { + switch (ir.meta[var.self].decoration.builtin_type) + { + case BuiltInInvocationId: + builtin_invocation_id_id = var.self; + has_invocation_id = true; + break; + case BuiltInPrimitiveId: + builtin_primitive_id_id = var.self; + has_primitive_id = true; + break; + default: + break; + } + } }); if (!has_frag_coord && need_subpass_input) @@ -194,8 +215,7 @@ void CompilerMSL::build_implicit_builtins() builtin_sample_id_id = var_id; } - if (capture_output_to_buffer && - (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) + if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) { uint32_t offset = ir.increase_bound_by(2); uint32_t type_id = offset; @@ -251,6 +271,45 @@ void CompilerMSL::build_implicit_builtins() builtin_base_instance_id = var_id; } } + + if (need_tesc_params && (!has_invocation_id || !has_primitive_id)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set(type_id, uint_type); + + SPIRType uint_type_ptr; + uint_type_ptr = uint_type; + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + + if (!has_invocation_id) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_InvocationID. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId); + builtin_invocation_id_id = var_id; + } + if (!has_primitive_id) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_PrimitiveID. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId); + builtin_primitive_id_id = var_id; + } + } } if (needs_aux_buffer_def) @@ -320,12 +379,31 @@ static string create_sampler_address(const char *prefix, MSLSamplerAddress addr) } } +SPIRType &CompilerMSL::get_stage_in_struct_type() +{ + auto &si_var = get(stage_in_var_id); + return get_variable_data_type(si_var); +} + SPIRType &CompilerMSL::get_stage_out_struct_type() { auto &so_var = get(stage_out_var_id); return get_variable_data_type(so_var); } +SPIRType &CompilerMSL::get_patch_stage_out_struct_type() +{ + auto &so_var = get(patch_stage_out_var_id); + return get_variable_data_type(so_var); +} + +std::string CompilerMSL::get_tess_factor_struct_name() +{ + if (get_entry_point().flags.get(ExecutionModeTriangles)) + return "MTLTriangleTessellationFactorsHalf"; + return "MTLQuadTessellationFactorsHalf"; +} + void CompilerMSL::emit_entry_point_declarations() { // FIXME: Get test coverage here ... @@ -529,8 +607,15 @@ string CompilerMSL::compile() // Do output first to ensure out. is declared at top of entry function. qual_pos_var_name = ""; stage_out_var_id = add_interface_block(StorageClassOutput); + patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); stage_in_var_id = add_interface_block(StorageClassInput); + if (get_execution_model() == ExecutionModelTessellationControl) + { + stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput); + stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput); + } + // Metal vertex functions that define no output must disable rasterization and return void. if (!stage_out_var_id) is_rasterization_disabled = true; @@ -615,6 +700,14 @@ void CompilerMSL::preprocess_op_codes() // Metal vertex functions that write to resources must disable rasterization and return void. if (preproc.uses_resource_write) is_rasterization_disabled = true; + + // Tessellation control shaders are run as compute functions in Metal, and so + // must capture their output to a buffer. + if (get_execution_model() == ExecutionModelTessellationControl) + { + is_rasterization_disabled = true; + capture_output_to_buffer = true; + } } // Move the Private and Workgroup global variables to the entry function. @@ -767,13 +860,51 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Add the global variables as arguments to the function if (func_id != ir.default_entry_point) { + bool added_in = false; + bool added_out = false; for (uint32_t arg_id : added_arg_ids) { auto &var = get(arg_id); uint32_t type_id = var.basetype; auto *p_type = &get(type_id); + BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); - if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) + if (get_execution_model() == ExecutionModelTessellationControl && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + !has_decoration(arg_id, DecorationPatch) && + (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + p_type->basetype == SPIRType::Struct)) + { + // Tessellation control shaders see inputs and per-vertex outputs as arrays. + // We collected them into a structure; we must pass the array of this + // structure to the function. + std::string name; + if (var.storage == StorageClassInput) + { + if (added_in) + continue; + name = input_wg_var_name; + arg_id = stage_in_ptr_var_id; + added_in = true; + } + else if (var.storage == StorageClassOutput) + { + if (added_out) + continue; + name = "gl_out"; + arg_id = stage_out_ptr_var_id; + added_out = true; + } + type_id = get(arg_id).basetype; + p_type = &get(type_id); + uint32_t next_id = ir.increase_bound_by(1); + func.add_parameter(type_id, next_id, true); + set(next_id, type_id, StorageClassFunction, 0, arg_id); + + set_name(next_id, name); + } + else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) { // Get the pointee type type_id = get_pointee_type_id(type_id); @@ -872,8 +1003,8 @@ void CompilerMSL::mark_as_packable(SPIRType &type) void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage) { MSLVertexAttr *p_va; - if ((get_entry_point().model == ExecutionModelVertex) && (storage == StorageClassInput) && - (p_va = vtx_attrs_by_location[location])) + if ((get_execution_model() == ExecutionModelVertex || get_execution_model() == ExecutionModelTessellationControl) && + (storage == StorageClassInput) && (p_va = vtx_attrs_by_location[location])) p_va->used_by_shader = true; } @@ -899,7 +1030,7 @@ uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t comp } void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var) + SPIRType &ib_type, SPIRVariable &var, bool strip_array) { bool is_builtin = is_builtin_variable(var); BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); @@ -913,11 +1044,12 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin); var.basetype = type_id; - auto &type = get(type_id); + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); uint32_t target_components = 0; uint32_t type_components = type.vecsize; bool padded_output = false; + type_id = type.self; // Check if we need to pad fragment output to match a certain number of components. if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components && get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput) @@ -932,7 +1064,7 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co } } - ib_type.member_types.push_back(get_pointee_type_id(type_id)); + ib_type.member_types.push_back(type_id); // Give the member a name string mbr_name = ensure_valid_name(to_expression(var.self), "m"); @@ -965,7 +1097,8 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation); - if (storage == StorageClassInput && get_entry_point().model == ExecutionModelVertex) + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || + get_execution_model() == ExecutionModelTessellationControl)) { type_id = ensure_correct_attribute_type(type_id, locn); var.basetype = type_id; @@ -1004,13 +1137,15 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); if (is_sample) set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); } void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var) + SPIRType &ib_type, SPIRVariable &var, bool strip_array) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = get_variable_data_type(var); + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); uint32_t elem_cnt = 0; if (is_matrix(var_type)) @@ -1077,7 +1212,8 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation) + i; - if (storage == StorageClassInput && get_entry_point().model == ExecutionModelVertex) + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || + get_execution_model() == ExecutionModelTessellationControl)) { var.basetype = ensure_correct_attribute_type(var.basetype, locn); uint32_t mbr_type_id = ensure_correct_attribute_type(usable_type->self, locn); @@ -1103,6 +1239,8 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (is_sample) set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + switch (storage) { case StorageClassInput: @@ -1130,9 +1268,9 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage } } -uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx) +uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) { - auto &type = get(var.basetype); + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); uint32_t location = get_decoration(var.self, DecorationLocation); for (uint32_t i = 0; i < mbr_idx; i++) @@ -1160,10 +1298,10 @@ uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, u void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var, - uint32_t mbr_idx) + uint32_t mbr_idx, bool strip_array) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = get_variable_data_type(var); + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); bool is_flat = has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); @@ -1217,7 +1355,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass } else if (has_decoration(var.self, DecorationLocation)) { - uint32_t locn = get_accumulated_member_location(var, mbr_idx) + i; + uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array) + i; set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); mark_location_as_used_by_shader(locn, storage); } @@ -1235,6 +1373,9 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass if (is_sample) set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. switch (storage) { @@ -1259,9 +1400,10 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass } void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx) + SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx, + bool strip_array) { - auto &var_type = get_variable_data_type(var); + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); auto &entry_func = get(ir.default_entry_point); BuiltIn builtin = BuiltInMax; @@ -1289,7 +1431,7 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor // Update the original variable reference to include the structure reference string qual_var_name = ib_var_ref + "." + mbr_name; - if (is_builtin) + if (is_builtin || get_execution_model() == ExecutionModelTessellationControl) { // For the builtin gl_PerVertex, we cannot treat it as a block anyways, // so redirect to qualified name. @@ -1321,7 +1463,8 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) { uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); - if (storage == StorageClassInput && get_entry_point().model == ExecutionModelVertex) + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || + get_execution_model() == ExecutionModelTessellationControl)) { mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); var_type.member_types[mbr_idx] = mbr_type_id; @@ -1334,8 +1477,9 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor { // The block itself might have a location and in this case, all members of the block // receive incrementing locations. - uint32_t locn = get_accumulated_member_location(var, mbr_idx); - if (storage == StorageClassInput && get_entry_point().model == ExecutionModelVertex) + uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array); + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || + get_execution_model() == ExecutionModelTessellationControl)) { mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); var_type.member_types[mbr_idx] = mbr_type_id; @@ -1369,17 +1513,23 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); if (is_sample) set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); } void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, - SPIRVariable &var) + SPIRVariable &var, bool strip_array) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = get_variable_data_type(var); + // Tessellation control I/O variables and tessellation evaluation per-point inputs are + // usually declared as arrays. In these cases, we want to add the element type to the + // interface block, since in Metal it's the interface block itself which is arrayed. + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); if (var_type.basetype == SPIRType::Struct) { - if (!is_builtin_type(var_type)) + if (!is_builtin_type(var_type) && !strip_array) { // For I/O blocks or structs, we will need to pass the block itself around // to functions if they are used globally in leaf functions. @@ -1391,23 +1541,38 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st vars_needing_early_declaration.push_back(var.self); } - // Flatten the struct members into the interface struct - for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + // Per-vertex outputs in a tess. control shader need special handling. + if (strip_array && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock)) { - BuiltIn builtin = BuiltInMax; - bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); - auto &mbr_type = get(var_type.member_types[mbr_idx]); - - if (!is_builtin || has_active_builtin(builtin, storage)) + // We can't flatten the struct, because of the requirement that these be passed around as arrays. + // In Metal, the interface block itself is arrayed. This makes things very complicated, since + // stage-in structures in MSL don't support nested structures. Luckily, for stage-out in a + // tessellation control shader, we can get away with this because the structure is stored + // to a buffer, not returned. + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + } + else + { + // Flatten the struct members into the interface struct + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) { - if (!is_builtin && (storage == StorageClassInput || storage == StorageClassOutput) && - (is_matrix(mbr_type) || is_array(mbr_type))) + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + auto &mbr_type = get(var_type.member_types[mbr_idx]); + + if (!is_builtin || has_active_builtin(builtin, storage)) { - add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx); - } - else - { - add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx); + if (!is_builtin && (storage == StorageClassInput || storage == StorageClassOutput) && + (is_matrix(mbr_type) || is_array(mbr_type))) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, + strip_array); + } + else + { + add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, + strip_array); + } } } } @@ -1424,28 +1589,95 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st if (!is_builtin && (storage == StorageClassInput || storage == StorageClassOutput) && (is_matrix(var_type) || is_array(var_type))) { - add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var); + add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); } else { - add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var); + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); } } } } +// Fix up the mapping of variables to interface member indices, which is used to compile access chains +// for per-vertex variables in a tessellation control shader. +void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) +{ + // Only needed for tessellation control shaders. + if (get_execution_model() != ExecutionModelTessellationControl) + return; + + bool in_array = false; + for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++) + { + auto &mbr_dec = ir.meta[ib_type_id].members[i]; + uint32_t var_id = mbr_dec.extended.ib_orig_id; + if (!var_id) + continue; + auto &var = get(var_id); + + // Unfortunately, all this complexity is needed to handle flattened structs and/or + // arrays. + if (storage == StorageClassInput) + { + auto &type = get_variable_element_type(var); + if (is_array(type) || is_matrix(type)) + { + if (in_array) + continue; + in_array = true; + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + else + { + if (type.basetype == SPIRType::Struct) + { + uint32_t mbr_idx = + get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + auto &mbr_type = get(type.member_types[mbr_idx]); + + if (is_array(mbr_type) || is_matrix(mbr_type)) + { + if (in_array) + continue; + in_array = true; + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + else + { + in_array = false; + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + } + else + { + in_array = false; + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + } + } + else + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } +} + // Add an interface structure for the type of storage, which is either StorageClassInput or StorageClassOutput. // Returns the ID of the newly added variable, or zero if no variable was added. -uint32_t CompilerMSL::add_interface_block(StorageClass storage) +uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { // Accumulate the variables that should appear in the interface struct vector vars; - bool incl_builtins = (storage == StorageClassOutput); + bool incl_builtins = (storage == StorageClassOutput || get_execution_model() == ExecutionModelTessellationControl); - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { auto &type = this->get(var.basetype); + BuiltIn bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) && - !is_hidden_variable(var, incl_builtins) && type.pointer) + !is_hidden_variable(var, incl_builtins) && type.pointer && + has_decoration(var_id, DecorationPatch) == patch && + (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || + bi_type == BuiltInViewportIndex || bi_type == BuiltInFragDepth || bi_type == BuiltInSampleMask)) { vars.push_back(&var); } @@ -1470,15 +1702,28 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage) var.initializer = next_id++; string ib_var_ref; + auto &entry_func = get(ir.default_entry_point); switch (storage) { case StorageClassInput: ib_var_ref = stage_in_var_name; + if (get_execution_model() == ExecutionModelTessellationControl) + { + // Add a hook to populate the shared workgroup memory containing + // the gl_in array. + entry_func.fixup_hooks_in.push_back([=]() { + // Can't use PatchVertices yet; the hook for that may not have run yet. + statement("if (", to_expression(builtin_invocation_id_id), " < ", "spvIndirectParams[0])"); + statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref, + ";"); + statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); + }); + } break; case StorageClassOutput: { - ib_var_ref = stage_out_var_name; + ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; // Add the output interface struct as a local variable to the entry function. // If the entry point should return the output struct, set the entry function @@ -1486,7 +1731,6 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage) // Indicate the output var requires early initialization. bool ep_should_return_output = !get_is_rasterization_disabled(); uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0; - auto &entry_func = get(ir.default_entry_point); if (!capture_output_to_buffer) { entry_func.add_local_variable(ib_var_id); @@ -1500,21 +1744,42 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage) } else { - // Instead of declaring a struct variable to hold the output and then - // copying that to the output buffer, we'll declare the output variable - // as a reference to the final output element in the buffer. Then we can - // avoid the extra copy. - entry_func.fixup_hooks_in.push_back([=]() { - if (stage_out_var_id) - { - // The first member of the indirect buffer is always the number of vertices - // to draw. - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", - output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ", - to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", - to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id), "];"); - } - }); + switch (get_execution_model()) + { + case ExecutionModelVertex: + // Instead of declaring a struct variable to hold the output and then + // copying that to the output buffer, we'll declare the output variable + // as a reference to the final output element in the buffer. Then we can + // avoid the extra copy. + entry_func.fixup_hooks_in.push_back([=]() { + if (stage_out_var_id) + { + // The first member of the indirect buffer is always the number of vertices + // to draw. + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", + output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ", + to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", + to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id), + "];"); + } + }); + break; + case ExecutionModelTessellationControl: + if (patch) + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", + patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "];"); + }); + else + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + break; + default: + break; + } } break; } @@ -1527,15 +1792,53 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage) set_name(ib_var_id, ib_var_ref); for (auto p_var : vars) - add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var); + { + bool strip_array = get_execution_model() == ExecutionModelTessellationControl && !patch; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, strip_array); + } // Sort the members of the structure by their locations. MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location); member_sorter.sort(); + // The member indices were saved to the original variables, but after the members + // were sorted, those indices are now likely incorrect. Fix those up now. + if (!patch) + fix_up_interface_member_indices(storage, ib_type_id); + return ib_var_id; } +uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageClass storage) +{ + if (!ib_var_id) + return 0; + + // Tessellation control per-vertex I/O is presented as an array, so we must + // do the same with our struct here. + uint32_t next_id = ir.increase_bound_by(3); + uint32_t ib_ptr_type_id = next_id++; + auto &ib_type = expression_type(ib_var_id); + auto &ib_ptr_type = set(ib_ptr_type_id, ib_type); + ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self; + ib_ptr_type.pointer = true; + ib_ptr_type.storage = storage == StorageClassInput ? StorageClassWorkgroup : StorageClassStorageBuffer; + ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self]; + // To ensure that get_variable_data_type() doesn't strip off the pointer, + // which we need, use another pointer. + uint32_t ib_ptr_ptr_type_id = next_id++; + auto &ib_ptr_ptr_type = set(ib_ptr_ptr_type_id, ib_ptr_type); + ib_ptr_ptr_type.parent_type = ib_ptr_type_id; + ib_ptr_ptr_type.type_alias = ib_type.self; + ib_ptr_ptr_type.storage = StorageClassFunction; + ir.meta[ib_ptr_ptr_type_id] = ir.meta[ib_type.self]; + + uint32_t ib_ptr_var_id = next_id; + set(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0); + set_name(ib_ptr_var_id, storage == StorageClassInput ? input_wg_var_name : "gl_out"); + return ib_ptr_var_id; +} + // Ensure that the type is compatible with the builtin. // If it is, simply return the given type ID. // Otherwise, create a new type, and return it's ID. @@ -2413,6 +2716,7 @@ void CompilerMSL::emit_resources() // Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created. emit_interface_block(stage_out_var_id); + emit_interface_block(patch_stage_out_var_id); emit_interface_block(stage_in_var_id); } @@ -2507,9 +2811,11 @@ void CompilerMSL::emit_specialization_constants_and_structs() bool is_declarable_struct = is_struct && !is_builtin_block; // We'll declare this later. - if (stage_out_var_id && get(stage_out_var_id).basetype == type_id) + if (stage_out_var_id && get_stage_out_struct_type().self == type_id) is_declarable_struct = false; - if (stage_in_var_id && get(stage_in_var_id).basetype == type_id) + if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (stage_in_var_id && get_stage_in_struct_type().self == type_id) is_declarable_struct = false; // Align and emit declarable structs...but avoid declaring each more than once. @@ -3016,6 +3322,121 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + if (get_execution_model() == ExecutionModelTessellationControl) + { + // If this is a per-vertex output, remap it to the I/O array buffer. + auto *var = maybe_get(ops[2]); + BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); + if (var && (var->storage == StorageClassInput || var->storage == StorageClassOutput) && + !has_decoration(ops[2], DecorationPatch) && + (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + get_variable_data_type(*var).basetype == SPIRType::Struct)) + { + uint32_t length = instruction.length; + AccessChainMeta meta; + std::vector indices; + uint32_t next_id = ir.increase_bound_by(2); + + indices.reserve(length - 3 + 1); + uint32_t type_id = next_id++; + SPIRType new_uint_type; + new_uint_type.basetype = SPIRType::UInt; + new_uint_type.width = 32; + set(type_id, new_uint_type); + + indices.push_back(ops[3]); + + uint32_t const_mbr_id = next_id++; + uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); + if (var->storage == StorageClassInput || + has_decoration(get_variable_element_type(*var).self, DecorationBlock)) + { + uint32_t i = 4; + if (index == (uint32_t)-1) + { + // Maybe this is a struct type in the input class, in which case + // we put it as a decoration on the corresponding member. + index = + get_extended_member_decoration(ops[2], ops[4], SPIRVCrossDecorationInterfaceMemberIndex); + assert(index != (uint32_t)-1); + i++; + } + // In this case, we flattened structures and arrays, so now we have to + // combine the following indices. If we encounter a non-constant index, + // we're hosed. + auto *type = &get_variable_element_type(*var); + for (; i < length; ++i) + { + if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) + break; + + auto &c = get_constant(ops[i]); + index += c.scalar(); + if (type->parent_type) + type = &get(type->parent_type); + } + set(const_mbr_id, type_id, index, false); + indices.push_back(const_mbr_id); + + indices.insert(indices.end(), &ops[i], &ops[length]); + } + else + { + set(const_mbr_id, type_id, index, false); + indices.push_back(const_mbr_id); + + indices.insert(indices.end(), &ops[4], &ops[length]); + } + + // We use the pointer to the base of the input/output array here, + // so this is always a pointer chain. + uint32_t ptr = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + auto e = access_chain(ptr, indices.data(), indices.size(), get(ops[0]), &meta, true); + auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + expr.loaded_from = var->self; + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; + + // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. + if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); + if (meta.storage_packed_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } + + break; + } + + // If this is the inner tessellation level, and we're tessellating triangles, + // drop the last index. It isn't an array in this case, so we can't have an + // array reference here. We need to make this ID a variable instead of an + // expression so we don't try to dereference it as a variable pointer. + auto *m = ir.find_meta(var ? var->self : 0); + if (var && m && m->decoration.builtin_type == BuiltInTessLevelInner && + get_entry_point().flags.get(ExecutionModeTriangles)) + { + auto &dest_var = set(ops[1], *var); + dest_var.basetype = ops[0]; + ir.meta[ops[1]] = ir.meta[ops[2]]; + inherit_expression_dependencies(ops[1], ops[2]); + break; + } + } + + CompilerGLSL::emit_instruction(instruction); + break; + case OpStore: if (maybe_emit_array_assignment(ops[0], ops[1])) break; @@ -3127,14 +3548,18 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem) { - if (get_entry_point().model != ExecutionModelGLCompute) + if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl) return; string bar_stmt = "threadgroup_barrier(mem_flags::"; uint32_t mem_sem = id_mem_sem ? get(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone); - if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask) + if (get_execution_model() == ExecutionModelTessellationControl) + // For tesc shaders, this also affects objects in the Output storage class. + // Since in Metal, these are placed in a device buffer, we have to sync device memory here. + bar_stmt += "mem_device"; + else if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask) bar_stmt += "mem_device"; else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsAtomicCounterMemoryMask)) @@ -3477,7 +3902,7 @@ void CompilerMSL::emit_interface_block(uint32_t ib_var_id) if (ib_var_id) { auto &ib_var = get(ib_var_id); - auto &ib_type = get(ib_var.basetype); + auto &ib_type = get_variable_data_type(ib_var); assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); emit_struct(ib_type); } @@ -3547,7 +3972,6 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) { uint32_t name_id = arg.id; - string address_space; auto *var = maybe_get(arg.id); if (var) { @@ -3557,13 +3981,10 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) name_id = var->basevariable; var->parameter = &arg; // Hold a pointer to the parameter so we can invalidate the readonly field if needed. - address_space = get_argument_address_space(*var); } add_local_variable_name(name_id); - if (!address_space.empty()) - decl += address_space + " "; decl += argument_decl(arg); // Manufacture automatic sampler arg for SampledImage texture @@ -4319,6 +4740,36 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in } } + // Tessellation control function inputs + if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInInvocationId: + case BuiltInPrimitiveId: + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + case BuiltInPatchVertices: + return ""; + // Others come from stage input. + default: + break; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + if (locn != k_unknown_location) + return string(" [[attribute(") + convert_to_string(locn) + ")]]"; + } + + // Tessellation control function outputs + if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassOutput) + { + // For this type of shader, we always arrange for it to capture its + // output to a buffer. For this reason, qualifiers are irrelevant here. + return ""; + } + // Fragment function inputs if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput) { @@ -4491,6 +4942,10 @@ string CompilerMSL::func_type_decl(SPIRType &type) entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment"; break; + case ExecutionModelTessellationControl: + if (!msl_options.supports_msl_version(1, 2)) + SPIRV_CROSS_THROW("Tessellation requires Metal 1.2."); + /* fallthrough */ case ExecutionModelGLCompute: case ExecutionModelKernel: entry_type = "kernel"; @@ -4540,6 +4995,11 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) // No address space for plain values. return type.pointer ? "thread" : ""; + case StorageClassInput: + if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id) + return "threadgroup"; + break; + case StorageClassOutput: if (capture_output_to_buffer) return "device"; @@ -4609,7 +5069,7 @@ string CompilerMSL::entry_point_args(bool append_comma) ep_args += ", "; add_resource_name(var.self); - ep_args += type_to_glsl(type) + " " + to_name(var.self) + " [[stage_in]]"; + ep_args += join(type_to_glsl(type), " ", to_name(var.self), " [[stage_in]]"); } // Output resources, sorted by resource index & type @@ -4726,15 +5186,18 @@ string CompilerMSL::entry_point_args(bool append_comma) } // Builtin variables - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - uint32_t var_id = var.self; + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; // Don't emit SamplePosition as a separate parameter. In the entry // point, we get that by calling get_sample_position() on the sample ID. - if (var.storage == StorageClassInput && is_builtin_variable(var)) + if (var.storage == StorageClassInput && is_builtin_variable(var) && + get_variable_data_type(var).basetype != SPIRType::Struct) { - if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation) + if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation && + bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner && + bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize && + bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance) { if (!ep_args.empty()) ep_args += ", "; @@ -4761,10 +5224,43 @@ string CompilerMSL::entry_point_args(bool append_comma) if (!ep_args.empty()) ep_args += ", "; ep_args += join("device ", type_to_glsl(get_stage_out_struct_type()), "* ", output_buffer_var_name, - " [[buffer(", msl_options.shader_output_buffer_index, ")]], "); + " [[buffer(", msl_options.shader_output_buffer_index, ")]]"); + } + + if (stage_out_var_id || get_execution_model() == ExecutionModelTessellationControl) + { + if (!ep_args.empty()) + ep_args += ", "; ep_args += join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } + + // Tessellation control shaders get three additional parameters: + // a buffer to hold the per-patch data, a buffer to hold the per-patch + // tessellation levels, and a block of workgroup memory to hold the + // input control point data. + if (get_execution_model() == ExecutionModelTessellationControl) + { + if (patch_stage_out_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", + patch_output_buffer_var_name, " [[buffer(", + convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]"); + } + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(", + convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + if (stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, + " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + } + } } if (!ep_args.empty() && append_comma) @@ -4824,6 +5320,11 @@ void CompilerMSL::fix_up_shader_inputs_outputs() statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_is_helper_thread();"); }); break; + case BuiltInPatchVertices: + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = spvIndirectParams[0];"); + }); + break; default: break; } @@ -4921,7 +5422,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += "const "; bool builtin = is_builtin_variable(var); - if (builtin) + if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id) + decl += type_to_glsl(type, arg.id); + else if (builtin) decl += builtin_type_decl(static_cast(get_decoration(arg.id, DecorationBuiltIn))); else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) decl += join(type_to_glsl(type, arg.id), "*"); @@ -4930,6 +5433,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) bool opaque_handle = storage == StorageClassUniformConstant; + string address_space = get_argument_address_space(var); + if (!builtin && !opaque_handle && !is_pointer && (storage == StorageClassFunction || storage == StorageClassGeneric)) { @@ -4951,6 +5456,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else { + if (!address_space.empty()) + decl = join(address_space, " ", decl); decl += " "; decl += to_expression(name_id); } @@ -4958,6 +5465,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) else if (is_array(type) && !type_is_image) { // Arrays of images and samplers are special cased. + if (!address_space.empty()) + decl = join(address_space, " ", decl); decl += " (&"; decl += to_expression(name_id); decl += ")"; @@ -4965,12 +5474,23 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else if (!opaque_handle) { + // If this is going to be a reference to a variable pointer, the address space + // for the reference has to go before the '&', but after the '*'. + if (!address_space.empty()) + { + if (decl.back() == '*') + decl += join(" ", address_space, " "); + else + decl = join(address_space, " ", decl); + } decl += "&"; decl += " "; decl += to_expression(name_id); } else { + if (!address_space.empty()) + decl = join(address_space, " ", decl); decl += " "; decl += to_expression(name_id); } @@ -5651,6 +6171,7 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) // When used in the entry function, output builtins are qualified with output struct name. // Test storage class as NOT Input, as output builtins might be part of generic type. + // Also don't do this for tessellation shaders. case BuiltInViewportIndex: if (!msl_options.supports_msl_version(2, 0)) SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); @@ -5662,11 +6183,25 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInLayer: case BuiltInFragDepth: case BuiltInSampleMask: + if (get_execution_model() == ExecutionModelTessellationControl) + break; if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); break; + case BuiltInTessLevelOuter: + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "].edgeTessellationFactor"); + break; + + case BuiltInTessLevelInner: + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "].insideTessellationFactor"); + break; + default: break; } @@ -5711,6 +6246,21 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); return "viewport_array_index"; + // Tess. control function in + case BuiltInInvocationId: + return "thread_index_in_threadgroup"; + case BuiltInPatchVertices: + // Shouldn't be reached. + SPIRV_CROSS_THROW("PatchVertices is derived from the auxiliary buffer in MSL."); + case BuiltInPrimitiveId: + return "threadgroup_position_in_grid"; + + // Tess. control function out + case BuiltInTessLevelOuter: + case BuiltInTessLevelInner: + // Shouldn't be reached. + SPIRV_CROSS_THROW("Tessellation levels are handled specially in MSL."); + // Fragment function in case BuiltInFrontFacing: return "front_facing"; @@ -5791,6 +6341,20 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin) SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); return "uint"; + // Tess. control function in + case BuiltInInvocationId: + return "uint"; + case BuiltInPatchVertices: + return "uint"; + case BuiltInPrimitiveId: + return "uint"; + + // Tess. control function out + case BuiltInTessLevelInner: + return "half"; + case BuiltInTessLevelOuter: + return "half"; + // Fragment function in case BuiltInFrontFacing: return "bool"; @@ -6306,6 +6870,11 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp expected_type = SPIRType::UInt; break; + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + expected_type = SPIRType::Half; + break; + default: break; } @@ -6316,6 +6885,10 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { + auto *var = maybe_get_backing_variable(target_id); + if (var) + target_id = var->self; + // Only interested in standalone builtin variables. if (!has_decoration(target_id, DecorationBuiltIn)) return; @@ -6329,15 +6902,28 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr expected_type = SPIRType::UInt; break; + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + expected_type = SPIRType::Half; + break; + default: break; } if (expected_type != expr_type.basetype) { - auto type = expr_type; - type.basetype = expected_type; - expr = bitcast_expression(type, expr_type.basetype, expr); + if (expected_type == SPIRType::Half && expr_type.basetype == SPIRType::Float) + { + // These are of different widths, so we cannot do a straight bitcast. + expr = join("half(", expr, ")"); + } + else + { + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); + } } } diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 264bc512..8c749ed7 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -170,6 +170,9 @@ public: uint32_t aux_buffer_index = 30; uint32_t indirect_params_buffer_index = 29; uint32_t shader_output_buffer_index = 28; + uint32_t shader_patch_output_buffer_index = 27; + uint32_t shader_tess_factor_buffer_index = 26; + uint32_t shader_input_wg_index = 0; bool enable_point_size_builtin = true; bool disable_rasterization = false; bool capture_output_to_buffer = false; @@ -231,7 +234,8 @@ public: // rasterization if vertex shader requires rasterization to be disabled. bool get_is_rasterization_disabled() const { - return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex); + return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || + get_entry_point().model == spv::ExecutionModelTessellationControl); } // Provide feedback to calling API to allow it to pass an auxiliary @@ -248,6 +252,20 @@ public: return capture_output_to_buffer && stage_out_var_id != 0; } + // Provide feedback to calling API to allow it to pass a patch output + // buffer if the shader needs it. + bool needs_patch_output_buffer() const + { + return capture_output_to_buffer && patch_stage_out_var_id != 0; + } + + // Provide feedback to calling API to allow it to pass an input threadgroup + // buffer if the shader needs it. + bool needs_input_threadgroup_mem() const + { + return capture_output_to_buffer && stage_in_var_id != 0; + } + // An enum of SPIR-V functions that are implemented in additional // source code that is added to the shader if necessary. enum SPVFuncImpl @@ -384,19 +402,24 @@ protected: void extract_global_variables_from_function(uint32_t func_id, std::set &added_arg_ids, std::unordered_set &global_var_ids, std::unordered_set &processed_func_ids); - uint32_t add_interface_block(spv::StorageClass storage); + uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); + uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, - SPIRVariable &var); + SPIRVariable &var, bool strip_array); void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var); + SPIRType &ib_type, SPIRVariable &var, bool strip_array); void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var); + SPIRType &ib_type, SPIRVariable &var, bool strip_array); void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t index); + SPIRType &ib_type, SPIRVariable &var, uint32_t index, + bool strip_array); void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t index); - uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx); + SPIRType &ib_type, SPIRVariable &var, uint32_t index, + bool strip_array); + uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array); + + void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); void mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage); uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); @@ -431,7 +454,10 @@ protected: MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index); std::string get_argument_address_space(const SPIRVariable &argument); std::string get_type_address_space(const SPIRType &type); + SPIRType &get_stage_in_struct_type(); SPIRType &get_stage_out_struct_type(); + SPIRType &get_patch_stage_out_struct_type(); + std::string get_tess_factor_struct_name(); void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); @@ -448,6 +474,8 @@ protected: uint32_t builtin_base_vertex_id = 0; uint32_t builtin_instance_idx_id = 0; uint32_t builtin_base_instance_id = 0; + uint32_t builtin_invocation_id_id = 0; + uint32_t builtin_primitive_id_id = 0; uint32_t aux_buffer_id = 0; void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; @@ -468,6 +496,9 @@ protected: MSLResourceBinding next_metal_resource_index; uint32_t stage_in_var_id = 0; uint32_t stage_out_var_id = 0; + uint32_t patch_stage_out_var_id = 0; + uint32_t stage_in_ptr_var_id = 0; + uint32_t stage_out_ptr_var_id = 0; bool has_sampled_images = false; bool needs_vertex_idx_arg = false; bool needs_instance_idx_arg = false; @@ -478,9 +509,13 @@ protected: std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; + std::string patch_stage_out_var_name = "patchOut"; std::string sampler_name_suffix = "Smplr"; std::string swizzle_name_suffix = "Swzl"; + std::string input_wg_var_name = "gl_in"; std::string output_buffer_var_name = "spvOut"; + std::string patch_output_buffer_var_name = "spvPatchOut"; + std::string tess_factor_buffer_var_name = "spvTessLevel"; spv::Op previous_instruction_opcode = spv::OpNop; std::unordered_map constexpr_samplers;