From 5b952d2cbfe38fe34c76b5f3daace8b579e4f9c0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 18 Jan 2022 14:34:00 +0100 Subject: [PATCH] MSL: Rethink how opaque descriptors are passed to leaf functions. We were passing arrays by value which the compiler fails to optimize, causing abysmal performance. To fix this, we need to consider that descriptors can be in constant or const device address spaces. Also, lone descriptors are passed by value, so we explicitly remove address space qualifiers. One failure case is when a shader passes a texture/sampler array as an argument. It's all UniformConstant in SPIR-V, but in MSL it might be thread, const device or constant, so that won't work ... Global variable use works fine though, and that should cover 99.9999999% of use cases. --- ...e-load-store-short-vector.invalid.asm.comp | 2 +- ...nput-attachment-unused-frag-coord.asm.frag | 2 +- .../shadow-compare-global-alias.invalid.frag | 8 +- .../frag/texture-access-leaf.swizzle.frag | 2 +- .../vert/functions_nested.vert | 6 +- .../texture-access-function.swizzle.vk.frag | 2 +- ...e-load-store-short-vector.invalid.asm.comp | 2 +- .../depth-image-color-format-fetch.asm.frag | 4 +- .../depth-image-color-format-sampled.asm.frag | 4 +- .../asm/frag/unknown-depth-state.asm.frag | 4 +- .../frag/argument-buffers.msl2.argument.frag | 4 +- ...niform.msl2.argument.discrete.swizzle.frag | 4 +- ...zzle-nonconstant-uniform.msl2.swizzle.frag | 4 +- ...wizzle.msl2.argument.discrete.swizzle.frag | 6 +- ...array-of-texture-swizzle.msl2.swizzle.frag | 4 +- .../frag/helper-invocation.msl21.frag | 2 +- ....device-argument-buffer.argument.msl2.frag | 4 +- .../frag/image-query-lod.msl22.frag | 2 +- ...t-attachment-ms.arrayed-subpass.msl21.frag | 2 +- .../shaders-msl/frag/input-attachment-ms.frag | 2 +- .../input-attachment-ms.multiview.msl21.frag | 2 +- .../input-attachment.arrayed-subpass.frag | 2 +- .../shaders-msl/frag/input-attachment.frag | 2 +- 
.../frag/input-attachment.multiview.frag | 2 +- ...e-depth-propagate-state-from-resource.frag | 6 +- .../sample-depth-separate-image-sampler.frag | 4 +- .../frag/sampler-image-arrays.msl2.frag | 6 +- reference/shaders-msl/frag/sampler.frag | 2 +- .../frag/separate-image-sampler-argument.frag | 2 +- reference/shaders-msl/tese/water_tess.tese | 2 +- .../vert/resource-arrays-leaf.ios.vert | 2 +- spirv_msl.cpp | 112 ++++++++++++------ spirv_msl.hpp | 1 + 33 files changed, 131 insertions(+), 84 deletions(-) diff --git a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp index fca572e3..53655639 100644 --- a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -6,7 +6,7 @@ using namespace metal; static inline __attribute__((always_inline)) -void _main(thread const uint3& id, thread texture2d TargetTexture) +void _main(thread const uint3& id, texture2d TargetTexture) { float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 storeTemp = loaded + float2(1.0); diff --git a/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag b/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag index 93bbaec0..0643acfa 100644 --- a/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag +++ b/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d uInput, thread float4& gl_FragCoord) +float4 load_subpasses(texture2d uInput, thread float4& gl_FragCoord) { return uInput.read(uint2(gl_FragCoord.xy)); } diff --git a/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag 
b/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag index 8f7b8ece..58985c63 100644 --- a/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag @@ -16,25 +16,25 @@ struct main0_in }; static inline __attribute__((always_inline)) -float Samp(thread const float3& uv, thread depth2d uTex, thread sampler uSamp) +float Samp(thread const float3& uv, depth2d uTex, sampler uSamp) { return uTex.sample_compare(uSamp, uv.xy, uv.z); } static inline __attribute__((always_inline)) -float Samp2(thread const float3& uv, thread depth2d uSampler, thread const sampler uSamplerSmplr, thread float3& vUV) +float Samp2(thread const float3& uv, depth2d uSampler, sampler uSamplerSmplr, thread float3& vUV) { return uSampler.sample_compare(uSamplerSmplr, vUV.xy, vUV.z); } static inline __attribute__((always_inline)) -float Samp3(thread const depth2d uT, thread const sampler uS, thread const float3& uv, thread float3& vUV) +float Samp3(depth2d uT, sampler uS, thread const float3& uv, thread float3& vUV) { return uT.sample_compare(uS, vUV.xy, vUV.z); } static inline __attribute__((always_inline)) -float Samp4(thread const depth2d uS, thread const sampler uSSmplr, thread const float3& uv, thread float3& vUV) +float Samp4(depth2d uS, sampler uSSmplr, thread const float3& uv, thread float3& vUV) { return uS.sample_compare(uSSmplr, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag index fabf6c57..9e5dba8c 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag @@ -133,7 +133,7 @@ inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint } static inline __attribute__((always_inline)) -float4 doSwizzle(thread texture1d tex1d, thread const sampler 
tex1dSmplr, constant uint& tex1dSwzl, thread texture2d tex2d, thread const sampler tex2dSmplr, constant uint& tex2dSwzl, thread texture3d tex3d, thread const sampler tex3dSmplr, constant uint& tex3dSwzl, thread texturecube texCube, thread const sampler texCubeSmplr, constant uint& texCubeSwzl, thread texture2d_array tex2dArray, thread const sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, thread texturecube_array texCubeArray, thread const sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, thread depth2d depth2d, thread const sampler depth2dSmplr, constant uint& depth2dSwzl, thread depthcube depthCube, thread const sampler depthCubeSmplr, constant uint& depthCubeSwzl, thread depth2d_array depth2dArray, thread const sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, thread depthcube_array depthCubeArray, thread const sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, thread texture2d texBuffer) +float4 doSwizzle(texture1d tex1d, sampler tex1dSmplr, constant uint& tex1dSwzl, texture2d tex2d, sampler tex2dSmplr, constant uint& tex2dSwzl, texture3d tex3d, sampler tex3dSmplr, constant uint& tex3dSwzl, texturecube texCube, sampler texCubeSmplr, constant uint& texCubeSwzl, texture2d_array tex2dArray, sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, texturecube_array texCubeArray, sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, depth2d depth2d, sampler depth2dSmplr, constant uint& depth2dSwzl, depthcube depthCube, sampler depthCubeSmplr, constant uint& depthCubeSwzl, depth2d_array depth2dArray, sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, depthcube_array depthCubeArray, sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, texture2d texBuffer) { float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl); diff --git a/reference/shaders-msl-no-opt/vert/functions_nested.vert 
b/reference/shaders-msl-no-opt/vert/functions_nested.vert index 8809dbd0..2d394f67 100644 --- a/reference/shaders-msl-no-opt/vert/functions_nested.vert +++ b/reference/shaders-msl-no-opt/vert/functions_nested.vert @@ -66,7 +66,7 @@ uint get_bits(thread const uint4& v, thread const int& swap) } static inline __attribute__((always_inline)) -float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d input_stream) +float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, texture2d input_stream) { float4 result = float4(0.0, 0.0, 0.0, 1.0); bool reverse_order = false; @@ -135,7 +135,7 @@ float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thr } static inline __attribute__((always_inline)) -float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d buff_in_2, thread texture2d buff_in_1) +float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d buff_in_2, texture2d buff_in_1) { int param = location; attr_desc desc = fetch_desc(param, v_227); @@ -155,7 +155,7 @@ float4 read_location(thread const int& location, constant VertexBuffer& v_227, t } static inline __attribute__((always_inline)) -void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d buff_in_2, thread texture2d buff_in_1, constant VertexConstantsBuffer& v_309) +void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d buff_in_2, texture2d buff_in_1, constant VertexConstantsBuffer& v_309) { int param = 3; float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1); diff --git a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag 
b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag index c3666120..e402bbb2 100644 --- a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag +++ b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag @@ -138,7 +138,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr, constant uint& t1Swzl, thread const texture2d t2, constant uint& t2Swzl, thread const texture3d t3, thread const sampler t3Smplr, constant uint& t3Swzl, thread const texturecube tc, constant uint& tcSwzl, thread const texture2d_array t2a, thread const sampler t2aSmplr, constant uint& t2aSwzl, thread const texturecube_array tca, thread const sampler tcaSmplr, constant uint& tcaSwzl, thread const texture2d tb, thread const depth2d d2, thread const sampler d2Smplr, constant uint& d2Swzl, thread const depthcube dc, thread const sampler dcSmplr, constant uint& dcSwzl, thread const depth2d_array d2a, constant uint& d2aSwzl, thread const depthcube_array dca, thread const sampler dcaSmplr, constant uint& dcaSwzl, thread sampler defaultSampler, thread sampler shadowSampler) +float4 do_samples(texture1d t1, sampler t1Smplr, constant uint& t1Swzl, texture2d t2, constant uint& t2Swzl, texture3d t3, sampler t3Smplr, constant uint& t3Swzl, texturecube tc, constant uint& tcSwzl, texture2d_array t2a, sampler t2aSmplr, constant uint& t2aSwzl, texturecube_array tca, sampler tcaSmplr, constant uint& tcaSwzl, texture2d tb, depth2d d2, sampler d2Smplr, constant uint& d2Swzl, depthcube dc, sampler dcSmplr, constant uint& dcSwzl, depth2d_array d2a, constant uint& d2aSwzl, depthcube_array dca, sampler dcaSmplr, constant uint& dcaSwzl, sampler defaultSampler, sampler shadowSampler) { float4 c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0)), t2Swzl); diff --git 
a/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp index fca572e3..53655639 100644 --- a/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp +++ b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -6,7 +6,7 @@ using namespace metal; static inline __attribute__((always_inline)) -void _main(thread const uint3& id, thread texture2d TargetTexture) +void _main(thread const uint3& id, texture2d TargetTexture) { float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 storeTemp = loaded + float2(1.0); diff --git a/reference/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag b/reference/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag index 01c670d9..46bdd3e4 100644 --- a/reference/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag +++ b/reference/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag @@ -21,14 +21,14 @@ struct main0_in }; static inline __attribute__((always_inline)) -void _108(int _109, thread texture2d v_8, device _7& v_10) +void _108(int _109, texture2d v_8, device _7& v_10) { int2 _113 = int2(_109 - 8 * (_109 / 8), _109 / 8); v_10._m0[_109] = v_8.read(uint2(_113), 0); } static inline __attribute__((always_inline)) -float4 _98(float4 _119, thread texture2d v_8, device _7& v_10) +float4 _98(float4 _119, texture2d v_8, device _7& v_10) { for (int _121 = 0; _121 < 64; _121++) { diff --git a/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag index 9e374c0a..df9f8f43 100644 --- a/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag +++ b/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -21,13 +21,13 @@ struct main0_in }; static inline __attribute__((always_inline)) -void _108(int _109, thread texture2d v_8, thread 
sampler v_9, device _7& v_10) +void _108(int _109, texture2d v_8, sampler v_9, device _7& v_10) { v_10._m0[_109] = v_8.sample(v_9, (float2(int2(_109 - 8 * (_109 / 8), _109 / 8)) / float2(8.0)), level(0.0)); } static inline __attribute__((always_inline)) -float4 _98(float4 _121, thread texture2d v_8, thread sampler v_9, device _7& v_10) +float4 _98(float4 _121, texture2d v_8, sampler v_9, device _7& v_10) { for (int _123 = 0; _123 < 64; _123++) { diff --git a/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag b/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag index 01a0e5a7..e512bdca 100644 --- a/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag +++ b/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag @@ -16,13 +16,13 @@ struct main0_in }; static inline __attribute__((always_inline)) -float sample_combined(thread float3& vUV, thread depth2d uShadow, thread const sampler uShadowSmplr) +float sample_combined(thread float3& vUV, depth2d uShadow, sampler uShadowSmplr) { return uShadow.sample_compare(uShadowSmplr, vUV.xy, vUV.z); } static inline __attribute__((always_inline)) -float sample_separate(thread float3& vUV, thread depth2d uTexture, thread sampler uSampler) +float sample_separate(thread float3& vUV, depth2d uTexture, sampler uSampler) { return uTexture.sample_compare(uSampler, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag b/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag index b7005ff9..fd0a3def 100644 --- a/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag +++ b/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag @@ -63,7 +63,7 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_in_function2(thread texture2d uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array, 4> uTexture2, thread const array uSampler, thread const array, 2> uTextures, thread const array uTexturesSmplr, 
device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers) +float4 sample_in_function2(texture2d uTexture, sampler uTextureSmplr, thread float2& vUV, constant array, 4>& uTexture2, constant array& uSampler, constant array, 2>& uTextures, constant array& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers) { float4 ret = uTexture.sample(uTextureSmplr, vUV); ret += uTexture2[2].sample(uSampler[1], vUV); @@ -75,7 +75,7 @@ float4 sample_in_function2(thread texture2d uTexture, thread const sample } static inline __attribute__((always_inline)) -float4 sample_in_function(thread texture2d uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array, 4> uTexture2, thread const array uSampler, thread const array, 2> uTextures, thread const array uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4]) +float4 sample_in_function(texture2d uTexture, sampler uTextureSmplr, thread float2& vUV, constant array, 4>& uTexture2, constant array& uSampler, constant array, 2>& uTextures, constant array& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4]) { float4 ret = sample_in_function2(uTexture, uTextureSmplr, vUV, uTexture2, uSampler, uTextures, uTexturesSmplr, v_60, ssbos, registers); ret += v_90.ubo; diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag index 3e8ce2df..9c02d302 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag +++ 
b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -95,13 +95,13 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) +float4 sample_in_func(constant array, 4>& uSampler, constant array& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) { return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]); } static inline __attribute__((always_inline)) -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag index 87d33039..978ecbe1 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -86,13 +86,13 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) +float4 sample_in_func(thread const array, 4>& uSampler, thread const array& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) { return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]); } static inline __attribute__((always_inline)) -float4 sample_single_in_func(thread const texture2d s, 
thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 9336688d..43a61e1f 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -83,19 +83,19 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_in_func_1(thread const array, 4> uSampler0, thread const array uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) +float4 sample_in_func_1(constant array, 4>& uSampler0, constant array& uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) { return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]); } static inline __attribute__((always_inline)) -float4 sample_in_func_2(thread float2& vUV, thread texture2d uSampler1, thread const sampler uSampler1Smplr, constant uint& uSampler1Swzl) +float4 sample_in_func_2(thread float2& vUV, texture2d uSampler1, sampler uSampler1Smplr, constant uint& uSampler1Swzl) { return spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, vUV), uSampler1Swzl); } static inline __attribute__((always_inline)) -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index 
e1cb4525..1db803c5 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -76,13 +76,13 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) +float4 sample_in_func(thread const array, 4>& uSampler, thread const array& uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) { return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]); } static inline __attribute__((always_inline)) -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/helper-invocation.msl21.frag b/reference/shaders-msl/frag/helper-invocation.msl21.frag index 97d69e19..7cf97a25 100644 --- a/reference/shaders-msl/frag/helper-invocation.msl21.frag +++ b/reference/shaders-msl/frag/helper-invocation.msl21.frag @@ -16,7 +16,7 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 foo(thread bool& gl_HelperInvocation, thread texture2d uSampler, thread const sampler uSamplerSmplr, thread float2& vUV) +float4 foo(thread bool& gl_HelperInvocation, texture2d uSampler, sampler uSamplerSmplr, thread float2& vUV) { float4 color; if (!gl_HelperInvocation) diff --git a/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag index 0e35c248..a3583584 100644 --- a/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag +++ 
b/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -38,13 +38,13 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 samp_array(thread const array, 10000> uSamplers, thread const array uSamplersSmplr, thread float2& vUV, constant UBO* const device (&vs)[10000]) +float4 samp_array(const device array, 10000>& uSamplers, const device array& uSamplersSmplr, thread float2& vUV, constant UBO* const device (&vs)[10000]) { return uSamplers[9999].sample(uSamplersSmplr[9999], vUV) + vs[5000]->v; } static inline __attribute__((always_inline)) -float4 samp_single(thread float2& vUV, thread texture2d uSampler, thread const sampler uSamplerSmplr) +float4 samp_single(thread float2& vUV, texture2d uSampler, sampler uSamplerSmplr) { return uSampler.sample(uSamplerSmplr, vUV); } diff --git a/reference/shaders-msl/frag/image-query-lod.msl22.frag b/reference/shaders-msl/frag/image-query-lod.msl22.frag index 6e7991f2..2362597f 100644 --- a/reference/shaders-msl/frag/image-query-lod.msl22.frag +++ b/reference/shaders-msl/frag/image-query-lod.msl22.frag @@ -16,7 +16,7 @@ struct main0_in }; static inline __attribute__((always_inline)) -void from_function(thread float2& FragColor, thread texture2d uSampler2D, thread const sampler uSampler2DSmplr, thread float3& vUV, thread texture3d uSampler3D, thread const sampler uSampler3DSmplr, thread texturecube uSamplerCube, thread const sampler uSamplerCubeSmplr, thread texture2d uTexture2D, thread sampler uSampler, thread texture3d uTexture3D, thread texturecube uTextureCube) +void from_function(thread float2& FragColor, texture2d uSampler2D, sampler uSampler2DSmplr, thread float3& vUV, texture3d uSampler3D, sampler uSampler3DSmplr, texturecube uSamplerCube, sampler uSamplerCubeSmplr, texture2d uTexture2D, sampler uSampler, texture3d uTexture3D, texturecube uTextureCube) { float2 _22; _22.x = uSampler2D.calculate_clamped_lod(uSampler2DSmplr, vUV.xy); diff --git 
a/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag index f7b1441f..3f91c2c4 100644 --- a/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag +++ b/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_Layer) +float4 load_subpasses(texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_Layer) { float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_Layer, gl_SampleID); return _24; diff --git a/reference/shaders-msl/frag/input-attachment-ms.frag b/reference/shaders-msl/frag/input-attachment-ms.frag index 5e1f504a..97629d8e 100644 --- a/reference/shaders-msl/frag/input-attachment-ms.frag +++ b/reference/shaders-msl/frag/input-attachment-ms.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d_ms uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord) +float4 load_subpasses(texture2d_ms uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord) { float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_SampleID); return _24; diff --git a/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag index 5e8c5339..1b6b9f6a 100644 --- a/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag +++ b/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) +float4 
load_subpasses(texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) { float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex, gl_SampleID); return _24; diff --git a/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag index 76d0a3cf..934abd58 100644 --- a/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag +++ b/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_Layer) +float4 load_subpasses(texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_Layer) { return uInput.read(uint2(gl_FragCoord.xy), gl_Layer); } diff --git a/reference/shaders-msl/frag/input-attachment.frag b/reference/shaders-msl/frag/input-attachment.frag index 93bbaec0..0643acfa 100644 --- a/reference/shaders-msl/frag/input-attachment.frag +++ b/reference/shaders-msl/frag/input-attachment.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d uInput, thread float4& gl_FragCoord) +float4 load_subpasses(texture2d uInput, thread float4& gl_FragCoord) { return uInput.read(uint2(gl_FragCoord.xy)); } diff --git a/reference/shaders-msl/frag/input-attachment.multiview.frag b/reference/shaders-msl/frag/input-attachment.multiview.frag index 931790b8..6ba24216 100644 --- a/reference/shaders-msl/frag/input-attachment.multiview.frag +++ b/reference/shaders-msl/frag/input-attachment.multiview.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 load_subpasses(thread const texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) +float4 load_subpasses(texture2d_array uInput, thread float4& gl_FragCoord, 
thread uint& gl_ViewIndex) { return uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex); } diff --git a/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag index a093d3f4..f0bf396c 100644 --- a/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag +++ b/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -16,19 +16,19 @@ struct main0_in }; static inline __attribute__((always_inline)) -float sample_normal2(thread const depth2d tex, thread sampler uSampler, thread float3& vUV) +float sample_normal2(depth2d tex, sampler uSampler, thread float3& vUV) { return float4(tex.sample(uSampler, vUV.xy)).x; } static inline __attribute__((always_inline)) -float sample_normal(thread const depth2d tex, thread sampler uSampler, thread float3& vUV) +float sample_normal(depth2d tex, sampler uSampler, thread float3& vUV) { return sample_normal2(tex, uSampler, vUV); } static inline __attribute__((always_inline)) -float sample_comp(thread const depth2d tex, thread float3& vUV, thread sampler uSamplerShadow) +float sample_comp(depth2d tex, thread float3& vUV, sampler uSamplerShadow) { return tex.sample_compare(uSamplerShadow, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag index 115ed9fa..27653a06 100644 --- a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag +++ b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag @@ -11,13 +11,13 @@ struct main0_out }; static inline __attribute__((always_inline)) -float sample_depth_from_function(thread const depth2d uT, thread const sampler uS) +float sample_depth_from_function(depth2d uT, sampler uS) { return uT.sample_compare(uS, float3(0.5).xy, 0.5); } static inline __attribute__((always_inline)) -float sample_color_from_function(thread const texture2d 
uT, thread const sampler uS) +float sample_color_from_function(texture2d uT, sampler uS) { return uT.sample(uS, float2(0.5)).x; } diff --git a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag index dec6d051..8d1934d4 100644 --- a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag +++ b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag @@ -17,19 +17,19 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array, 4> uSampler, thread const array uSamplerSmplr) +float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array, 4>& uSampler, thread const array& uSamplerSmplr) { return uSampler[vIndex].sample(uSamplerSmplr[vIndex], (vTex + float2(0.100000001490116119384765625))); } static inline __attribute__((always_inline)) -float4 sample_from_argument(thread const array, 4> samplers, thread const array samplersSmplr, thread int& vIndex, thread float2& vTex) +float4 sample_from_argument(thread const array, 4>& samplers, thread const array& samplersSmplr, thread int& vIndex, thread float2& vTex) { return samplers[vIndex].sample(samplersSmplr[vIndex], (vTex + float2(0.20000000298023223876953125))); } static inline __attribute__((always_inline)) -float4 sample_single_from_argument(thread const texture2d samp, thread const sampler sampSmplr, thread float2& vTex) +float4 sample_single_from_argument(texture2d samp, sampler sampSmplr, thread float2& vTex) { return samp.sample(sampSmplr, (vTex + float2(0.300000011920928955078125))); } diff --git a/reference/shaders-msl/frag/sampler.frag b/reference/shaders-msl/frag/sampler.frag index 6484161b..84743fbd 100644 --- a/reference/shaders-msl/frag/sampler.frag +++ b/reference/shaders-msl/frag/sampler.frag @@ -17,7 +17,7 @@ struct main0_in }; static inline __attribute__((always_inline)) -float4 sample_texture(thread const texture2d tex, 
thread const sampler texSmplr, thread const float2& uv) +float4 sample_texture(texture2d tex, sampler texSmplr, thread const float2& uv) { return tex.sample(texSmplr, uv); } diff --git a/reference/shaders-msl/frag/separate-image-sampler-argument.frag b/reference/shaders-msl/frag/separate-image-sampler-argument.frag index d196243d..208f5d90 100644 --- a/reference/shaders-msl/frag/separate-image-sampler-argument.frag +++ b/reference/shaders-msl/frag/separate-image-sampler-argument.frag @@ -11,7 +11,7 @@ struct main0_out }; static inline __attribute__((always_inline)) -float4 samp(thread const texture2d t, thread const sampler s) +float4 samp(texture2d t, sampler s) { return t.sample(s, float2(0.5)); } diff --git a/reference/shaders-msl/tese/water_tess.tese b/reference/shaders-msl/tese/water_tess.tese index 7ecbb166..5f63d94f 100644 --- a/reference/shaders-msl/tese/water_tess.tese +++ b/reference/shaders-msl/tese/water_tess.tese @@ -45,7 +45,7 @@ float2 lod_factor(thread const float2& tess_coord, thread float4& vPatchLods) } static inline __attribute__((always_inline)) -float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, thread texture2d uHeightmapDisplacement, thread const sampler uHeightmapDisplacementSmplr) +float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, texture2d uHeightmapDisplacement, sampler uHeightmapDisplacementSmplr) { return mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 0.5)), level(lod.x)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 1.0)), level(lod.x + 1.0)).xyz, float3(lod.y)); } diff --git a/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert b/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert index 8ab252f5..fad06d6a 100644 --- a/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert +++ b/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert @@ -23,7 
+23,7 @@ struct constant_block constant int arraySize = SPIRV_CROSS_CONSTANT_ID_0; static inline __attribute__((always_inline)) -void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array, 3> images) +void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array, 3>& images) { storage[0]->baz = uint4(constants[3]->foo); storage[1]->quux = images[2].read(uint2(int2(constants[1]->bar))).xy; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index e8af45f3..91f1c91f 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -9377,7 +9377,20 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) // Manufacture automatic sampler arg for SampledImage texture if (arg_type.image.dim != DimBuffer) - decl += join(", thread const ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id)); + { + if (arg_type.array.empty()) + { + decl += join(", ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id)); + } + else + { + const char *sampler_address_space = + descriptor_address_space(name_id, + StorageClassUniformConstant, + "thread const"); + decl += join(", ", sampler_address_space, " ", sampler_type(arg_type, arg.id), "& ", to_sampler_expression(arg.id)); + } + } } // Manufacture automatic swizzle arg. 
@@ -12665,6 +12678,39 @@ bool CompilerMSL::type_is_pointer_to_pointer(const SPIRType &type) const return type.pointer_depth > parent_type.pointer_depth && type_is_pointer(parent_type); } +const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass storage, const char *plain_address_space) const +{ + if (msl_options.argument_buffers) + { + bool storage_class_is_descriptor = storage == StorageClassUniform || + storage == StorageClassStorageBuffer || + storage == StorageClassUniformConstant; + + uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); + if (storage_class_is_descriptor && descriptor_set_is_argument_buffer(desc_set)) + { + // An awkward case where we need to emit *more* address space declarations (yay!). + // An example is where we pass down an array of buffer pointers to leaf functions. + // It's a constant array containing pointers to constants. + // The pointer array is always constant however. E.g. + // device SSBO * constant (&array)[N]. + // const device SSBO * constant (&array)[N]. + // constant SSBO * constant (&array)[N]. + // However, this only matters for argument buffers, since for MSL 1.0 style codegen, + // we emit the buffer array on stack instead, and that seems to work just fine apparently. + + // If the argument was marked as being in device address space, any pointer to member would + // be const device, not constant. + if (argument_buffer_device_storage_mask & (1u << desc_set)) + return "const device"; + else + return "constant"; + } + } + + return plain_address_space; +} + string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) { auto &var = get(arg.id); @@ -12683,15 +12729,14 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // Framebuffer fetch is plain value, const looks out of place, but it is not wrong. 
if (type_is_msl_framebuffer_fetch(type)) constref = false; + else if (type_storage == StorageClassUniformConstant) + constref = true; bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler; - // Arrays of images/samplers in MSL are always const. - if (!type.array.empty() && type_is_image) - constref = true; - - const char *cv_qualifier = constref ? "const " : ""; + // For opaque types we handle const later due to descriptor address spaces. + const char *cv_qualifier = (constref && !type_is_image) ? "const " : ""; string decl; // If this is a combined image-sampler for a 2D image with floating-point type, @@ -12763,9 +12808,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl = join(cv_qualifier, type_to_glsl(type, arg.id)); } - bool opaque_handle = type_storage == StorageClassUniformConstant; - - if (!builtin && !opaque_handle && !is_pointer && + if (!builtin && !is_pointer && (type_storage == StorageClassFunction || type_storage == StorageClassGeneric)) { // If the argument is a pure value and not an opaque type, we will pass by value. @@ -12800,33 +12843,15 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else if (is_array(type) && !type_is_image) { - // Arrays of images and samplers are special cased. + // Arrays of opaque types are special cased. if (!address_space.empty()) decl = join(address_space, " ", decl); - if (msl_options.argument_buffers) + const char *argument_buffer_space = descriptor_address_space(name_id, type_storage, nullptr); + if (argument_buffer_space) { - uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet); - if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && - descriptor_set_is_argument_buffer(desc_set)) - { - // An awkward case where we need to emit *more* address space declarations (yay!). 
- // An example is where we pass down an array of buffer pointers to leaf functions. - // It's a constant array containing pointers to constants. - // The pointer array is always constant however. E.g. - // device SSBO * constant (&array)[N]. - // const device SSBO * constant (&array)[N]. - // constant SSBO * constant (&array)[N]. - // However, this only matters for argument buffers, since for MSL 1.0 style codegen, - // we emit the buffer array on stack instead, and that seems to work just fine apparently. - - // If the argument was marked as being in device address space, any pointer to member would - // be const device, not constant. - if (argument_buffer_device_storage_mask & (1u << desc_set)) - decl += " const device"; - else - decl += " constant"; - } + decl += " "; + decl += argument_buffer_space; } // Special case, need to override the array size here if we're using tess level as an argument. @@ -12870,7 +12895,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } } } - else if (!opaque_handle && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) + else if (!type_is_image && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) { // If this is going to be a reference to a variable pointer, the address space // for the reference has to go before the '&', but after the '*'. @@ -12890,6 +12915,27 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += to_restrict(name_id); decl += to_expression(name_id); } + else if (type_is_image) + { + if (type.array.empty()) + { + // For non-arrayed types we can just pass opaque descriptors by value. + // This fixes problems if descriptors are passed by value from argument buffers and plain descriptors + // in same shader. + // There is no address space we can actually use, but value will work. 
+ // This will break if applications attempt to pass down descriptor arrays as arguments, but + // fortunately that is extremely unlikely ... + decl += " "; + decl += to_expression(name_id); + } + else + { + const char *img_address_space = descriptor_address_space(name_id, type_storage, "thread const"); + decl = join(img_address_space, " ", decl); + decl += "& "; + decl += to_expression(name_id); + } + } else { if (!address_space.empty()) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index e065519c..6591e47c 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -872,6 +872,7 @@ protected: std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index); std::string argument_decl(const SPIRFunction::Parameter &arg); + const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const; std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const;