MSL: Rethink how opaque descriptors are passed to leaf functions.

We were passing arrays by value, which the compiler fails to optimize,
causing abysmal performance. To fix this, we need to consider that
descriptors can be in constant or const device address spaces.

Also, lone descriptors are passed by value, so we explicitly remove address
space qualifiers.

One failure case is when a shader passes a texture/sampler array as an
argument. It's all UniformConstant in SPIR-V, but in MSL it might be
thread, const device or constant, so that won't work ...
Global variable use works fine though, and that should cover 99.9999999%
of use cases.
This commit is contained in:
Hans-Kristian Arntzen 2022-01-18 14:34:00 +01:00
parent 08d5f5ed18
commit 5b952d2cbf
33 changed files with 131 additions and 84 deletions

View File

@ -6,7 +6,7 @@
using namespace metal;
static inline __attribute__((always_inline))
void _main(thread const uint3& id, thread texture2d<float, access::read_write> TargetTexture)
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d<float> uInput, thread float4& gl_FragCoord)
float4 load_subpasses(texture2d<float> uInput, thread float4& gl_FragCoord)
{
return uInput.read(uint2(gl_FragCoord.xy));
}

View File

@ -16,25 +16,25 @@ struct main0_in
};
static inline __attribute__((always_inline))
float Samp(thread const float3& uv, thread depth2d<float> uTex, thread sampler uSamp)
float Samp(thread const float3& uv, depth2d<float> uTex, sampler uSamp)
{
return uTex.sample_compare(uSamp, uv.xy, uv.z);
}
static inline __attribute__((always_inline))
float Samp2(thread const float3& uv, thread depth2d<float> uSampler, thread const sampler uSamplerSmplr, thread float3& vUV)
float Samp2(thread const float3& uv, depth2d<float> uSampler, sampler uSamplerSmplr, thread float3& vUV)
{
return uSampler.sample_compare(uSamplerSmplr, vUV.xy, vUV.z);
}
static inline __attribute__((always_inline))
float Samp3(thread const depth2d<float> uT, thread const sampler uS, thread const float3& uv, thread float3& vUV)
float Samp3(depth2d<float> uT, sampler uS, thread const float3& uv, thread float3& vUV)
{
return uT.sample_compare(uS, vUV.xy, vUV.z);
}
static inline __attribute__((always_inline))
float Samp4(thread const depth2d<float> uS, thread const sampler uSSmplr, thread const float3& uv, thread float3& vUV)
float Samp4(depth2d<float> uS, sampler uSSmplr, thread const float3& uv, thread float3& vUV)
{
return uS.sample_compare(uSSmplr, vUV.xy, vUV.z);
}

View File

@ -133,7 +133,7 @@ inline vec<T, 4> spvGatherCompareSwizzle(const thread Tex<T>& t, sampler s, uint
}
static inline __attribute__((always_inline))
float4 doSwizzle(thread texture1d<float> tex1d, thread const sampler tex1dSmplr, constant uint& tex1dSwzl, thread texture2d<float> tex2d, thread const sampler tex2dSmplr, constant uint& tex2dSwzl, thread texture3d<float> tex3d, thread const sampler tex3dSmplr, constant uint& tex3dSwzl, thread texturecube<float> texCube, thread const sampler texCubeSmplr, constant uint& texCubeSwzl, thread texture2d_array<float> tex2dArray, thread const sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, thread texturecube_array<float> texCubeArray, thread const sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, thread depth2d<float> depth2d, thread const sampler depth2dSmplr, constant uint& depth2dSwzl, thread depthcube<float> depthCube, thread const sampler depthCubeSmplr, constant uint& depthCubeSwzl, thread depth2d_array<float> depth2dArray, thread const sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, thread depthcube_array<float> depthCubeArray, thread const sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, thread texture2d<float> texBuffer)
float4 doSwizzle(texture1d<float> tex1d, sampler tex1dSmplr, constant uint& tex1dSwzl, texture2d<float> tex2d, sampler tex2dSmplr, constant uint& tex2dSwzl, texture3d<float> tex3d, sampler tex3dSmplr, constant uint& tex3dSwzl, texturecube<float> texCube, sampler texCubeSmplr, constant uint& texCubeSwzl, texture2d_array<float> tex2dArray, sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, texturecube_array<float> texCubeArray, sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, depth2d<float> depth2d, sampler depth2dSmplr, constant uint& depth2dSwzl, depthcube<float> depthCube, sampler depthCubeSmplr, constant uint& depthCubeSwzl, depth2d_array<float> depth2dArray, sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, depthcube_array<float> depthCubeArray, sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, texture2d<float> texBuffer)
{
float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl);
c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl);

View File

@ -66,7 +66,7 @@ uint get_bits(thread const uint4& v, thread const int& swap)
}
static inline __attribute__((always_inline))
float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d<uint> input_stream)
float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, texture2d<uint> input_stream)
{
float4 result = float4(0.0, 0.0, 0.0, 1.0);
bool reverse_order = false;
@ -135,7 +135,7 @@ float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thr
}
static inline __attribute__((always_inline))
float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d<uint> buff_in_2, texture2d<uint> buff_in_1)
{
int param = location;
attr_desc desc = fetch_desc(param, v_227);
@ -155,7 +155,7 @@ float4 read_location(thread const int& location, constant VertexBuffer& v_227, t
}
static inline __attribute__((always_inline))
void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d<uint> buff_in_2, texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
{
int param = 3;
float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1);

View File

@ -138,7 +138,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 do_samples(thread const texture1d<float> t1, thread const sampler t1Smplr, constant uint& t1Swzl, thread const texture2d<float> t2, constant uint& t2Swzl, thread const texture3d<float> t3, thread const sampler t3Smplr, constant uint& t3Swzl, thread const texturecube<float> tc, constant uint& tcSwzl, thread const texture2d_array<float> t2a, thread const sampler t2aSmplr, constant uint& t2aSwzl, thread const texturecube_array<float> tca, thread const sampler tcaSmplr, constant uint& tcaSwzl, thread const texture2d<float> tb, thread const depth2d<float> d2, thread const sampler d2Smplr, constant uint& d2Swzl, thread const depthcube<float> dc, thread const sampler dcSmplr, constant uint& dcSwzl, thread const depth2d_array<float> d2a, constant uint& d2aSwzl, thread const depthcube_array<float> dca, thread const sampler dcaSmplr, constant uint& dcaSwzl, thread sampler defaultSampler, thread sampler shadowSampler)
float4 do_samples(texture1d<float> t1, sampler t1Smplr, constant uint& t1Swzl, texture2d<float> t2, constant uint& t2Swzl, texture3d<float> t3, sampler t3Smplr, constant uint& t3Swzl, texturecube<float> tc, constant uint& tcSwzl, texture2d_array<float> t2a, sampler t2aSmplr, constant uint& t2aSwzl, texturecube_array<float> tca, sampler tcaSmplr, constant uint& tcaSwzl, texture2d<float> tb, depth2d<float> d2, sampler d2Smplr, constant uint& d2Swzl, depthcube<float> dc, sampler dcSmplr, constant uint& dcSwzl, depth2d_array<float> d2a, constant uint& d2aSwzl, depthcube_array<float> dca, sampler dcaSmplr, constant uint& dcaSwzl, sampler defaultSampler, sampler shadowSampler)
{
float4 c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl);
c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0)), t2Swzl);

View File

@ -6,7 +6,7 @@
using namespace metal;
static inline __attribute__((always_inline))
void _main(thread const uint3& id, thread texture2d<float, access::read_write> TargetTexture)
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);

View File

@ -21,14 +21,14 @@ struct main0_in
};
static inline __attribute__((always_inline))
void _108(int _109, thread texture2d<float> v_8, device _7& v_10)
void _108(int _109, texture2d<float> v_8, device _7& v_10)
{
int2 _113 = int2(_109 - 8 * (_109 / 8), _109 / 8);
v_10._m0[_109] = v_8.read(uint2(_113), 0);
}
static inline __attribute__((always_inline))
float4 _98(float4 _119, thread texture2d<float> v_8, device _7& v_10)
float4 _98(float4 _119, texture2d<float> v_8, device _7& v_10)
{
for (int _121 = 0; _121 < 64; _121++)
{

View File

@ -21,13 +21,13 @@ struct main0_in
};
static inline __attribute__((always_inline))
void _108(int _109, thread texture2d<float> v_8, thread sampler v_9, device _7& v_10)
void _108(int _109, texture2d<float> v_8, sampler v_9, device _7& v_10)
{
v_10._m0[_109] = v_8.sample(v_9, (float2(int2(_109 - 8 * (_109 / 8), _109 / 8)) / float2(8.0)), level(0.0));
}
static inline __attribute__((always_inline))
float4 _98(float4 _121, thread texture2d<float> v_8, thread sampler v_9, device _7& v_10)
float4 _98(float4 _121, texture2d<float> v_8, sampler v_9, device _7& v_10)
{
for (int _123 = 0; _123 < 64; _123++)
{

View File

@ -16,13 +16,13 @@ struct main0_in
};
static inline __attribute__((always_inline))
float sample_combined(thread float3& vUV, thread depth2d<float> uShadow, thread const sampler uShadowSmplr)
float sample_combined(thread float3& vUV, depth2d<float> uShadow, sampler uShadowSmplr)
{
return uShadow.sample_compare(uShadowSmplr, vUV.xy, vUV.z);
}
static inline __attribute__((always_inline))
float sample_separate(thread float3& vUV, thread depth2d<float> uTexture, thread sampler uSampler)
float sample_separate(thread float3& vUV, depth2d<float> uTexture, sampler uSampler)
{
return uTexture.sample_compare(uSampler, vUV.xy, vUV.z);
}

View File

@ -63,7 +63,7 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_in_function2(thread texture2d<float> uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array<texture2d<float>, 4> uTexture2, thread const array<sampler, 2> uSampler, thread const array<texture2d<float>, 2> uTextures, thread const array<sampler, 2> uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers)
float4 sample_in_function2(texture2d<float> uTexture, sampler uTextureSmplr, thread float2& vUV, constant array<texture2d<float>, 4>& uTexture2, constant array<sampler, 2>& uSampler, constant array<texture2d<float>, 2>& uTextures, constant array<sampler, 2>& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers)
{
float4 ret = uTexture.sample(uTextureSmplr, vUV);
ret += uTexture2[2].sample(uSampler[1], vUV);
@ -75,7 +75,7 @@ float4 sample_in_function2(thread texture2d<float> uTexture, thread const sample
}
static inline __attribute__((always_inline))
float4 sample_in_function(thread texture2d<float> uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array<texture2d<float>, 4> uTexture2, thread const array<sampler, 2> uSampler, thread const array<texture2d<float>, 2> uTextures, thread const array<sampler, 2> uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4])
float4 sample_in_function(texture2d<float> uTexture, sampler uTextureSmplr, thread float2& vUV, constant array<texture2d<float>, 4>& uTexture2, constant array<sampler, 2>& uSampler, constant array<texture2d<float>, 2>& uTextures, constant array<sampler, 2>& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4])
{
float4 ret = sample_in_function2(uTexture, uTextureSmplr, vUV, uTexture2, uSampler, uTextures, uTexturesSmplr, v_60, ssbos, registers);
ret += v_90.ubo;

View File

@ -95,13 +95,13 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_in_func(thread const array<texture2d<float>, 4> uSampler, thread const array<sampler, 4> uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV)
float4 sample_in_func(constant array<texture2d<float>, 4>& uSampler, constant array<sampler, 4>& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV)
{
return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]);
}
static inline __attribute__((always_inline))
float4 sample_single_in_func(thread const texture2d<float> s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
float4 sample_single_in_func(texture2d<float> s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
{
return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl);
}

View File

@ -86,13 +86,13 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_in_func(thread const array<texture2d<float>, 4> uSampler, thread const array<sampler, 4> uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV)
float4 sample_in_func(thread const array<texture2d<float>, 4>& uSampler, thread const array<sampler, 4>& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV)
{
return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]);
}
static inline __attribute__((always_inline))
float4 sample_single_in_func(thread const texture2d<float> s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
float4 sample_single_in_func(texture2d<float> s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
{
return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl);
}

View File

@ -83,19 +83,19 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_in_func_1(thread const array<texture2d<float>, 4> uSampler0, thread const array<sampler, 4> uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV)
float4 sample_in_func_1(constant array<texture2d<float>, 4>& uSampler0, constant array<sampler, 4>& uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV)
{
return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]);
}
static inline __attribute__((always_inline))
float4 sample_in_func_2(thread float2& vUV, thread texture2d<float> uSampler1, thread const sampler uSampler1Smplr, constant uint& uSampler1Swzl)
float4 sample_in_func_2(thread float2& vUV, texture2d<float> uSampler1, sampler uSampler1Smplr, constant uint& uSampler1Swzl)
{
return spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, vUV), uSampler1Swzl);
}
static inline __attribute__((always_inline))
float4 sample_single_in_func(thread const texture2d<float> s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
float4 sample_single_in_func(texture2d<float> s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
{
return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl);
}

View File

@ -76,13 +76,13 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_in_func(thread const array<texture2d<float>, 4> uSampler, thread const array<sampler, 4> uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV)
float4 sample_in_func(thread const array<texture2d<float>, 4>& uSampler, thread const array<sampler, 4>& uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV)
{
return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]);
}
static inline __attribute__((always_inline))
float4 sample_single_in_func(thread const texture2d<float> s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
float4 sample_single_in_func(texture2d<float> s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV)
{
return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl);
}

View File

@ -16,7 +16,7 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 foo(thread bool& gl_HelperInvocation, thread texture2d<float> uSampler, thread const sampler uSamplerSmplr, thread float2& vUV)
float4 foo(thread bool& gl_HelperInvocation, texture2d<float> uSampler, sampler uSamplerSmplr, thread float2& vUV)
{
float4 color;
if (!gl_HelperInvocation)

View File

@ -38,13 +38,13 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 samp_array(thread const array<texture2d<float>, 10000> uSamplers, thread const array<sampler, 10000> uSamplersSmplr, thread float2& vUV, constant UBO* const device (&vs)[10000])
float4 samp_array(const device array<texture2d<float>, 10000>& uSamplers, const device array<sampler, 10000>& uSamplersSmplr, thread float2& vUV, constant UBO* const device (&vs)[10000])
{
return uSamplers[9999].sample(uSamplersSmplr[9999], vUV) + vs[5000]->v;
}
static inline __attribute__((always_inline))
float4 samp_single(thread float2& vUV, thread texture2d<float> uSampler, thread const sampler uSamplerSmplr)
float4 samp_single(thread float2& vUV, texture2d<float> uSampler, sampler uSamplerSmplr)
{
return uSampler.sample(uSamplerSmplr, vUV);
}

View File

@ -16,7 +16,7 @@ struct main0_in
};
static inline __attribute__((always_inline))
void from_function(thread float2& FragColor, thread texture2d<float> uSampler2D, thread const sampler uSampler2DSmplr, thread float3& vUV, thread texture3d<float> uSampler3D, thread const sampler uSampler3DSmplr, thread texturecube<float> uSamplerCube, thread const sampler uSamplerCubeSmplr, thread texture2d<float> uTexture2D, thread sampler uSampler, thread texture3d<float> uTexture3D, thread texturecube<float> uTextureCube)
void from_function(thread float2& FragColor, texture2d<float> uSampler2D, sampler uSampler2DSmplr, thread float3& vUV, texture3d<float> uSampler3D, sampler uSampler3DSmplr, texturecube<float> uSamplerCube, sampler uSamplerCubeSmplr, texture2d<float> uTexture2D, sampler uSampler, texture3d<float> uTexture3D, texturecube<float> uTextureCube)
{
float2 _22;
_22.x = uSampler2D.calculate_clamped_lod(uSampler2DSmplr, vUV.xy);

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d_ms_array<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_Layer)
float4 load_subpasses(texture2d_ms_array<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_Layer)
{
float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_Layer, gl_SampleID);
return _24;

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d_ms<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord)
float4 load_subpasses(texture2d_ms<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord)
{
float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_SampleID);
return _24;

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d_ms_array<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_ViewIndex)
float4 load_subpasses(texture2d_ms_array<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_ViewIndex)
{
float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex, gl_SampleID);
return _24;

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d_array<float> uInput, thread float4& gl_FragCoord, thread uint& gl_Layer)
float4 load_subpasses(texture2d_array<float> uInput, thread float4& gl_FragCoord, thread uint& gl_Layer)
{
return uInput.read(uint2(gl_FragCoord.xy), gl_Layer);
}

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d<float> uInput, thread float4& gl_FragCoord)
float4 load_subpasses(texture2d<float> uInput, thread float4& gl_FragCoord)
{
return uInput.read(uint2(gl_FragCoord.xy));
}

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 load_subpasses(thread const texture2d_array<float> uInput, thread float4& gl_FragCoord, thread uint& gl_ViewIndex)
float4 load_subpasses(texture2d_array<float> uInput, thread float4& gl_FragCoord, thread uint& gl_ViewIndex)
{
return uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex);
}

View File

@ -16,19 +16,19 @@ struct main0_in
};
static inline __attribute__((always_inline))
float sample_normal2(thread const depth2d<float> tex, thread sampler uSampler, thread float3& vUV)
float sample_normal2(depth2d<float> tex, sampler uSampler, thread float3& vUV)
{
return float4(tex.sample(uSampler, vUV.xy)).x;
}
static inline __attribute__((always_inline))
float sample_normal(thread const depth2d<float> tex, thread sampler uSampler, thread float3& vUV)
float sample_normal(depth2d<float> tex, sampler uSampler, thread float3& vUV)
{
return sample_normal2(tex, uSampler, vUV);
}
static inline __attribute__((always_inline))
float sample_comp(thread const depth2d<float> tex, thread float3& vUV, thread sampler uSamplerShadow)
float sample_comp(depth2d<float> tex, thread float3& vUV, sampler uSamplerShadow)
{
return tex.sample_compare(uSamplerShadow, vUV.xy, vUV.z);
}

View File

@ -11,13 +11,13 @@ struct main0_out
};
static inline __attribute__((always_inline))
float sample_depth_from_function(thread const depth2d<float> uT, thread const sampler uS)
float sample_depth_from_function(depth2d<float> uT, sampler uS)
{
return uT.sample_compare(uS, float3(0.5).xy, 0.5);
}
static inline __attribute__((always_inline))
float sample_color_from_function(thread const texture2d<float> uT, thread const sampler uS)
float sample_color_from_function(texture2d<float> uT, sampler uS)
{
return uT.sample(uS, float2(0.5)).x;
}

View File

@ -17,19 +17,19 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array<texture2d<float>, 4> uSampler, thread const array<sampler, 4> uSamplerSmplr)
float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array<texture2d<float>, 4>& uSampler, thread const array<sampler, 4>& uSamplerSmplr)
{
return uSampler[vIndex].sample(uSamplerSmplr[vIndex], (vTex + float2(0.100000001490116119384765625)));
}
static inline __attribute__((always_inline))
float4 sample_from_argument(thread const array<texture2d<float>, 4> samplers, thread const array<sampler, 4> samplersSmplr, thread int& vIndex, thread float2& vTex)
float4 sample_from_argument(thread const array<texture2d<float>, 4>& samplers, thread const array<sampler, 4>& samplersSmplr, thread int& vIndex, thread float2& vTex)
{
return samplers[vIndex].sample(samplersSmplr[vIndex], (vTex + float2(0.20000000298023223876953125)));
}
static inline __attribute__((always_inline))
float4 sample_single_from_argument(thread const texture2d<float> samp, thread const sampler sampSmplr, thread float2& vTex)
float4 sample_single_from_argument(texture2d<float> samp, sampler sampSmplr, thread float2& vTex)
{
return samp.sample(sampSmplr, (vTex + float2(0.300000011920928955078125)));
}

View File

@ -17,7 +17,7 @@ struct main0_in
};
static inline __attribute__((always_inline))
float4 sample_texture(thread const texture2d<float> tex, thread const sampler texSmplr, thread const float2& uv)
float4 sample_texture(texture2d<float> tex, sampler texSmplr, thread const float2& uv)
{
return tex.sample(texSmplr, uv);
}

View File

@ -11,7 +11,7 @@ struct main0_out
};
static inline __attribute__((always_inline))
float4 samp(thread const texture2d<float> t, thread const sampler s)
float4 samp(texture2d<float> t, sampler s)
{
return t.sample(s, float2(0.5));
}

View File

@ -45,7 +45,7 @@ float2 lod_factor(thread const float2& tess_coord, thread float4& vPatchLods)
}
static inline __attribute__((always_inline))
float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, thread texture2d<float> uHeightmapDisplacement, thread const sampler uHeightmapDisplacementSmplr)
float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, texture2d<float> uHeightmapDisplacement, sampler uHeightmapDisplacementSmplr)
{
return mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 0.5)), level(lod.x)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 1.0)), level(lod.x + 1.0)).xyz, float3(lod.y));
}

View File

@ -23,7 +23,7 @@ struct constant_block
constant int arraySize = SPIRV_CROSS_CONSTANT_ID_0;
static inline __attribute__((always_inline))
void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array<texture2d<int>, 3> images)
void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array<texture2d<int>, 3>& images)
{
storage[0]->baz = uint4(constants[3]->foo);
storage[1]->quux = images[2].read(uint2(int2(constants[1]->bar))).xy;

View File

@ -9377,7 +9377,20 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
// Manufacture automatic sampler arg for SampledImage texture
if (arg_type.image.dim != DimBuffer)
decl += join(", thread const ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id));
{
if (arg_type.array.empty())
{
decl += join(", ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id));
}
else
{
const char *sampler_address_space =
descriptor_address_space(name_id,
StorageClassUniformConstant,
"thread const");
decl += join(", ", sampler_address_space, " ", sampler_type(arg_type, arg.id), "& ", to_sampler_expression(arg.id));
}
}
}
// Manufacture automatic swizzle arg.
@ -12665,6 +12678,39 @@ bool CompilerMSL::type_is_pointer_to_pointer(const SPIRType &type) const
return type.pointer_depth > parent_type.pointer_depth && type_is_pointer(parent_type);
}
// Chooses the address space qualifier to emit for a descriptor argument.
//
// id                  - ID whose DescriptorSet decoration is looked up.
// storage             - storage class of the argument.
// plain_address_space - value handed back unchanged when the argument-buffer
//                       rules below do not apply.
//
// Returns "const device" or "constant" for descriptors that live in an argument buffer,
// otherwise plain_address_space.
const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass storage, const char *plain_address_space) const
{
	// The extra qualifiers only matter for argument buffers. For MSL 1.0 style codegen,
	// the buffer array is emitted on the stack instead, and that seems to work fine.
	if (!msl_options.argument_buffers)
		return plain_address_space;

	bool is_descriptor_storage = false;
	switch (storage)
	{
	case StorageClassUniform:
	case StorageClassStorageBuffer:
	case StorageClassUniformConstant:
		is_descriptor_storage = true;
		break;

	default:
		break;
	}

	const uint32_t set_index = get_decoration(id, DecorationDescriptorSet);
	if (!is_descriptor_storage || !descriptor_set_is_argument_buffer(set_index))
		return plain_address_space;

	// This is the awkward case where *more* address space declarations are needed.
	// Example: an array of buffer pointers handed down to a leaf function.
	// The pointer array itself is always constant, while the pointees vary, e.g.
	//   device SSBO * constant (&array)[N]
	//   const device SSBO * constant (&array)[N]
	//   constant SSBO * constant (&array)[N]
	// If the argument buffer was marked as living in device address space, any pointer to
	// a member is const device rather than constant.
	const bool set_uses_device_storage = (argument_buffer_device_storage_mask & (1u << set_index)) != 0;
	return set_uses_device_storage ? "const device" : "constant";
}
string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
{
auto &var = get<SPIRVariable>(arg.id);
@ -12683,15 +12729,14 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
// Framebuffer fetch is plain value, const looks out of place, but it is not wrong.
if (type_is_msl_framebuffer_fetch(type))
constref = false;
else if (type_storage == StorageClassUniformConstant)
constref = true;
bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
type.basetype == SPIRType::Sampler;
// Arrays of images/samplers in MSL are always const.
if (!type.array.empty() && type_is_image)
constref = true;
const char *cv_qualifier = constref ? "const " : "";
// For opaque types we handle const later due to descriptor address spaces.
const char *cv_qualifier = (constref && !type_is_image) ? "const " : "";
string decl;
// If this is a combined image-sampler for a 2D image with floating-point type,
@ -12763,9 +12808,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
decl = join(cv_qualifier, type_to_glsl(type, arg.id));
}
bool opaque_handle = type_storage == StorageClassUniformConstant;
if (!builtin && !opaque_handle && !is_pointer &&
if (!builtin && !is_pointer &&
(type_storage == StorageClassFunction || type_storage == StorageClassGeneric))
{
// If the argument is a pure value and not an opaque type, we will pass by value.
@ -12800,33 +12843,15 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
}
else if (is_array(type) && !type_is_image)
{
// Arrays of images and samplers are special cased.
// Arrays of opaque types are special cased.
if (!address_space.empty())
decl = join(address_space, " ", decl);
if (msl_options.argument_buffers)
const char *argument_buffer_space = descriptor_address_space(name_id, type_storage, nullptr);
if (argument_buffer_space)
{
uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet);
if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) &&
descriptor_set_is_argument_buffer(desc_set))
{
// An awkward case where we need to emit *more* address space declarations (yay!).
// An example is where we pass down an array of buffer pointers to leaf functions.
// It's a constant array containing pointers to constants.
// The pointer array is always constant however. E.g.
// device SSBO * constant (&array)[N].
// const device SSBO * constant (&array)[N].
// constant SSBO * constant (&array)[N].
// However, this only matters for argument buffers, since for MSL 1.0 style codegen,
// we emit the buffer array on stack instead, and that seems to work just fine apparently.
// If the argument was marked as being in device address space, any pointer to member would
// be const device, not constant.
if (argument_buffer_device_storage_mask & (1u << desc_set))
decl += " const device";
else
decl += " constant";
}
decl += " ";
decl += argument_buffer_space;
}
// Special case, need to override the array size here if we're using tess level as an argument.
@ -12870,7 +12895,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
}
}
}
else if (!opaque_handle && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct))
else if (!type_is_image && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct))
{
// If this is going to be a reference to a variable pointer, the address space
// for the reference has to go before the '&', but after the '*'.
@ -12890,6 +12915,27 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
decl += to_restrict(name_id);
decl += to_expression(name_id);
}
else if (type_is_image)
{
if (type.array.empty())
{
// For non-arrayed types we can just pass opaque descriptors by value.
// This fixes problems if descriptors are passed by value from argument buffers and plain descriptors
// in same shader.
// There is no address space we can actually use, but value will work.
// This will break if applications attempt to pass down descriptor arrays as arguments, but
// fortunately that is extremely unlikely ...
decl += " ";
decl += to_expression(name_id);
}
else
{
const char *img_address_space = descriptor_address_space(name_id, type_storage, "thread const");
decl = join(img_address_space, " ", decl);
decl += "& ";
decl += to_expression(name_id);
}
}
else
{
if (!address_space.empty())

View File

@ -872,6 +872,7 @@ protected:
std::string member_attribute_qualifier(const SPIRType &type, uint32_t index);
std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index);
std::string argument_decl(const SPIRFunction::Parameter &arg);
// Picks the MSL address space qualifier for a descriptor passed down to a function.
// Yields "const device" or "constant" when argument buffers are enabled and the descriptor's
// set is an argument buffer; otherwise returns plain_address_space unchanged.
const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const;
std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp);
uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0);
uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const;