Merge pull request #2306 from KhronosGroup/pr-2292

MSL: Implement support for EXT_mutable_descriptor_type and general aliasing with argument buffers
This commit is contained in:
Hans-Kristian Arntzen 2024-04-03 15:16:49 +02:00 committed by GitHub
commit 06407561ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 766 additions and 162 deletions

View File

@ -5,10 +5,13 @@
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
TargetTexture.fence();
spvImageFence(TargetTexture);
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@ -0,0 +1,162 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T>
struct spvDescriptor
{
T value;
};
template<typename T>
struct spvDescriptorArray
{
spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(ptr)
{
}
const device T& operator [] (size_t i) const
{
return ptr[i].value;
}
const device spvDescriptor<T>* ptr;
};
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
struct B10
{
float v;
};
struct B11
{
float v;
};
struct B20
{
float v;
};
struct B21
{
float v;
};
struct B30
{
uint i;
};
struct B31
{
float v;
};
struct B40
{
float v;
};
struct B41
{
float v;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
constant float4 _477 = {};
struct spvDescriptorSetBuffer0
{
array<texture2d<float>, 8> t00 [[id(0)]];
// Overlapping binding: array<texture2d<uint>, 8> t01 [[id(0)]];
// Overlapping binding: array<texture2d<int>, 8> t02 [[id(0)]];
// Overlapping binding: array<texture_buffer<uint, access::read_write>, 8> u0 [[id(0)]];
// Overlapping binding: array<sampler, 8> s00 [[id(0)]];
};
struct spvDescriptorSetBuffer1
{
spvDescriptor<device B30 *> b30 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<constant B31 *> b31 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture2d<uint>> t31 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture2d<int>> t32 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture_buffer<uint, access::read_write>> u3 [[id(0)]][1] /* unsized array hack */;
};
struct spvDescriptorSetBuffer2
{
device B20* b20 [[id(0)]][8];
// Overlapping binding: constant B21* b21 [[id(0)]][8];
// Overlapping binding: array<texture2d<uint>, 8> t21 [[id(0)]];
// Overlapping binding: array<texture2d<int>, 8> t22 [[id(0)]];
// Overlapping binding: array<texture_buffer<uint, access::read_write>, 8> u2 [[id(0)]];
};
struct spvDescriptorSetBuffer3
{
device B10* b10 [[id(0)]][8];
// Overlapping binding: constant B11* b11 [[id(0)]][8];
// Overlapping binding: array<texture_buffer<uint, access::read_write>, 8> u1 [[id(0)]];
};
struct spvDescriptorSetBuffer4
{
device B40* b40 [[id(0)]];
// Overlapping binding: constant B41* b41 [[id(0)]];
// Overlapping binding: texture2d<uint> t41 [[id(0)]];
// Overlapping binding: texture2d<int> t42 [[id(0)]];
// Overlapping binding: texture_buffer<uint, access::read_write> u4 [[id(0)]];
};
kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], const device spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant spvDescriptorSetBuffer2& spvDescriptorSet2 [[buffer(2)]], constant spvDescriptorSetBuffer3& spvDescriptorSet3 [[buffer(3)]], constant spvDescriptorSetBuffer4& spvDescriptorSet4 [[buffer(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
spvDescriptorArray<device B30*> b30 {spvDescriptorSet1.b30};
spvDescriptorArray<texture2d<uint>> t31 {reinterpret_cast<const device spvDescriptor<texture2d<uint>>*>(&spvDescriptorSet1.b30)};
spvDescriptorArray<texture2d<int>> t32 {reinterpret_cast<const device spvDescriptor<texture2d<int>>*>(&spvDescriptorSet1.b30)};
spvDescriptorArray<constant B31*> b31 {reinterpret_cast<spvDescriptor<constant B31 *> const device *>(&spvDescriptorSet1.b30)};
spvDescriptorArray<texture_buffer<uint, access::read_write>> u3 {reinterpret_cast<const device spvDescriptor<texture_buffer<uint, access::read_write>>*>(&spvDescriptorSet1.b30)};
const device auto &t01 = reinterpret_cast<const device array<texture2d<uint>, 8> &>(spvDescriptorSet0.t00);
const device auto &t02 = reinterpret_cast<const device array<texture2d<int>, 8> &>(spvDescriptorSet0.t00);
const device auto &u0 = reinterpret_cast<const device array<texture_buffer<uint, access::read_write>, 8> &>(spvDescriptorSet0.t00);
const device auto &s00 = reinterpret_cast<const device array<sampler, 8> &>(spvDescriptorSet0.t00);
constant auto &b21 = reinterpret_cast<constant B21* constant (&)[8]>(spvDescriptorSet2.b20);
constant auto &t21 = reinterpret_cast<constant array<texture2d<uint>, 8> &>(spvDescriptorSet2.b20);
constant auto &t22 = reinterpret_cast<constant array<texture2d<int>, 8> &>(spvDescriptorSet2.b20);
constant auto &u2 = reinterpret_cast<constant array<texture_buffer<uint, access::read_write>, 8> &>(spvDescriptorSet2.b20);
constant auto &b11 = reinterpret_cast<constant B11* constant (&)[8]>(spvDescriptorSet3.b10);
constant auto &u1 = reinterpret_cast<constant array<texture_buffer<uint, access::read_write>, 8> &>(spvDescriptorSet3.b10);
constant auto &b41 = *reinterpret_cast<constant B41* constant &>(spvDescriptorSet4.b40);
constant auto &t41 = reinterpret_cast<constant texture2d<uint> &>(spvDescriptorSet4.b40);
constant auto &t42 = reinterpret_cast<constant texture2d<int> &>(spvDescriptorSet4.b40);
constant auto &u4 = reinterpret_cast<constant texture_buffer<uint, access::read_write> &>(spvDescriptorSet4.b40);
float4 _292 = spvDescriptorSet0.t00[0].sample(s00[3], float2(0.0), level(0.0));
_292.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
_292.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
spvImageFence(u0[2]);
_292.z = as_type<float>(u0[2].read(uint(0)).x);
float4 _448;
_448.x = spvDescriptorSet3.b10[3]->v;
_448.y = b11[4]->v;
spvImageFence(u1[2]);
_448.z = as_type<float>(u1[2].read(uint(0)).x);
float _342 = spvDescriptorSet2.b20[3]->v;
spvImageFence(u2[2]);
uint _356 = b30[gl_WorkGroupID.x]->i;
uint _388 = _356 + 6u;
spvImageFence(u3[_388]);
float _410 = (*spvDescriptorSet4.b40).v;
spvImageFence(u4);
u0[0].write(as_type<uint4>(_292), uint(0));
u1[0].write(as_type<uint4>(_448), uint(0));
u2[0].write(as_type<uint4>(float4(as_type<float>(t21[1].read(uint2(int2(0)), 0).x), as_type<float>(t22[2].read(uint2(int2(0)), 0).x), _342 + as_type<float>(u2[2].read(uint(0)).x), b21[4]->v)), uint(0));
u3[0].write(as_type<uint4>(float4(as_type<float>(t31[_356 + 2u].read(uint2(int2(0)), 0).x), as_type<float>(t32[_356 + 3u].read(uint2(int2(0)), 0).x), b31[_356 + 5u]->v, as_type<float>(u3[_388].read(uint(0)).x))), uint(0));
u4.write(as_type<uint4>(float4(as_type<float>(t41.read(uint2(int2(0)), 0).x), as_type<float>(t42.read(uint2(int2(0)), 0).x), _410 + b41.v, as_type<float>(u4.read(uint(0)).x))), uint(0));
}

View File

@ -1,3 +1,4 @@
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
#include <metal_stdlib>
#include <simd/simd.h>
@ -87,25 +88,31 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
struct spvDescriptorSetBuffer0
{
device SSBO_A* ssbo_a [[id(0)]];
constant UBO_C* ubo_c [[id(1)]];
// Overlapping binding: constant UBO_D* ubo_d [[id(1)]];
device SSBO_As* ssbo_as [[id(2)]][4];
// Overlapping binding: device SSBO_Bs* ssbo_bs [[id(2)]][4];
// Overlapping binding: const device SSBO_BsRO* ssbo_bs_readonly [[id(2)]][4];
constant UBO_Cs* ubo_cs [[id(6)]][4];
// Overlapping binding: constant UBO_Ds* ubo_ds [[id(6)]][4];
device SSBO_A* ssbo_a [[id(10)]];
// Overlapping binding: device SSBO_B* ssbo_b [[id(10)]];
// Overlapping binding: const device SSBO_BRO* ssbo_b_readonly [[id(10)]];
};
kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding11 [[buffer(11)]], constant void* spvBufferAliasSet2Binding12 [[buffer(12)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0;
constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1;
device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0;
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1;
const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0;
device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a);
constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c);
const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a);
const device auto& ssbo_bs = (device SSBO_Bs* const device (&)[4])spvDescriptorSet0.ssbo_as;
const device auto& ubo_ds = (constant UBO_Ds* const device (&)[4])spvDescriptorSet0.ubo_cs;
const device auto& ssbo_bs_readonly = (const device SSBO_BsRO* const device (&)[4])spvDescriptorSet0.ssbo_as;
device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding11;
constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding12;
device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding11;
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding12;
const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding11;
constant auto &ubo_d = *reinterpret_cast<constant UBO_D* const device &>(spvDescriptorSet0.ubo_c);
const device auto &ssbo_bs = reinterpret_cast<device SSBO_Bs* const device (&)[4]>(spvDescriptorSet0.ssbo_as);
const device auto &ssbo_bs_readonly = reinterpret_cast<const device SSBO_BsRO* const device (&)[4]>(spvDescriptorSet0.ssbo_as);
const device auto &ubo_ds = reinterpret_cast<constant UBO_Ds* const device (&)[4]>(spvDescriptorSet0.ubo_cs);
device auto &ssbo_b = *reinterpret_cast<device SSBO_B* const device &>(spvDescriptorSet0.ssbo_a);
const device auto &ssbo_b_readonly = *reinterpret_cast<const device SSBO_BRO* const device &>(spvDescriptorSet0.ssbo_a);
(*spvDescriptorSet0.ssbo_a).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_c).data[gl_WorkGroupID.x].x + _42.reg;
ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x];
spvDescriptorSet0.ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_cs[gl_WorkGroupID.x]->data[0].x;

View File

@ -89,8 +89,14 @@ struct spvDescriptorSetBuffer0
{
device SSBO_A* ssbo_a [[id(0)]];
constant UBO_C* ubo_c [[id(1)]];
device SSBO_As* ssbo_as [[id(2)]][4];
constant UBO_Cs* ubo_cs [[id(6)]][4];
device SSBO_B* ssbo_b [[id(2)]];
constant UBO_D* ubo_d [[id(3)]];
const device SSBO_BRO* ssbo_b_readonly [[id(4)]];
device SSBO_As* ssbo_as [[id(5)]][4];
constant UBO_Cs* ubo_cs [[id(9)]][4];
device SSBO_Bs* ssbo_bs [[id(13)]][4];
constant UBO_Ds* ubo_ds [[id(17)]][4];
const device SSBO_BsRO* ssbo_bs_readonly [[id(21)]][4];
};
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
@ -100,16 +106,10 @@ kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0
device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0;
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1;
const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0;
device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a);
constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c);
const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a);
constant auto& ssbo_bs = (device SSBO_Bs* constant (&)[4])spvDescriptorSet0.ssbo_as;
constant auto& ubo_ds = (constant UBO_Ds* constant (&)[4])spvDescriptorSet0.ubo_cs;
constant auto& ssbo_bs_readonly = (const device SSBO_BsRO* constant (&)[4])spvDescriptorSet0.ssbo_as;
(*spvDescriptorSet0.ssbo_a).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_c).data[gl_WorkGroupID.x].x + _42.reg;
ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x];
(*spvDescriptorSet0.ssbo_b).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_d).data[gl_WorkGroupID.y].xy + (*spvDescriptorSet0.ssbo_b_readonly).data[gl_GlobalInvocationID.x];
spvDescriptorSet0.ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_cs[gl_WorkGroupID.x]->data[0].x;
ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x];
spvDescriptorSet0.ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_ds[gl_WorkGroupID.x]->data[0].xy + spvDescriptorSet0.ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x];
ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x;
ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x];
}

View File

@ -1,11 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{
uImageArray.fence();
spvImageFence(uImageArray);
uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
}

View File

@ -5,10 +5,13 @@
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
static inline __attribute__((always_inline))
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
{
TargetTexture.fence();
spvImageFence(TargetTexture);
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
float2 storeTemp = loaded + float2(1.0);
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));

View File

@ -0,0 +1,196 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T>
struct spvDescriptor
{
T value;
};
template<typename T>
struct spvDescriptorArray
{
spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(ptr)
{
}
const device T& operator [] (size_t i) const
{
return ptr[i].value;
}
const device spvDescriptor<T>* ptr;
};
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
struct B10
{
float v;
};
struct B11
{
float v;
};
struct B20
{
float v;
};
struct B21
{
float v;
};
struct B30
{
uint i;
};
struct B31
{
float v;
};
struct B40
{
float v;
};
struct B41
{
float v;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
struct spvDescriptorSetBuffer0
{
array<texture2d<float>, 8> t00 [[id(0)]];
// Overlapping binding: array<texture2d<uint>, 8> t01 [[id(0)]];
// Overlapping binding: array<texture2d<int>, 8> t02 [[id(0)]];
// Overlapping binding: array<texture_buffer<uint, access::read_write>, 8> u0 [[id(0)]];
// Overlapping binding: array<sampler, 8> s00 [[id(0)]];
};
struct spvDescriptorSetBuffer1
{
spvDescriptor<device B30 *> b30 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<constant B31 *> b31 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture2d<float>> t30 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture2d<uint>> t31 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture2d<int>> t32 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<texture_buffer<uint, access::read_write>> u3 [[id(0)]][1] /* unsized array hack */;
// Overlapping binding: spvDescriptor<sampler> s30 [[id(0)]][1] /* unsized array hack */;
};
struct spvDescriptorSetBuffer2
{
device B20* b20 [[id(0)]][8];
// Overlapping binding: constant B21* b21 [[id(0)]][8];
// Overlapping binding: array<texture2d<float>, 8> t20 [[id(0)]];
// Overlapping binding: array<texture2d<uint>, 8> t21 [[id(0)]];
// Overlapping binding: array<texture2d<int>, 8> t22 [[id(0)]];
// Overlapping binding: array<texture_buffer<uint, access::read_write>, 8> u2 [[id(0)]];
// Overlapping binding: array<sampler, 8> s20 [[id(0)]];
};
struct spvDescriptorSetBuffer3
{
device B10* b10 [[id(0)]][8];
// Overlapping binding: constant B11* b11 [[id(0)]][8];
// Overlapping binding: array<texture_buffer<uint, access::read_write>, 8> u1 [[id(0)]];
};
struct spvDescriptorSetBuffer4
{
device B40* b40 [[id(0)]];
// Overlapping binding: constant B41* b41 [[id(0)]];
// Overlapping binding: depth2d<float> t40 [[id(0)]];
// Overlapping binding: texture2d<uint> t41 [[id(0)]];
// Overlapping binding: texture2d<int> t42 [[id(0)]];
// Overlapping binding: texture_buffer<uint, access::read_write> u4 [[id(0)]];
// Overlapping binding: sampler s40 [[id(0)]];
};
static inline __attribute__((always_inline))
void in_function(thread float4& r0, const device array<texture2d<float>, 8>& t00, const device array<sampler, 8>& s00, const device array<texture2d<uint>, 8>& t01, const device array<texture2d<int>, 8>& t02, const device array<texture_buffer<uint, access::read_write>, 8>& u0, thread float4& r1, device B10* constant (&b10)[8], constant B11* constant (&b11)[8], constant array<texture_buffer<uint, access::read_write>, 8>& u1, thread float4& r2, constant array<texture2d<float>, 8>& t20, constant array<sampler, 8>& s20, constant array<texture2d<uint>, 8>& t21, constant array<texture2d<int>, 8>& t22, device B20* constant (&b20)[8], constant array<texture_buffer<uint, access::read_write>, 8>& u2, constant B21* constant (&b21)[8], const spvDescriptorArray<device B30*> b30, thread uint3& gl_WorkGroupID, thread float4& r3, const spvDescriptorArray<texture2d<float>> t30, const spvDescriptorArray<sampler> s30, const spvDescriptorArray<texture2d<uint>> t31, const spvDescriptorArray<texture2d<int>> t32, const spvDescriptorArray<constant B31*> b31, const spvDescriptorArray<texture_buffer<uint, access::read_write>> u3, thread float4& r4, depth2d<float> t40, sampler s40, texture2d<uint> t41, texture2d<int> t42, device B40& b40, constant B41& b41, texture_buffer<uint, access::read_write> u4)
{
r0 = t00[0].sample(s00[3], float2(0.0), level(0.0));
r0.x = as_type<float>(t01[1].read(uint2(int2(0)), 0).x);
r0.y = as_type<float>(t02[2].read(uint2(int2(0)), 0).x);
spvImageFence(u0[2]);
r0.z = as_type<float>(u0[2].read(uint(0)).x);
r1.x = b10[3]->v;
r1.y = b11[4]->v;
spvImageFence(u1[2]);
r1.z = as_type<float>(u1[2].read(uint(0)).x);
r2 = t20[0].sample(s20[3], float2(0.0), level(0.0));
r2.x = as_type<float>(t21[1].read(uint2(int2(0)), 0).x);
r2.y = as_type<float>(t22[2].read(uint2(int2(0)), 0).x);
spvImageFence(u2[2]);
r2.z = b20[3]->v + as_type<float>(u2[2].read(uint(0)).x);
r2.w = b21[4]->v;
uint i = b30[gl_WorkGroupID.x]->i;
r3 = t30[i].sample(s30[i + 1u], float2(0.0), level(0.0));
r3.x = as_type<float>(t31[i + 2u].read(uint2(int2(0)), 0).x);
r3.y = as_type<float>(t32[i + 3u].read(uint2(int2(0)), 0).x);
r3.z = b31[i + 5u]->v;
uint _218 = i + 6u;
spvImageFence(u3[_218]);
r3.w = as_type<float>(u3[_218].read(uint(0)).x);
r4 = float4(t40.sample(s40, float2(0.0), level(0.0)));
r4.x = as_type<float>(t41.read(uint2(int2(0)), 0).x);
r4.y = as_type<float>(t42.read(uint2(int2(0)), 0).x);
r4.z = b40.v + b41.v;
spvImageFence(u4);
r4.w = as_type<float>(u4.read(uint(0)).x);
u0[0].write(as_type<uint4>(r0), uint(0));
u1[0].write(as_type<uint4>(r1), uint(0));
u2[0].write(as_type<uint4>(r2), uint(0));
u3[0].write(as_type<uint4>(r3), uint(0));
u4.write(as_type<uint4>(r4), uint(0));
}
kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], const device spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant spvDescriptorSetBuffer2& spvDescriptorSet2 [[buffer(2)]], constant spvDescriptorSetBuffer3& spvDescriptorSet3 [[buffer(3)]], constant spvDescriptorSetBuffer4& spvDescriptorSet4 [[buffer(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
spvDescriptorArray<device B30*> b30 {spvDescriptorSet1.b30};
spvDescriptorArray<texture2d<float>> t30 {reinterpret_cast<const device spvDescriptor<texture2d<float>>*>(&spvDescriptorSet1.b30)};
spvDescriptorArray<sampler> s30 {reinterpret_cast<const device spvDescriptor<sampler>*>(&spvDescriptorSet1.b30)};
spvDescriptorArray<texture2d<uint>> t31 {reinterpret_cast<const device spvDescriptor<texture2d<uint>>*>(&spvDescriptorSet1.b30)};
spvDescriptorArray<texture2d<int>> t32 {reinterpret_cast<const device spvDescriptor<texture2d<int>>*>(&spvDescriptorSet1.b30)};
spvDescriptorArray<constant B31*> b31 {reinterpret_cast<spvDescriptor<constant B31 *> const device *>(&spvDescriptorSet1.b30)};
spvDescriptorArray<texture_buffer<uint, access::read_write>> u3 {reinterpret_cast<const device spvDescriptor<texture_buffer<uint, access::read_write>>*>(&spvDescriptorSet1.b30)};
const device auto &t01 = reinterpret_cast<const device array<texture2d<uint>, 8> &>(spvDescriptorSet0.t00);
const device auto &t02 = reinterpret_cast<const device array<texture2d<int>, 8> &>(spvDescriptorSet0.t00);
const device auto &u0 = reinterpret_cast<const device array<texture_buffer<uint, access::read_write>, 8> &>(spvDescriptorSet0.t00);
const device auto &s00 = reinterpret_cast<const device array<sampler, 8> &>(spvDescriptorSet0.t00);
constant auto &b21 = reinterpret_cast<constant B21* constant (&)[8]>(spvDescriptorSet2.b20);
constant auto &t20 = reinterpret_cast<constant array<texture2d<float>, 8> &>(spvDescriptorSet2.b20);
constant auto &t21 = reinterpret_cast<constant array<texture2d<uint>, 8> &>(spvDescriptorSet2.b20);
constant auto &t22 = reinterpret_cast<constant array<texture2d<int>, 8> &>(spvDescriptorSet2.b20);
constant auto &u2 = reinterpret_cast<constant array<texture_buffer<uint, access::read_write>, 8> &>(spvDescriptorSet2.b20);
constant auto &s20 = reinterpret_cast<constant array<sampler, 8> &>(spvDescriptorSet2.b20);
constant auto &b11 = reinterpret_cast<constant B11* constant (&)[8]>(spvDescriptorSet3.b10);
constant auto &u1 = reinterpret_cast<constant array<texture_buffer<uint, access::read_write>, 8> &>(spvDescriptorSet3.b10);
constant auto &b41 = *reinterpret_cast<constant B41* constant &>(spvDescriptorSet4.b40);
constant auto &t40 = reinterpret_cast<constant depth2d<float> &>(spvDescriptorSet4.b40);
constant auto &t41 = reinterpret_cast<constant texture2d<uint> &>(spvDescriptorSet4.b40);
constant auto &t42 = reinterpret_cast<constant texture2d<int> &>(spvDescriptorSet4.b40);
constant auto &u4 = reinterpret_cast<constant texture_buffer<uint, access::read_write> &>(spvDescriptorSet4.b40);
constant auto &s40 = reinterpret_cast<constant sampler &>(spvDescriptorSet4.b40);
float4 r0;
float4 r1;
float4 r2;
float4 r3;
float4 r4;
in_function(r0, spvDescriptorSet0.t00, s00, t01, t02, u0, r1, spvDescriptorSet3.b10, b11, u1, r2, t20, s20, t21, t22, spvDescriptorSet2.b20, u2, b21, b30, gl_WorkGroupID, r3, t30, s30, t31, t32, b31, u3, r4, t40, s40, t41, t42, (*spvDescriptorSet4.b40), b41, u4);
}

View File

@ -1,4 +1,5 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
#include <metal_stdlib>
#include <simd/simd.h>
@ -89,10 +90,16 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
struct spvDescriptorSetBuffer0
{
device SSBO_A* ssbo_a [[id(0)]];
constant UBO_C* ubo_c [[id(1)]];
// Overlapping binding: constant UBO_D* ubo_d [[id(1)]];
device SSBO_As* ssbo_as [[id(2)]][4];
// Overlapping binding: device SSBO_Bs* ssbo_bs [[id(2)]][4];
// Overlapping binding: const device SSBO_BsRO* ssbo_bs_readonly [[id(2)]][4];
constant UBO_Cs* ubo_cs [[id(6)]][4];
// Overlapping binding: constant UBO_Ds* ubo_ds [[id(6)]][4];
device SSBO_A* ssbo_a [[id(10)]];
// Overlapping binding: device SSBO_B* ssbo_b [[id(10)]];
// Overlapping binding: const device SSBO_BRO* ssbo_b_readonly [[id(10)]];
};
static inline __attribute__((always_inline))
@ -121,19 +128,19 @@ void func3(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, de
ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x];
}
kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding11 [[buffer(11)]], constant void* spvBufferAliasSet2Binding12 [[buffer(12)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0;
constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1;
device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0;
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1;
const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0;
device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a);
constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c);
const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a);
const device auto& ssbo_bs = (device SSBO_Bs* const device (&)[4])spvDescriptorSet0.ssbo_as;
const device auto& ubo_ds = (constant UBO_Ds* const device (&)[4])spvDescriptorSet0.ubo_cs;
const device auto& ssbo_bs_readonly = (const device SSBO_BsRO* const device (&)[4])spvDescriptorSet0.ssbo_as;
device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding11;
constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding12;
device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding11;
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding12;
const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding11;
constant auto &ubo_d = *reinterpret_cast<constant UBO_D* const device &>(spvDescriptorSet0.ubo_c);
const device auto &ssbo_bs = reinterpret_cast<device SSBO_Bs* const device (&)[4]>(spvDescriptorSet0.ssbo_as);
const device auto &ssbo_bs_readonly = reinterpret_cast<const device SSBO_BsRO* const device (&)[4]>(spvDescriptorSet0.ssbo_as);
const device auto &ubo_ds = reinterpret_cast<constant UBO_Ds* const device (&)[4]>(spvDescriptorSet0.ubo_cs);
device auto &ssbo_b = *reinterpret_cast<device SSBO_B* const device &>(spvDescriptorSet0.ssbo_a);
const device auto &ssbo_b_readonly = *reinterpret_cast<const device SSBO_BRO* const device &>(spvDescriptorSet0.ssbo_a);
func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, _42, ssbo_b, ubo_d, ssbo_b_readonly);
func1(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_as, spvDescriptorSet0.ubo_cs);
func2(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_bs, ubo_ds, ssbo_bs_readonly);

View File

@ -91,8 +91,14 @@ struct spvDescriptorSetBuffer0
{
device SSBO_A* ssbo_a [[id(0)]];
constant UBO_C* ubo_c [[id(1)]];
device SSBO_As* ssbo_as [[id(2)]][4];
constant UBO_Cs* ubo_cs [[id(6)]][4];
device SSBO_B* ssbo_b [[id(2)]];
constant UBO_D* ubo_d [[id(3)]];
const device SSBO_BRO* ssbo_b_readonly [[id(4)]];
device SSBO_As* ssbo_as [[id(5)]][4];
constant UBO_Cs* ubo_cs [[id(9)]][4];
device SSBO_Bs* ssbo_bs [[id(13)]][4];
constant UBO_Ds* ubo_ds [[id(17)]][4];
const device SSBO_BsRO* ssbo_bs_readonly [[id(21)]][4];
};
static inline __attribute__((always_inline))
@ -128,15 +134,9 @@ kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0
device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0;
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1;
const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0;
device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a);
constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c);
const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a);
constant auto& ssbo_bs = (device SSBO_Bs* constant (&)[4])spvDescriptorSet0.ssbo_as;
constant auto& ubo_ds = (constant UBO_Ds* constant (&)[4])spvDescriptorSet0.ubo_cs;
constant auto& ssbo_bs_readonly = (const device SSBO_BsRO* constant (&)[4])spvDescriptorSet0.ssbo_as;
func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, _42, ssbo_b, ubo_d, ssbo_b_readonly);
func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, _42, (*spvDescriptorSet0.ssbo_b), (*spvDescriptorSet0.ubo_d), (*spvDescriptorSet0.ssbo_b_readonly));
func1(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_as, spvDescriptorSet0.ubo_cs);
func2(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_bs, ubo_ds, ssbo_bs_readonly);
func2(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_bs, spvDescriptorSet0.ubo_ds, spvDescriptorSet0.ssbo_bs_readonly);
func3(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_e, ubo_g, ssbo_f, ubo_h, ssbo_i);
}

View File

@ -1,12 +1,17 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template <typename ImageT>
void spvImageFence(ImageT img) { img.fence(); }
fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
{
float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
uImageArray.fence();
spvImageFence(uImageArray);
float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
uImage.write(a, uint2(int2(2, 3)));
uImageArray.write(b, uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));

View File

@ -0,0 +1,89 @@
#version 450
#extension GL_EXT_samplerless_texture_functions : require
#extension GL_EXT_nonuniform_qualifier : require
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform samplerShadow s00[8];
layout(set = 0, binding = 0) uniform texture2D t00[8];
layout(set = 0, binding = 0) uniform utexture2D t01[8];
layout(set = 0, binding = 0) uniform itexture2D t02[8];
layout(set = 0, binding = 0, r32ui) uniform uimageBuffer u0[8];
layout(set = 3, binding = 0) buffer B10 { float v; } b10[8];
layout(set = 3, binding = 0) uniform B11 { float v; } b11[8];
layout(set = 3, binding = 0, r32ui) uniform uimageBuffer u1[8];
layout(set = 2, binding = 0) uniform samplerShadow s20[8];
layout(set = 2, binding = 0) uniform texture2D t20[8];
layout(set = 2, binding = 0) uniform utexture2D t21[8];
layout(set = 2, binding = 0) uniform itexture2D t22[8];
layout(set = 2, binding = 0) buffer B20 { float v; } b20[8];
layout(set = 2, binding = 0) uniform B21 { float v; } b21[8];
layout(set = 2, binding = 0, r32ui) uniform uimageBuffer u2[8];
layout(set = 1, binding = 0) uniform samplerShadow s30[];
layout(set = 1, binding = 0) uniform texture2D t30[];
layout(set = 1, binding = 0) uniform utexture2D t31[];
layout(set = 1, binding = 0) uniform itexture2D t32[];
layout(set = 1, binding = 0) buffer B30 { uint i; } b30[];
layout(set = 1, binding = 0) uniform B31 { float v; } b31[];
layout(set = 1, binding = 0, r32ui) uniform uimageBuffer u3[];
layout(set = 4, binding = 0) uniform samplerShadow s40;
layout(set = 4, binding = 0) uniform texture2D t40;
layout(set = 4, binding = 0) uniform utexture2D t41;
layout(set = 4, binding = 0) uniform itexture2D t42;
layout(set = 4, binding = 0, r32ui) uniform uimageBuffer u4;
layout(set = 4, binding = 0) buffer B40 { float v; } b40;
layout(set = 4, binding = 0) uniform B41 { float v; } b41;
vec4 r0;
vec4 r1;
vec4 r2;
vec4 r3;
vec4 r4;
void in_function()
{
r0 = textureLod(sampler2D(t00[0u], s00[3u]), vec4(0.0).xy, 0.0);
r0.x = uintBitsToFloat(texelFetch(t01[1u], ivec2(0), 0).x);
r0.y = intBitsToFloat(texelFetch(t02[2u], ivec2(0), 0).x);
r0.z = uintBitsToFloat(imageLoad(u0[2u], 0).x);
r1.x = b10[3u].v;
r1.y = b11[4u].v;
r1.z = uintBitsToFloat(imageLoad(u1[2u], 0).x);
r2 = textureLod(sampler2D(t20[0u], s20[3u]), vec4(0.0).xy, 0.0);
r2.x = uintBitsToFloat(texelFetch(t21[1u], ivec2(0), 0).x);
r2.y = intBitsToFloat(texelFetch(t22[2u], ivec2(0), 0).x);
r2.z = b20[3u].v + uintBitsToFloat(imageLoad(u2[2u], 0).x);
r2.w = b21[4u].v;
uint i = b30[gl_WorkGroupID.x].i;
r3 = textureLod(sampler2D(t30[i], s30[i+1u]), vec4(0.0).xy, 0.0);
r3.x = uintBitsToFloat(texelFetch(t31[i+2u], ivec2(0), 0).x);
r3.y = intBitsToFloat(texelFetch(t32[i+3u], ivec2(0), 0).x);
r3.z = b31[i+5u].v;
r3.w = uintBitsToFloat(imageLoad(u3[i+6u], 0).x); // TODO: Calls fence() on const device&, which is not supported.
r4 = textureLod(sampler2D(t40, s40), vec4(0.0).xy, 0.0);
r4.x = uintBitsToFloat(texelFetch(t41, ivec2(0), 0).x);
r4.y = intBitsToFloat(texelFetch(t42, ivec2(0), 0).x);
r4.z = b40.v + b41.v;
r4.w = uintBitsToFloat(imageLoad(u4, 0).x); // TODO: Calls fence() on const device&, which is not supported.
imageStore(u0[0u], 0, floatBitsToUint(r0));
imageStore(u1[0u], 0, floatBitsToUint(r1));
imageStore(u2[0u], 0, floatBitsToUint(r2));
imageStore(u3[0u], 0, floatBitsToUint(r3));
imageStore(u4, 0, floatBitsToUint(r4));
}
void main()
{
in_function();
}

View File

@ -1,17 +1,17 @@
#version 450
layout(local_size_x = 64) in;
layout(set = 0, binding = 0) buffer SSBO_A
layout(set = 0, binding = 10) buffer SSBO_A
{
float data[];
} ssbo_a;
layout(set = 0, binding = 0) buffer SSBO_B
layout(set = 0, binding = 10) buffer SSBO_B
{
uvec2 data[];
} ssbo_b;
layout(set = 0, binding = 0) readonly buffer SSBO_BRO
layout(set = 0, binding = 10) readonly buffer SSBO_BRO
{
uvec2 data[];
} ssbo_b_readonly;
@ -41,37 +41,37 @@ layout(set = 0, binding = 2) readonly buffer SSBO_BsRO
uvec2 data[1024];
} ssbo_bs_readonly[4];
layout(set = 0, binding = 3) uniform UBO_Cs
layout(set = 0, binding = 6) uniform UBO_Cs
{
float data[1024];
} ubo_cs[4];
layout(set = 0, binding = 3) uniform UBO_Ds
layout(set = 0, binding = 6) uniform UBO_Ds
{
uvec2 data[1024];
} ubo_ds[4];
layout(set = 2, binding = 0) buffer SSBO_E
layout(set = 2, binding = 11) buffer SSBO_E
{
float data[];
} ssbo_e;
layout(set = 2, binding = 0) buffer SSBO_F
layout(set = 2, binding = 11) buffer SSBO_F
{
uvec2 data[];
} ssbo_f;
layout(set = 2, binding = 1) uniform UBO_G
layout(set = 2, binding = 12) uniform UBO_G
{
float data[1024];
} ubo_g;
layout(set = 2, binding = 1) uniform UBO_H
layout(set = 2, binding = 12) uniform UBO_H
{
uvec2 data[1024];
} ubo_h;
layout(set = 2, binding = 0) readonly buffer SSBO_I
layout(set = 2, binding = 11) readonly buffer SSBO_I
{
uvec2 data[];
} ssbo_i;

View File

@ -1671,6 +1671,8 @@ enum ExtendedDecorations
// lack of constructors in the 'threadgroup' address space.
SPIRVCrossDecorationWorkgroupStruct,
SPIRVCrossDecorationOverlappingBinding,
SPIRVCrossDecorationCount
};

View File

@ -1373,6 +1373,7 @@ void CompilerMSL::emit_entry_point_declarations()
const auto &type = get_variable_data_type(var);
const auto &buffer_type = get_variable_element_type(var);
const string name = to_name(var.self);
if (is_var_runtime_size_array(var))
{
if (msl_options.argument_buffers_tier < Options::ArgumentBuffersTier::Tier2)
@ -1391,10 +1392,10 @@ void CompilerMSL::emit_entry_point_declarations()
case SPIRType::Image:
case SPIRType::Sampler:
case SPIRType::AccelerationStructure:
statement("spvDescriptorArray<", type_to_glsl(buffer_type), "> ", name, " {", resource_name, "};");
statement("spvDescriptorArray<", type_to_glsl(buffer_type, var.self), "> ", name, " {", resource_name, "};");
break;
case SPIRType::SampledImage:
statement("spvDescriptorArray<", type_to_glsl(buffer_type), "> ", name, " {", resource_name, "};");
statement("spvDescriptorArray<", type_to_glsl(buffer_type, var.self), "> ", name, " {", resource_name, "};");
// Unsupported with argument buffer for now.
statement("spvDescriptorArray<sampler> ", name, "Smplr {", name, "Smplr_};");
break;
@ -5340,6 +5341,8 @@ void CompilerMSL::emit_header()
// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
if (suppress_missing_prototypes)
statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
if (suppress_incompatible_pointer_types_discard_qualifiers)
statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");
// Disable warning about missing braces for array<T> template to make arrays a value type
if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
@ -7489,6 +7492,12 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplImageFence:
statement("template <typename ImageT>");
statement("void spvImageFence(ImageT img) { img.fence(); }");
statement("");
break;
default:
break;
}
@ -8965,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
statement(to_expression(img_id), ".fence();");
{
add_spv_func_and_recompile(SPVFuncImplImageFence);
// Need to wrap this with a value type,
// since the Metal headers are broken and do not consider case when the image is a reference.
statement("spvImageFence(", to_expression(img_id), ");");
}
emit_texture_op(instruction, false);
break;
@ -12293,7 +12307,11 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
else
decl_type = type_to_glsl(*declared_type, orig_id, true);
auto result = join(pack_pfx, decl_type, " ", qualifier,
const char *overlapping_binding_tag =
has_extended_member_decoration(type.self, index, SPIRVCrossDecorationOverlappingBinding) ?
"// Overlapping binding: " : "";
auto result = join(overlapping_binding_tag, pack_pfx, decl_type, " ", qualifier,
to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";");
is_using_builtin_array = false;
@ -13484,7 +13502,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
struct Resource
{
SPIRVariable *var;
SPIRVariable *descriptor_alias;
SPIRVariable *discrete_descriptor_alias;
string name;
SPIRType::BaseType basetype;
uint32_t index;
@ -13519,9 +13537,12 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
}
}
// Handle descriptor aliasing. We can handle aliasing of buffers by casting pointers,
// but not for typed resources.
SPIRVariable *descriptor_alias = nullptr;
// Handle descriptor aliasing of simple discrete cases.
// We can handle aliasing of buffers by casting pointers.
// The amount of aliasing we can perform for discrete descriptors is very limited.
// For fully mutable-style aliasing, we need argument buffers where we can exploit the fact
// that descriptors are all 8 bytes.
SPIRVariable *discrete_descriptor_alias = nullptr;
if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer)
{
for (auto &resource : resources)
@ -13534,10 +13555,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
(resource.var->storage == StorageClassUniform ||
resource.var->storage == StorageClassStorageBuffer))
{
descriptor_alias = resource.var;
discrete_descriptor_alias = resource.var;
// Self-reference marks that we should declare the resource,
// and it's being used as an alias (so we can emit void* instead).
resource.descriptor_alias = resource.var;
resource.discrete_descriptor_alias = resource.var;
// Need to promote interlocked usage so that the primary declaration is correct.
if (interlocked_resources.count(var_id))
interlocked_resources.insert(resource.var->self);
@ -13574,13 +13595,13 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
entry_point_bindings.push_back(&var);
for (uint32_t i = 0; i < plane_count; i++)
resources.push_back({ &var, descriptor_alias, to_name(var_id), SPIRType::Image,
get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index });
resources.push_back({&var, discrete_descriptor_alias, to_name(var_id), SPIRType::Image,
get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index });
if (type.image.dim != DimBuffer && !constexpr_sampler)
{
resources.push_back({ &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler,
get_metal_resource_index(var, SPIRType::Sampler), 0, 0 });
resources.push_back({&var, discrete_descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler,
get_metal_resource_index(var, SPIRType::Sampler), 0, 0 });
}
}
else if (!constexpr_sampler)
@ -13590,12 +13611,12 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
// Don't allocate resource indices for aliases.
uint32_t resource_index = ~0u;
if (!descriptor_alias)
if (!discrete_descriptor_alias)
resource_index = get_metal_resource_index(var, type.basetype);
entry_point_bindings.push_back(&var);
resources.push_back({ &var, descriptor_alias, to_name(var_id), type.basetype,
resource_index, 0, secondary_index });
resources.push_back({&var, discrete_descriptor_alias, to_name(var_id), type.basetype,
resource_index, 0, secondary_index });
}
}
});
@ -13619,9 +13640,9 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
if (m.members.size() == 0)
break;
if (r.descriptor_alias)
if (r.discrete_descriptor_alias)
{
if (r.var == r.descriptor_alias)
if (r.var == r.discrete_descriptor_alias)
{
auto primary_name = join("spvBufferAliasSet",
get_decoration(var_id, DecorationDescriptorSet),
@ -17851,6 +17872,101 @@ bool CompilerMSL::is_supported_argument_buffer_type(const SPIRType &type) const
return is_supported_type && !type_is_msl_framebuffer_fetch(type);
}
void CompilerMSL::emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var,
const SPIRVariable &base_var)
{
// To deal with buffer <-> image aliasing, we need to perform an unholy UB ritual.
// A texture type in Metal 3.0 is a pointer. However, we cannot simply cast a pointer to texture.
// What we *can* do is to cast pointer-to-pointer to pointer-to-texture.
// We need to explicitly reach into the descriptor buffer lvalue, not any spvDescriptorArray wrapper.
auto *var_meta = ir.find_meta(base_var.self);
bool old_explicit_qualifier = var_meta && var_meta->decoration.qualified_alias_explicit_override;
if (var_meta)
var_meta->decoration.qualified_alias_explicit_override = false;
auto unqualified_name = to_name(base_var.self, false);
if (var_meta)
var_meta->decoration.qualified_alias_explicit_override = old_explicit_qualifier;
// For non-arrayed buffers, we have already performed a de-reference.
// We need a proper lvalue to cast, so strip away the de-reference.
if (unqualified_name.size() > 2 && unqualified_name[0] == '(' && unqualified_name[1] == '*')
{
unqualified_name.erase(unqualified_name.begin(), unqualified_name.begin() + 2);
unqualified_name.pop_back();
}
string name;
auto &var_type = get<SPIRType>(aliased_var.basetype);
auto &data_type = get_variable_data_type(aliased_var);
string descriptor_storage = descriptor_address_space(aliased_var.self, aliased_var.storage, "");
if (aliased_var.storage == StorageClassUniformConstant)
{
if (is_var_runtime_size_array(aliased_var))
{
// This becomes a plain pointer to spvDescriptor.
name = join("reinterpret_cast<", descriptor_storage, " ",
type_to_glsl(get_variable_data_type(aliased_var), aliased_var.self, true), ">(&",
unqualified_name, ")");
}
else
{
name = join("reinterpret_cast<", descriptor_storage, " ",
type_to_glsl(get_variable_data_type(aliased_var), aliased_var.self, true), " &>(",
unqualified_name, ");");
}
}
else
{
// Buffer types.
bool old_is_using_builtin_array = is_using_builtin_array;
is_using_builtin_array = true;
bool needs_post_cast_deref = !is_array(data_type);
string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type));
if (is_var_runtime_size_array(aliased_var))
{
name = join("reinterpret_cast<",
type_to_glsl(var_type, aliased_var.self, true), " ", descriptor_storage, " *>(&",
unqualified_name, ")");
}
else
{
name = join(needs_post_cast_deref ? "*" : "", "reinterpret_cast<",
type_to_glsl(var_type, aliased_var.self, true), " ", descriptor_storage, " ",
ref_type,
">(", unqualified_name, ");");
}
if (needs_post_cast_deref)
descriptor_storage = get_type_address_space(var_type, aliased_var.self, false);
// These kinds of ridiculous casts trigger warnings in compiler. Just ignore them.
if (!suppress_incompatible_pointer_types_discard_qualifiers)
{
suppress_incompatible_pointer_types_discard_qualifiers = true;
force_recompile_guarantee_forward_progress();
}
is_using_builtin_array = old_is_using_builtin_array;
}
if (!is_var_runtime_size_array(aliased_var))
{
// Lower to temporary, so drop the qualification.
set_qualified_name(aliased_var.self, "");
statement(descriptor_storage, " auto &", to_name(aliased_var.self), " = ", name);
}
else
{
// This will get wrapped in a separate temporary when a spvDescriptorArray wrapper is emitted.
set_qualified_name(aliased_var.self, name);
}
}
void CompilerMSL::analyze_argument_buffers()
{
// Gather all used resources and sort them out into argument buffers.
@ -17867,11 +17983,11 @@ void CompilerMSL::analyze_argument_buffers()
struct Resource
{
SPIRVariable *var;
SPIRVariable *descriptor_alias;
string name;
SPIRType::BaseType basetype;
uint32_t index;
uint32_t plane;
uint32_t overlapping_var_id;
};
SmallVector<Resource> resources_in_set[kMaxArgumentBuffers];
SmallVector<uint32_t> inline_block_vars;
@ -17907,32 +18023,6 @@ void CompilerMSL::analyze_argument_buffers()
}
}
// Handle descriptor aliasing as well as we can.
// We can handle aliasing of buffers by casting pointers, but not for typed resources.
// Inline UBOs cannot be handled since it's not a pointer, but inline data.
SPIRVariable *descriptor_alias = nullptr;
if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer)
{
for (auto &resource : resources_in_set[desc_set])
{
if (get_decoration(resource.var->self, DecorationBinding) ==
get_decoration(var_id, DecorationBinding) &&
resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct &&
(resource.var->storage == StorageClassUniform ||
resource.var->storage == StorageClassStorageBuffer))
{
descriptor_alias = resource.var;
// Self-reference marks that we should declare the resource,
// and it's being used as an alias (so we can emit void* instead).
resource.descriptor_alias = resource.var;
// Need to promote interlocked usage so that the primary declaration is correct.
if (interlocked_resources.count(var_id))
interlocked_resources.insert(resource.var->self);
break;
}
}
}
uint32_t binding = get_decoration(var_id, DecorationBinding);
if (type.basetype == SPIRType::SampledImage)
{
@ -17946,14 +18036,14 @@ void CompilerMSL::analyze_argument_buffers()
{
uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i);
resources_in_set[desc_set].push_back(
{ &var, descriptor_alias, to_name(var_id), SPIRType::Image, image_resource_index, i });
{ &var, to_name(var_id), SPIRType::Image, image_resource_index, i, 0 });
}
if (type.image.dim != DimBuffer && !constexpr_sampler)
{
uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
resources_in_set[desc_set].push_back(
{ &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 });
{ &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0, 0 });
}
}
else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding }))
@ -17966,19 +18056,17 @@ void CompilerMSL::analyze_argument_buffers()
// Inline uniform blocks are always emitted at the end.
add_resource_name(var_id);
uint32_t resource_index = ~0u;
if (!descriptor_alias)
resource_index = get_metal_resource_index(var, type.basetype);
uint32_t resource_index = get_metal_resource_index(var, type.basetype);
resources_in_set[desc_set].push_back(
{ &var, descriptor_alias, to_name(var_id), type.basetype, resource_index, 0 });
{ &var, to_name(var_id), type.basetype, resource_index, 0, 0 });
// Emulate texture2D atomic operations
if (atomic_image_vars_emulated.count(var.self))
{
uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
resources_in_set[desc_set].push_back(
{ &var, descriptor_alias, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 });
{ &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0, 0 });
}
}
@ -18026,7 +18114,7 @@ void CompilerMSL::analyze_argument_buffers()
set_decoration(var_id, DecorationDescriptorSet, desc_set);
set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding);
resources_in_set[desc_set].push_back(
{ &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
{ &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0, 0 });
}
if (set_needs_buffer_sizes[desc_set])
@ -18037,7 +18125,7 @@ void CompilerMSL::analyze_argument_buffers()
set_decoration(var_id, DecorationDescriptorSet, desc_set);
set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding);
resources_in_set[desc_set].push_back(
{ &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
{ &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0, 0 });
}
}
}
@ -18049,7 +18137,7 @@ void CompilerMSL::analyze_argument_buffers()
uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
add_resource_name(var_id);
resources_in_set[desc_set].push_back(
{ &var, nullptr, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0 });
{ &var, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0, 0 });
}
for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
@ -18098,6 +18186,22 @@ void CompilerMSL::analyze_argument_buffers()
return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype);
});
for (size_t i = 0; i < resources.size() - 1; i++)
{
auto &r1 = resources[i];
auto &r2 = resources[i + 1];
if (r1.index == r2.index)
{
if (r1.overlapping_var_id)
r2.overlapping_var_id = r1.overlapping_var_id;
else
r2.overlapping_var_id = r1.var->self;
set_extended_decoration(r2.var->self, SPIRVCrossDecorationOverlappingBinding, r2.overlapping_var_id);
}
}
uint32_t member_index = 0;
uint32_t next_arg_buff_index = 0;
for (auto &resource : resources)
@ -18113,43 +18217,40 @@ void CompilerMSL::analyze_argument_buffers()
if (msl_options.pad_argument_buffer_resources)
{
auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
if (!resource.descriptor_alias)
while (resource.index > next_arg_buff_index)
{
while (resource.index > next_arg_buff_index)
switch (rez_bind.basetype)
{
switch (rez_bind.basetype)
{
case SPIRType::Void:
case SPIRType::Boolean:
case SPIRType::SByte:
case SPIRType::UByte:
case SPIRType::Short:
case SPIRType::UShort:
case SPIRType::Int:
case SPIRType::UInt:
case SPIRType::Int64:
case SPIRType::UInt64:
case SPIRType::AtomicCounter:
case SPIRType::Half:
case SPIRType::Float:
case SPIRType::Double:
add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
case SPIRType::Image:
add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
case SPIRType::Sampler:
case SPIRType::Void:
case SPIRType::Boolean:
case SPIRType::SByte:
case SPIRType::UByte:
case SPIRType::Short:
case SPIRType::UShort:
case SPIRType::Int:
case SPIRType::UInt:
case SPIRType::Int64:
case SPIRType::UInt64:
case SPIRType::AtomicCounter:
case SPIRType::Half:
case SPIRType::Float:
case SPIRType::Double:
add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
case SPIRType::Image:
add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
case SPIRType::Sampler:
add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
case SPIRType::SampledImage:
if (next_arg_buff_index == rez_bind.msl_sampler)
add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
case SPIRType::SampledImage:
if (next_arg_buff_index == rez_bind.msl_sampler)
add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
else
add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
default:
break;
}
else
add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
break;
default:
break;
}
}
@ -18197,23 +18298,29 @@ void CompilerMSL::analyze_argument_buffers()
{
// Drop pointer information when we emit the resources into a struct.
buffer_type.member_types.push_back(get_variable_data_type_id(var));
if (resource.plane == 0)
if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding))
{
if (!msl_options.supports_msl_version(3, 0))
SPIRV_CROSS_THROW("Full mutable aliasing of argument buffer descriptors only works on Metal 3+.");
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
entry_func.fixup_hooks_in.push_back([this, resource]() {
emit_argument_buffer_aliased_descriptor(*resource.var, this->get<SPIRVariable>(resource.overlapping_var_id));
});
}
else if (resource.plane == 0)
{
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
}
}
else if (buffers_requiring_dynamic_offset.count(pair))
{
if (resource.descriptor_alias)
SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with dynamic offsets.");
// Don't set the qualified name here; we'll define a variable holding the corrected buffer address later.
buffer_type.member_types.push_back(var.basetype);
buffers_requiring_dynamic_offset[pair].second = var.self;
}
else if (inline_uniform_blocks.count(pair))
{
if (resource.descriptor_alias)
SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with inline UBOs.");
// Put the buffer block itself into the argument buffer.
buffer_type.member_types.push_back(get_variable_data_type_id(var));
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
@ -18246,11 +18353,22 @@ void CompilerMSL::analyze_argument_buffers()
}
else
{
if (!resource.descriptor_alias || resource.descriptor_alias == resource.var)
buffer_type.member_types.push_back(var.basetype);
buffer_type.member_types.push_back(var.basetype);
if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding))
{
// Casting raw pointers is fine since their ABI is fixed, but anything opaque is deeply questionable on Metal 2.
if (get<SPIRVariable>(resource.overlapping_var_id).storage == StorageClassUniformConstant &&
!msl_options.supports_msl_version(3, 0))
{
SPIRV_CROSS_THROW("Full mutable aliasing of argument buffer descriptors only works on Metal 3+.");
}
if (resource.descriptor_alias && resource.descriptor_alias != resource.var)
buffer_aliases_argument.push_back({ var.self, resource.descriptor_alias->self });
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
entry_func.fixup_hooks_in.push_back([this, resource]() {
emit_argument_buffer_aliased_descriptor(*resource.var, this->get<SPIRVariable>(resource.overlapping_var_id));
});
}
else if (type.array.empty())
set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")"));
else
@ -18262,6 +18380,8 @@ void CompilerMSL::analyze_argument_buffers()
resource.index);
set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID,
var.self);
if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding))
set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationOverlappingBinding);
member_index++;
}
}

View File

@ -824,7 +824,8 @@ protected:
SPVFuncImplVariableSizedDescriptor,
SPVFuncImplVariableDescriptorArray,
SPVFuncImplPaddedStd140,
SPVFuncImplReduceAdd
SPVFuncImplReduceAdd,
SPVFuncImplImageFence
};
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
@ -1225,6 +1226,9 @@ protected:
uint32_t argument_buffer_discrete_mask = 0;
uint32_t argument_buffer_device_storage_mask = 0;
void emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var,
const SPIRVariable &base_var);
void analyze_argument_buffers();
bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx);
@ -1239,6 +1243,7 @@ protected:
uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective);
bool suppress_missing_prototypes = false;
bool suppress_incompatible_pointer_types_discard_qualifiers = false;
void add_spv_func_and_recompile(SPVFuncImpl spv_func);