From 52c7c2dab6802a94d358fac49d752b28a0d99823 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 20 Jun 2022 20:21:00 -0400 Subject: [PATCH 1/4] MSL: Add support for SPV_KHR_physical_storage_buffer extension. - Determine sizing and alignments of pointers to types as distinct from the size and alignment of the types themselves. - Declare all buffer pointers in the MSL device address space. - Support struct pointer recursion, where structs can contain pointers to themselves or to a parent struct. - Add SPIRType::was_forward_referenced to track if a type was forward referenced, to help emit MSL structs in the correct dependency order. - Handle pointers to pointers that are not just arrays of arrays. --- ...ddress-recursive-struct-pointers.msl2.comp | 94 +++++++++++++++++++ .../comp/buffer_device_address.msl2.comp | 52 ++++++++++ .../vert/buffer_device_address.msl2.vert | 59 ++++++++++++ ...ddress-recursive-struct-pointers.msl2.comp | 65 +++++++++++++ .../comp/buffer_device_address.msl2.comp | 40 ++++++++ .../vert/buffer_device_address.msl2.vert | 46 +++++++++ ...ddress-recursive-struct-pointers.msl2.comp | 64 +++++++++++++ .../comp/buffer_device_address.msl2.comp | 86 +++++++++++++++++ .../vert/buffer_device_address.msl2.vert | 83 ++++++++++++++++ spirv_common.hpp | 1 + spirv_msl.cpp | 55 +++++++++-- spirv_parser.cpp | 11 +++ 12 files changed, 647 insertions(+), 9 deletions(-) create mode 100644 reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp create mode 100644 reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp create mode 100644 reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert create mode 100644 reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp create mode 100644 reference/shaders-msl/comp/buffer_device_address.msl2.comp create mode 100644 reference/shaders-msl/vert/buffer_device_address.msl2.vert create mode 100644 shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp create mode 100644 shaders-msl/comp/buffer_device_address.msl2.comp create mode 100644 shaders-msl/vert/buffer_device_address.msl2.vert diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp new file mode 100644 index 00000000..7b4564c4 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -0,0 +1,94 @@ +#include +#include + +using namespace metal; + +struct t21 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t24 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + int _30 = 0 | (u24.m0[0].x - 0); + v8 = _30; + int _44 = _30 | (u24.m0[u35.m0[1]].x - 1); + v8 = _44; + int _50 = _44 | (u24.m1 - 2); + v8 = _50; + int _60 = _50 | int(u24.m4[0u][0] - 3.0); + v8 = _60; + int _68 = _60 | int(u24.m4[1u][0] - 5.0); + v8 = _68; + int _75 = _68 | int(u24.m4[0u][1] - 4.0); + v8 = _75; + int _82 = _75 | int(u24.m4[1u][1] - 6.0); + v8 = _82; + int _92 = _82 | (u24.m2[0]->m0[0].x - 3); + v8 = _92; + int _101 = _92 | (u24.m2[0]->m0[u35.m0[1]].x - 4); + v8 = 
_101; + int _109 = _101 | (u24.m2[0]->m1 - 5); + v8 = _109; + int _118 = _109 | int(u24.m2[0]->m4[0u][0] - 6.0); + v8 = _118; + int _127 = _118 | int(u24.m2[0]->m4[1u][0] - 8.0); + v8 = _127; + int _136 = _127 | int(u24.m2[0]->m4[0u][1] - 7.0); + v8 = _136; + int _145 = _136 | int(u24.m2[0]->m4[1u][1] - 9.0); + v8 = _145; + int _155 = _145 | (u24.m2[u35.m0[1]]->m0[0].x - 6); + v8 = _155; + int _167 = _155 | (u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); + v8 = _167; + int _177 = _167 | (u24.m2[u35.m0[1]]->m1 - 8); + v8 = _177; + int _187 = _177 | int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); + v8 = _187; + int _198 = _187 | int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); + v8 = _198; + int _209 = _198 | int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); + v8 = _209; + int _220 = _209 | int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + v8 = _220; + int _228 = _220 | (u24.m3->m0[0].x - 9); + v8 = _228; + int _238 = _228 | (u24.m3->m0[u35.m0[1]].x - 10); + v8 = _238; + int _246 = _238 | (u24.m3->m1 - 11); + v8 = _246; + int _254 = _246 | int(u24.m3->m4[0u][0] - 12.0); + v8 = _254; + int _263 = _254 | int(u24.m3->m4[1u][0] - 14.0); + v8 = _263; + int _272 = _263 | int(u24.m3->m4[0u][1] - 13.0); + v8 = _272; + int _281 = _272 | int(u24.m3->m4[1u][1] - 15.0); + v8 = _281; + uint4 _292 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(_281 != 0)); + uint4 v284 = _292; + v295.write(_292, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000..ed18a406 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,52 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 local_offset = gl_GlobalInvocationID.xy; + uint _19 = local_offset.y; + uint _29 = local_offset.x; + uint _30 = ((_19 * 8u) * gl_NumWorkGroups.x) + _29; + uint local_index = _30; + uint slice = gl_WorkGroupID.z; + device Position* positions = registers.references->buffers[gl_WorkGroupID.z]; + float _66 = float(gl_WorkGroupID.z); + float _70 = fract(fma(_66, 0.100000001490116119384765625, registers.fract_time)); + float _71 = 6.283125400543212890625 * _70; + float offset = _71; + float2 pos = float2(local_offset); + float _83 = sin(fma(2.2000000476837158203125, pos.x, _71)); + pos.x = fma(0.20000000298023223876953125, _83, pos.x); + float _97 = sin(fma(2.25, pos.y, _70 * 12.56625080108642578125)); + pos.y = fma(0.20000000298023223876953125, _97, pos.y); + float _111 = cos(fma(1.7999999523162841796875, pos.y, _70 * 18.849376678466796875)); + pos.x = fma(0.20000000298023223876953125, _111, pos.x); + float _125 = cos(fma(2.849999904632568359375, pos.x, _70 * 25.1325016021728515625)); + pos.y = fma(0.20000000298023223876953125, _125, pos.y); + float _133 = sin(_71); + pos.x = fma(0.5, _133, pos.x); + float _142 = sin(fma(6.283125400543212890625, _70, 0.300000011920928955078125)); + pos.y = fma(0.5, _142, pos.y); + 
registers.references->buffers[gl_WorkGroupID.z]->positions[_30] = (pos / ((float2(8.0) * float2(gl_NumWorkGroups.xy)) - float2(1.0))) - float2(0.5); +} + diff --git a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000..ff8b5f06 --- /dev/null +++ b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,59 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* positions = registers.references->buffers[int(gl_InstanceIndex)]; + float2 _45 = registers.references->buffers[int(gl_InstanceIndex)]->positions[int(gl_VertexIndex)] * 2.5; + float2 pos = _45; + float2 _60 = _45 + ((float2(float(int(gl_InstanceIndex) % 8), float(int(gl_InstanceIndex) / 8)) - float2(3.5)) * 3.0); + pos = _60; + out.gl_Position = registers.view_projection * float4(_60, 0.0, 1.0); + int _82 = int(gl_VertexIndex) % 16; + int index_x = _82; + int _85 = int(gl_VertexIndex) / 16; + int index_y = _85; + float _92 = sin(float(_82)); + float _94 = fma(0.300000011920928955078125, _92, 0.5); + float r = _94; + float _98 = sin(float(_85)); + float _100 = fma(0.300000011920928955078125, _98, 0.5); + float g = _100; + int _105 = (_82 ^ _85) & 1; + int checkerboard = _105; + float _107 = float(_105); + float _111 = fma(_107, 0.800000011920928955078125, 0.20000000298023223876953125); + float _113 = _94 * _111; + r = _113; + float _119 = _100 * _111; + g = _119; + out.out_color = float4(_113, _119, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp new file mode 100644 index 00000000..e18b4a5c --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -0,0 +1,65 @@ +#include +#include + +using namespace metal; + +struct t21 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t24 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + v8 |= (u24.m0[0].x - 0); + v8 |= (u24.m0[u35.m0[1]].x - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0u][0] - 3.0); + v8 |= int(u24.m4[1u][0] - 5.0); + v8 |= int(u24.m4[0u][1] - 4.0); + v8 |= int(u24.m4[1u][1] - 6.0); + v8 |= (u24.m2[0]->m0[0].x - 3); + v8 |= (u24.m2[0]->m0[u35.m0[1]].x - 4); + v8 |= (u24.m2[0]->m1 - 5); + v8 |= int(u24.m2[0]->m4[0u][0] - 6.0); + v8 |= int(u24.m2[0]->m4[1u][0] - 8.0); + v8 |= int(u24.m2[0]->m4[0u][1] - 7.0); + v8 |= int(u24.m2[0]->m4[1u][1] - 9.0); + v8 |= (u24.m2[u35.m0[1]]->m0[0].x - 6); + v8 |= 
(u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); + v8 |= (u24.m2[u35.m0[1]]->m1 - 8); + v8 |= int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); + v8 |= int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); + v8 |= int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); + v8 |= int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + v8 |= (u24.m3->m0[0].x - 9); + v8 |= (u24.m3->m0[u35.m0[1]].x - 10); + v8 |= (u24.m3->m1 - 11); + v8 |= int(u24.m3->m4[0u][0] - 12.0); + v8 |= int(u24.m3->m4[1u][0] - 14.0); + v8 |= int(u24.m3->m4[0u][1] - 13.0); + v8 |= int(u24.m3->m4[1u][1] - 15.0); + uint4 v284 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(v8 != 0)); + v295.write(v284, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000..523fcecd --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = ((local_offset.y * 8u) * gl_NumWorkGroups.x) + local_offset.x; + uint slice = gl_WorkGroupID.z; + device Position* positions = registers.references->buffers[slice]; + float offset = 6.283125400543212890625 * fract(registers.fract_time + (float(slice) * 0.100000001490116119384765625)); + float2 pos = float2(local_offset); + pos.x += (0.20000000298023223876953125 * sin((2.2000000476837158203125 * pos.x) + offset)); + pos.y += (0.20000000298023223876953125 * sin((2.25 * pos.y) + (2.0 * offset))); + pos.x += (0.20000000298023223876953125 * cos((1.7999999523162841796875 * pos.y) + (3.0 * offset))); + pos.y += (0.20000000298023223876953125 * cos((2.849999904632568359375 * pos.x) + (4.0 * offset))); + pos.x += (0.5 * sin(offset)); + pos.y += (0.5 * sin(offset + 0.300000011920928955078125)); + positions->positions[local_index] = (pos / ((float2(8.0) * float2(gl_NumWorkGroups.xy)) - float2(1.0))) - float2(0.5); +} + diff --git a/reference/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000..3c4d65a5 --- /dev/null +++ b/reference/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,46 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* positions = registers.references->buffers[slice]; + float2 pos = positions->positions[int(gl_VertexIndex)] * 2.5; + pos += ((float2(float(slice % 8), float(slice / 8)) - float2(3.5)) * 3.0); + 
out.gl_Position = registers.view_projection * float4(pos, 0.0, 1.0); + int index_x = int(gl_VertexIndex) % 16; + int index_y = int(gl_VertexIndex) / 16; + float r = 0.5 + (0.300000011920928955078125 * sin(float(index_x))); + float g = 0.5 + (0.300000011920928955078125 * sin(float(index_y))); + int checkerboard = (index_x ^ index_y) & 1; + r *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + g *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + out.out_color = float4(r, g, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp new file mode 100644 index 00000000..2bb19eed --- /dev/null +++ b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -0,0 +1,64 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer t21; +layout(buffer_reference, buffer_reference_align = 16, std140) buffer t21 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +}; + +layout(set = 0, binding = 1, std140) uniform t24 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +} u24; + +layout(push_constant, std430) uniform t35 +{ + int m0[32]; +} u35; + +layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D v295; + +void main() +{ + int v8 = 0; + v8 |= (u24.m0[0] - 0); + v8 |= (u24.m0[u35.m0[1]] - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0].x - 3.0); + v8 |= int(u24.m4[0].y - 5.0); + v8 |= int(u24.m4[1].x - 4.0); + v8 |= int(u24.m4[1].y - 6.0); + v8 |= (u24.m2[0].m0[0] - 3); + v8 |= (u24.m2[0].m0[u35.m0[1]] - 4); + v8 |= (u24.m2[0].m1 - 5); + v8 |= int(u24.m2[0].m4[0].x - 6.0); + v8 |= int(u24.m2[0].m4[0].y - 8.0); + v8 |= int(u24.m2[0].m4[1].x - 7.0); + v8 |= int(u24.m2[0].m4[1].y - 9.0); + v8 |= (u24.m2[u35.m0[1]].m0[0] - 6); + v8 |= (u24.m2[u35.m0[1]].m0[u35.m0[1]] - 7); + v8 |= (u24.m2[u35.m0[1]].m1 - 8); + v8 |= int(u24.m2[u35.m0[1]].m4[0].x - 9.0); + v8 |= int(u24.m2[u35.m0[1]].m4[0].y - 11.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].x - 10.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].y - 12.0); + v8 |= (u24.m3.m0[0] - 9); + v8 |= (u24.m3.m0[u35.m0[1]] - 10); + v8 |= (u24.m3.m1 - 11); + v8 |= int(u24.m3.m4[0].x - 12.0); + v8 |= int(u24.m3.m4[0].y - 14.0); + v8 |= int(u24.m3.m4[1].x - 13.0); + v8 |= int(u24.m3.m4[1].y - 15.0); + uvec4 v284 = mix(uvec4(1u, 0u, 0u, 1u), uvec4(0u), bvec4(v8 != 0)); + imageStore(v295, ivec2(gl_GlobalInvocationID.xy), v284); +} diff --git a/shaders-msl/comp/buffer_device_address.msl2.comp b/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000..14ac1ef9 --- /dev/null +++ b/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,86 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +layout(local_size_x = 8, local_size_y = 8) in; + +// If we mark a buffer as buffer_reference, this is treated as a pointer type. +// A variable with the type Position is a 64-bit pointer to the data within. +// We can freely cast between pointer types if we wish, but that is not necessary in this sample. +// buffer_reference_align is used to let the underlying implementation know which alignment to expect. +// The pointer can have scalar alignment, which is something the compiler cannot know unless you tell it. +// It is best to use vector alignment when you can for optimal performance, but scalar alignment is sometimes useful. +// With SSBOs, the API has a minimum offset alignment which guarantees a minimum level of alignment from API side. + +// It is possible to forward reference a pointer, so you can contain a pointer to yourself inside a struct. +// Useful if you need something like a linked list on the GPU. +// Here it's not particularly useful, but something to know about. +layout(buffer_reference) buffer Position; + +layout(std430, buffer_reference, buffer_reference_align = 8) writeonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // This buffer contains an array of pointers to other buffers. + Position buffers[]; +}; + +// In push constant we place a pointer to VBO pointers, spicy! +// This way we don't need any descriptor sets, but there's nothing wrong with combining use of descriptor sets and buffer device addresses. +// It is mostly done for convenience here. +layout(push_constant) uniform Registers +{ + PositionReferences references; + // A buffer reference is 64-bit, so offset of fract_time is 8 bytes. + float fract_time; +} registers; + +void main() +{ + // Every slice is a 8x8 grid of vertices which we update here in compute. + uvec2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = local_offset.y * gl_WorkGroupSize.x * gl_NumWorkGroups.x + local_offset.x; + uint slice = gl_WorkGroupID.z; + + restrict Position positions = registers.references.buffers[slice]; + + // This is a trivial wave-like function. Arbitrary for demonstration purposes. + const float TWO_PI = 3.1415628 * 2.0; + float offset = TWO_PI * fract(registers.fract_time + float(slice) * 0.1); + + // Simple grid. + vec2 pos = vec2(local_offset); + + // Wobble, wobble. + pos.x += 0.2 * sin(2.2 * pos.x + offset); + pos.y += 0.2 * sin(2.25 * pos.y + 2.0 * offset); + pos.x += 0.2 * cos(1.8 * pos.y + 3.0 * offset); + pos.y += 0.2 * cos(2.85 * pos.x + 4.0 * offset); + pos.x += 0.5 * sin(offset); + pos.y += 0.5 * sin(offset + 0.3); + + // Center the mesh in [-0.5, 0.5] range. + // Here we write to a raw pointer. + // Be aware, there is no robustness support for buffer_device_address since we don't have a complete descriptor! 
+ positions.positions[local_index] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; +} diff --git a/shaders-msl/vert/buffer_device_address.msl2.vert b/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000..ffc88713 --- /dev/null +++ b/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,83 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +// Since we did not enable vertexPipelineStoresAndAtomics, we must mark everything readonly. +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // Represents an array of pointers, where each pointer points to its own VBO (Position). + // The size of a pointer (VkDeviceAddress) is always 8 in Vulkan. + Position buffers[]; +}; + +layout(push_constant) uniform Registers +{ + mat4 view_projection; + + // This is a pointer to an array of pointers, essentially: + // const VBO * const *vbos + PositionReferences references; +} registers; + +// Flat shading looks a little cooler here :) +layout(location = 0) flat out vec4 out_color; + +void main() +{ + int slice = gl_InstanceIndex; + + // One VBO per instance, load the VBO pointer. + // The cool thing here is that a compute shader could hypothetically + // write the pointer list where vertices are stored. + // With vertex attributes we do not have the luxury to modify VBO bindings on the GPU. + // The best we can do is to just modify the vertexOffset in an indirect draw call, + // but that's not always flexible enough, and enforces a very specific engine design to work. + // We can even modify the attribute layout per slice here, since we can just cast the pointer + // to something else if we want. + restrict Position positions = registers.references.buffers[slice]; + + // Load the vertex based on VertexIndex instead of an attribute. Fully flexible. + // Only downside is that we do not get format conversion for free like we do with normal vertex attributes. + vec2 pos = positions.positions[gl_VertexIndex] * 2.5; + + // Place the quad meshes on screen and center it. + pos += 3.0 * (vec2(slice % 8, slice / 8) - 3.5); + + // Normal projection. + gl_Position = registers.view_projection * vec4(pos, 0.0, 1.0); + + // Color the vertex. Use a combination of a wave and checkerboard, completely arbitrary. 
+ int index_x = gl_VertexIndex % 16; + int index_y = gl_VertexIndex / 16; + + float r = 0.5 + 0.3 * sin(float(index_x)); + float g = 0.5 + 0.3 * sin(float(index_y)); + + int checkerboard = (index_x ^ index_y) & 1; + r *= float(checkerboard) * 0.8 + 0.2; + g *= float(checkerboard) * 0.8 + 0.2; + + out_color = vec4(r, g, 0.15, 1.0); +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 1c8a7253..7864f0fd 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -591,6 +591,7 @@ struct SPIRType : IVariant uint32_t pointer_depth = 0; bool pointer = false; bool forward_pointer = false; + bool was_forward_referenced = false; spv::StorageClass storage = spv::StorageClassGeneric; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4687742b..37e9b9a1 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1980,7 +1980,8 @@ void CompilerMSL::mark_as_packable(SPIRType &type) return; } - if (type.basetype == SPIRType::Struct) + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked)) { set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); @@ -4039,6 +4040,10 @@ uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t locat void CompilerMSL::mark_struct_members_packed(const SPIRType &type) { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (has_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked)) + return; + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); // Problem case! Struct needs to be placed at an awkward alignment. @@ -4065,8 +4070,9 @@ void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) uint32_t mbr_cnt = uint32_t(type.member_types.size()); for (uint32_t i = 0; i < mbr_cnt; i++) { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. auto &mbr_type = get(type.member_types[i]); - if (mbr_type.basetype == SPIRType::Struct) + if (mbr_type.basetype == SPIRType::Struct && !(mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer)) { auto *struct_type = &mbr_type; while (!struct_type->array.empty()) @@ -4278,8 +4284,10 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule // that struct alignment == max alignment of all members and struct size depends on this alignment. + // Can't repack structs, but can repack pointers to structs. auto &mbr_type = get(ib_type.member_types[index]); - if (mbr_type.basetype == SPIRType::Struct) + bool is_buff_ptr = mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer; + if (mbr_type.basetype == SPIRType::Struct && !is_buff_ptr) SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct."); // Perform remapping here. @@ -4306,7 +4314,9 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in for (uint32_t dim = 0; dim < dimensions; dim++) array_stride /= max(to_array_size_literal(mbr_type, dim), 1u); - uint32_t elems_per_stride = array_stride / (mbr_type.width / 8); + // Pointers are 8 bytes + uint32_t mbr_width_in_bytes = is_buff_ptr ? 
8 : (mbr_type.width / 8);
+        uint32_t elems_per_stride = array_stride / mbr_width_in_bytes;
 
         if (elems_per_stride == 3)
             SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
@@ -4315,7 +4325,9 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in
 
         auto physical_type = mbr_type;
         physical_type.vecsize = elems_per_stride;
-        physical_type.parent_type = 0;
+        if (!is_buff_ptr)
+            physical_type.parent_type = 0;
+
         uint32_t type_id = ir.increase_bound_by(1);
         set<SPIRType>(type_id, physical_type);
         set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
@@ -6868,7 +6880,9 @@ void CompilerMSL::emit_specialization_constants_and_structs()
         auto &type = id.get<SPIRType>();
         TypeID type_id = type.self;
 
-        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer;
+        // Forward references can change the declaration dependency order.
+        // Ensure that change is considered when emitting MSL structs, which must be emitted in dependency order.
+        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && (!type.pointer || type.was_forward_referenced);
         bool is_block =
             has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
@@ -11400,6 +11414,7 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
         break;
 
     case StorageClassStorageBuffer:
+    case StorageClassPhysicalStorageBuffer:
     {
         // For arguments from variable pointers, we use the write count deduction, so
         // we should not assume any constness here. Only for global SSBOs.
@@ -13528,7 +13543,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         const char *restrict_kw;
 
         auto type_address_space = get_type_address_space(type, id);
-        auto type_decl = type_to_glsl(get<SPIRType>(type.parent_type), id);
+        const auto* p_parent_type = &get<SPIRType>(type.parent_type);
 
         // Work around C pointer qualifier rules. If glsl_type is a pointer type as well
         // we'll need to emit the address space to the right.
@@ -13536,9 +13551,16 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         // Prefer emitting thread T *foo over T thread* foo since it's more readable,
         // but we'll have to emit thread T * thread * T constant bar; for example.
         if (type_is_pointer_to_pointer(type))
-            type_name = join(type_decl, " ", type_address_space, " ");
+            type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " ");
         else
-            type_name = join(type_address_space, " ", type_decl);
+        {
+            // Since this is not a pointer-to-pointer, ensure we've dug down to the base type.
+            // Some situations chain pointers even though they are not formally pointers-of-pointers.
+            while(type_is_pointer(*p_parent_type))
+                p_parent_type = &get<SPIRType>(p_parent_type->parent_type);
+
+            type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
+        }
 
         switch (type.basetype)
         {
@@ -15007,6 +15029,17 @@ uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type,
 // Returns the byte size of a struct member.
 uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const
 {
+    // Pointers take 8 bytes each
+    if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
+    {
+        uint32_t type_size = 8 * (type.vecsize == 3 ?
4 : type.vecsize);
+        size_t dim_cnt = type.array.size();
+        for (uint32_t dim_idx = 0; dim_idx < dim_cnt; dim_idx++)
+            type_size *= to_array_size_literal(type, dim_idx);
+
+        return type_size;
+    }
+
     switch (type.basetype)
     {
     case SPIRType::Unknown:
@@ -15066,6 +15099,10 @@ uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t
 // Returns the byte alignment of a type.
 uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const
 {
+    // Pointers align on multiples of 8 bytes
+    if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
+        return 8 * (type.vecsize == 3 ? 4 : type.vecsize);
+
     switch (type.basetype)
     {
     case SPIRType::Unknown:
diff --git a/spirv_parser.cpp b/spirv_parser.cpp
index 262aa70f..e508f655 100644
--- a/spirv_parser.cpp
+++ b/spirv_parser.cpp
@@ -659,6 +659,16 @@ void Parser::parse(const Instruction &instruction)
     {
         uint32_t id = ops[0];
 
+        // The type may have previously been declared as an OpTypeForwardPointer.
+        // If so, we want to retain that knowledge, in case content needs to be emitted
+        // relative to when the forward pointer was declared, not when the pointer is
+        // declared. SPIR-V allows composites to contain a type that has only been
+        // declared as a forward pointer when the composite is defined, but in many
+        // languages, it is necessary to emit the component before the composite.
+        // Retrieve this info *before* overwriting the type info for the id below.
+        auto *fwd_ptr = maybe_get<SPIRType>(id);
+        bool was_fwd_ptr = fwd_ptr && fwd_ptr->forward_pointer;
+
         // Very rarely, we might receive a FunctionPrototype here.
         // We won't be able to compile it, but we shouldn't crash when parsing.
         // We should be able to reflect.
@@ -671,6 +681,7 @@ void Parser::parse(const Instruction &instruction)
         ptrbase.pointer = true;
         ptrbase.pointer_depth++;
         ptrbase.storage = static_cast<StorageClass>(ops[1]);
+        ptrbase.was_forward_referenced = was_fwd_ptr;
 
         if (ptrbase.storage == StorageClassAtomicCounter)
             ptrbase.basetype = SPIRType::AtomicCounter;

From 78eb5043f9a9923d5c06c2328ea198fd808fa469 Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Fri, 24 Jun 2022 17:28:17 -0400
Subject: [PATCH 2/4] MSL: Fixes from review for SPV_KHR_physical_storage_buffer extension.

- For physical storage pointers, emit forward reference declarations in addition to definitions.
- Minor syntax corrections.
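
As a rough illustration of the first point (a hand-written sketch, not verbatim compiler output; struct names are taken from the buffer_device_address test shaders in this series): structs reachable through a physical storage buffer pointer are now forward-declared ahead of all struct definitions, so a pointer member can be emitted before its pointee's definition:

    struct Position;
    struct PositionReferences;

    struct Registers
    {
        float4x4 view_projection;
        device PositionReferences* references; // needs only the forward declaration above
    };

    struct PositionReferences
    {
        device Position* buffers[1]; // definition may be emitted later, in any order
    };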
--- ...ddress-recursive-struct-pointers.msl2.comp | 6 +++-- .../comp/buffer_device_address.msl2.comp | 13 +++++---- .../vert/buffer_device_address.msl2.vert | 13 +++++---- ...ddress-recursive-struct-pointers.msl2.comp | 6 +++-- .../comp/buffer_device_address.msl2.comp | 13 +++++---- .../vert/buffer_device_address.msl2.vert | 13 +++++---- spirv_common.hpp | 1 - spirv_msl.cpp | 27 +++++++++++++++---- spirv_parser.cpp | 11 -------- 9 files changed, 62 insertions(+), 41 deletions(-) diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp index 7b4564c4..4d6330e3 100644 --- a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -3,7 +3,9 @@ using namespace metal; -struct t21 +struct t21; + +struct t24 { int4 m0[2]; int m1; @@ -12,7 +14,7 @@ struct t21 float2x4 m4; }; -struct t24 +struct t21 { int4 m0[2]; int m1; diff --git a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp index ed18a406..b03300ee 100644 --- a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp +++ b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { device PositionReferences* references; float fract_time; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) diff --git a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert index ff8b5f06..57f86ec1 100644 --- a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert +++ b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { float4x4 view_projection; device PositionReferences* references; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + struct main0_out { float4 out_color [[user(locn0)]]; diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp index e18b4a5c..bbb07380 100644 --- a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -3,7 +3,9 @@ using namespace metal; -struct t21 +struct t21; + +struct t24 { int4 m0[2]; int m1; @@ -12,7 +14,7 @@ struct t21 float2x4 m4; }; -struct t24 +struct t21 { int4 m0[2]; int m1; diff --git a/reference/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/shaders-msl/comp/buffer_device_address.msl2.comp index 523fcecd..8c1ff2d1 100644 --- 
a/reference/shaders-msl/comp/buffer_device_address.msl2.comp +++ b/reference/shaders-msl/comp/buffer_device_address.msl2.comp @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { device PositionReferences* references; float fract_time; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) diff --git a/reference/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/shaders-msl/vert/buffer_device_address.msl2.vert index 3c4d65a5..2fa16bcd 100644 --- a/reference/shaders-msl/vert/buffer_device_address.msl2.vert +++ b/reference/shaders-msl/vert/buffer_device_address.msl2.vert @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { float4x4 view_projection; device PositionReferences* references; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + struct main0_out { float4 out_color [[user(locn0)]]; diff --git a/spirv_common.hpp b/spirv_common.hpp index 7864f0fd..1c8a7253 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -591,7 +591,6 @@ struct SPIRType : IVariant uint32_t pointer_depth = 0; bool pointer = false; bool forward_pointer = false; - bool was_forward_referenced = false; spv::StorageClass storage = spv::StorageClassGeneric; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 37e9b9a1..04cad54a 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -6804,6 +6804,25 @@ void CompilerMSL::emit_specialization_constants_and_structs() // these types for purpose of iterating over them in ir.ids_for_type and friends. auto loop_lock = ir.create_loop_soft_lock(); + // Physical storage buffer pointers can have cyclical references, + // so emit forward declarations of them before other structs. + // Ignore type_id because we want the underlying struct type from the pointer. + ir.for_each_typed_id([&](uint32_t /* type_id */, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + declared_structs.count(type.self) == 0) + { + statement("struct ", to_name(type.self), ";"); + declared_structs.insert(type.self); + emitted = true; + } + }); + if (emitted) + statement(""); + + emitted = false; + declared_structs.clear(); + for (auto &id_ : ir.ids_for_constant_or_type) { auto &id = ir.ids[id_]; @@ -6880,9 +6899,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() auto &type = id.get(); TypeID type_id = type.self; - // Forward references can change the declaration dependency order. - // Ensure that change is considered when emitting MSL structs, which must be emitted in dependency order. 
-        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && (!type.pointer || type.was_forward_referenced);
+        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer;
         bool is_block =
             has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
@@ -13543,7 +13560,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         const char *restrict_kw;
 
         auto type_address_space = get_type_address_space(type, id);
-        const auto* p_parent_type = &get<SPIRType>(type.parent_type);
+        const auto *p_parent_type = &get<SPIRType>(type.parent_type);
 
         // Work around C pointer qualifier rules. If glsl_type is a pointer type as well
         // we'll need to emit the address space to the right.
@@ -13556,7 +13573,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         {
             // Since this is not a pointer-to-pointer, ensure we've dug down to the base type.
             // Some situations chain pointers even though they are not formally pointers-of-pointers.
-            while(type_is_pointer(*p_parent_type))
+            while (type_is_pointer(*p_parent_type))
                 p_parent_type = &get<SPIRType>(p_parent_type->parent_type);
 
             type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
diff --git a/spirv_parser.cpp b/spirv_parser.cpp
index e508f655..262aa70f 100644
--- a/spirv_parser.cpp
+++ b/spirv_parser.cpp
@@ -659,16 +659,6 @@ void Parser::parse(const Instruction &instruction)
     {
         uint32_t id = ops[0];
 
-        // The type may have previously been declared as an OpTypeForwardPointer.
-        // If so, we want to retain that knowledge, in case content needs to be emitted
-        // relative to when the forward pointer was declared, not when the pointer is
-        // declared. SPIR-V allows composites to contain a type that has only been
-        // declared as a forward pointer when the composite is defined, but in many
-        // languages, it is necessary to emit the component before the composite.
-        // Retrieve this info *before* overwriting the type info for the id below.
-        auto *fwd_ptr = maybe_get<SPIRType>(id);
-        bool was_fwd_ptr = fwd_ptr && fwd_ptr->forward_pointer;
-
         // Very rarely, we might receive a FunctionPrototype here.
         // We won't be able to compile it, but we shouldn't crash when parsing.
         // We should be able to reflect.
@@ -681,7 +671,6 @@ void Parser::parse(const Instruction &instruction)
         ptrbase.pointer = true;
         ptrbase.pointer_depth++;
         ptrbase.storage = static_cast<StorageClass>(ops[1]);
-        ptrbase.was_forward_referenced = was_fwd_ptr;
 
         if (ptrbase.storage == StorageClassAtomicCounter)
             ptrbase.basetype = SPIRType::AtomicCounter;

From 4185acc70de8d8af5fa633e1e5e96ae6bdca2f9e Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Fri, 1 Jul 2022 16:10:41 -0400
Subject: [PATCH 3/4] MSL: Fixes from review for SPV_KHR_physical_storage_buffer extension.

- Assign ulongn physical type to buffer pointers in short arrays when array stride is larger than pointer size.
- Support GL_EXT_buffer_reference_uvec2 casting buffer reference pointers to and from uvec2 values.
- When packing structs, include structs inside physical buffers.
- Update mechanism for traversing pointer arrays when calculating type sizes.
- Added unit test shaders.
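
Rough sketch of the effect of the first two points (hand-written and abbreviated; the names t21, t24, u24 and SSBO follow the test shaders added below, and the exact casts are illustrative rather than verbatim output): a buffer-pointer member whose ArrayStride (16) exceeds the 8-byte pointer size gets a ulong2 physical type and is cast back to a pointer on access, while GL_EXT_buffer_reference_uvec2 conversions become as_type reinterprets:

    struct t24
    {
        ulong2 m2[2];   // buffer pointers widened to ulong2: ArrayStride (16) > pointer size (8)
        device t21* m3; // a plain pointer member keeps its pointer type
    };

    // Member access casts the packed address back to a pointer:
    int v = ((device t21*)u24.m2[0].x)->m1;

    // GL_EXT_buffer_reference_uvec2 round trip, with b a uint2 holding an address:
    device SSBO* ptr = (device SSBO*)as_type<uint64_t>(b); // uvec2 -> buffer pointer
    uint2 addr = as_type<uint2>((uint64_t)ptr);            // buffer pointer -> uvec2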
--- ...-vec-and-cast-to-and-from-uvec2.msl23.comp | 28 +++++++ ...ress-recursive-struct-pointers.msl23.comp} | 32 ++++---- ...-vec-and-cast-to-and-from-uvec2.msl23.comp | 26 +++++++ ...ress-recursive-struct-pointers.msl23.comp} | 32 ++++---- ...-vec-and-cast-to-and-from-uvec2.msl23.comp | 22 ++++++ ...ress-recursive-struct-pointers.msl23.comp} | 0 spirv_glsl.cpp | 5 ++ spirv_glsl.hpp | 1 + spirv_msl.cpp | 75 ++++++++++++++++--- spirv_msl.hpp | 2 + 10 files changed, 181 insertions(+), 42 deletions(-) create mode 100644 reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp rename reference/opt/shaders-msl/comp/{buffer_device_address-recursive-struct-pointers.msl2.comp => buffer_device_address-recursive-struct-pointers.msl23.comp} (62%) create mode 100644 reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp rename reference/shaders-msl/comp/{buffer_device_address-recursive-struct-pointers.msl2.comp => buffer_device_address-recursive-struct-pointers.msl23.comp} (57%) create mode 100644 shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp rename shaders-msl/comp/{buffer_device_address-recursive-struct-pointers.msl2.comp => buffer_device_address-recursive-struct-pointers.msl23.comp} (100%) diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000..f2ba5416 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + ((device SSBO*)as_type(_10.b))->a1 = float3(1.0, 2.0, 3.0); + uint2 _35 = as_type((uint64_t)((device SSBO*)as_type(_10.b + uint2(32u)))); + uint2 v2 = _35; + device SSBO* _39 = ((device SSBO*)as_type(_35)); + float3 v3 = float3(_39->a1); + _39->a1 = float3(_39->a1) + float3(1.0); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp similarity index 62% rename from reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp rename to reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp index 4d6330e3..d66154b5 100644 --- a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -9,7 +9,7 @@ struct t24 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -18,7 +18,7 @@ struct t21 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -47,33 +47,33 @@ kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1) v8 = _75; int _82 = _75 | int(u24.m4[1u][1] - 6.0); v8 = _82; - int _92 = _82 | (u24.m2[0]->m0[0].x - 3); + int _92 = _82 | (((device t21*)u24.m2[0].x)->m0[0].x - 3); v8 = _92; - int _101 = _92 | (u24.m2[0]->m0[u35.m0[1]].x - 4); + int _101 = _92 | (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); v8 = _101; - int _109 = _101 | (u24.m2[0]->m1 - 
5); + int _109 = _101 | (((device t21*)u24.m2[0].x)->m1 - 5); v8 = _109; - int _118 = _109 | int(u24.m2[0]->m4[0u][0] - 6.0); + int _118 = _109 | int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); v8 = _118; - int _127 = _118 | int(u24.m2[0]->m4[1u][0] - 8.0); + int _127 = _118 | int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); v8 = _127; - int _136 = _127 | int(u24.m2[0]->m4[0u][1] - 7.0); + int _136 = _127 | int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); v8 = _136; - int _145 = _136 | int(u24.m2[0]->m4[1u][1] - 9.0); + int _145 = _136 | int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); v8 = _145; - int _155 = _145 | (u24.m2[u35.m0[1]]->m0[0].x - 6); + int _155 = _145 | (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); v8 = _155; - int _167 = _155 | (u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); + int _167 = _155 | (((device t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); v8 = _167; - int _177 = _167 | (u24.m2[u35.m0[1]]->m1 - 8); + int _177 = _167 | (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); v8 = _177; - int _187 = _177 | int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); + int _187 = _177 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); v8 = _187; - int _198 = _187 | int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); + int _198 = _187 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); v8 = _198; - int _209 = _198 | int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); + int _209 = _198 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); v8 = _209; - int _220 = _209 | int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + int _220 = _209 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); v8 = _220; int _228 = _220 | (u24.m3->m0[0].x - 9); v8 = _228; diff --git a/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000..474d5092 --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + ((device SSBO*)as_type(_10.b))->a1 = float3(1.0, 2.0, 3.0); + uint2 v2 = as_type((uint64_t)((device SSBO*)as_type(_10.b + uint2(32u)))); + float3 v3 = float3(((device SSBO*)as_type(v2))->a1); + ((device SSBO*)as_type(v2))->a1 = v3 + float3(1.0); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp similarity index 57% rename from reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp rename to reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp index bbb07380..35b7af54 100644 --- a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -9,7 +9,7 @@ struct t24 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -18,7 +18,7 @@ struct t21 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -40,20 +40,20 @@ kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1) v8 |= int(u24.m4[1u][0] - 5.0); v8 |= int(u24.m4[0u][1] - 4.0); v8 |= 
int(u24.m4[1u][1] - 6.0); - v8 |= (u24.m2[0]->m0[0].x - 3); - v8 |= (u24.m2[0]->m0[u35.m0[1]].x - 4); - v8 |= (u24.m2[0]->m1 - 5); - v8 |= int(u24.m2[0]->m4[0u][0] - 6.0); - v8 |= int(u24.m2[0]->m4[1u][0] - 8.0); - v8 |= int(u24.m2[0]->m4[0u][1] - 7.0); - v8 |= int(u24.m2[0]->m4[1u][1] - 9.0); - v8 |= (u24.m2[u35.m0[1]]->m0[0].x - 6); - v8 |= (u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); - v8 |= (u24.m2[u35.m0[1]]->m1 - 8); - v8 |= int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); - v8 |= int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); - v8 |= int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); - v8 |= int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + v8 |= (((device t21*)u24.m2[0].x)->m0[0].x - 3); + v8 |= (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); + v8 |= (((device t21*)u24.m2[0].x)->m1 - 5); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); v8 |= (u24.m3->m0[0].x - 9); v8 |= (u24.m3->m0[u35.m0[1]].x - 10); v8 |= (u24.m3->m1 - 11); diff --git a/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000..9212e04c --- /dev/null +++ b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference, buffer_reference_align = 4) buffer SSBO +{ + vec3 a1; // Will be 12-byte packed + float a2; +}; + +layout(push_constant) uniform UBO +{ + uvec2 b; +}; + +void main() +{ + SSBO(b).a1 = vec3(1.0, 2.0, 3.0); // uvec2 -> buff ref and assign to packed + uvec2 v2 = uvec2(SSBO(b + 32)); // uvec2 -> buff ref -> uvec2 + vec3 v3 = SSBO(v2).a1; // uvec2 -> buff ref and assign from packed + SSBO(v2).a1 = v3 + 1.0; // uvec2 -> buff ref and assign to packed +} diff --git a/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp similarity index 100% rename from shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp rename to shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 80d40c0d..a9094174 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -8981,6 +8981,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!is_literal) mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + check_physical_type_cast(expr, type, physical_type); }; for (uint32_t i = 0; i < count; i++) @@ -9313,6 +9314,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice return expr; } +void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +{ 
+} + void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) { } diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index edb09fa6..dc693787 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -711,6 +711,7 @@ protected: spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); + virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 04cad54a..f849adff 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1967,6 +1967,13 @@ void CompilerMSL::mark_packable_structs() mark_as_packable(type); } }); + + // Physical storage buffer pointers can appear outside of the context of a variable, if the address + // is calculated from a ulong or uvec2 and cast to a pointer, so check if they need to be packed too. + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + mark_as_packable(type); + }); } // If the specified type is a struct, it and any nested structs @@ -4325,8 +4332,17 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in auto physical_type = mbr_type; physical_type.vecsize = elems_per_stride; - if (!is_buff_ptr) - physical_type.parent_type = 0; + physical_type.parent_type = 0; + + // If this is a physical buffer pointer, replace type with a ulongn vector. + if (is_buff_ptr) + { + physical_type.width = 64; + physical_type.basetype = to_unsigned_basetype(physical_type.width); + physical_type.pointer = false; + physical_type.pointer_depth = false; + physical_type.forward_pointer = false; + } uint32_t type_id = ir.increase_bound_by(1); set(type_id, physical_type); @@ -7709,6 +7725,23 @@ void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index); } + +// If the physical type of a physical buffer pointer has been changed +// to a ulong or ulongn vector, add a cast back to the pointer type. +void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) +{ + auto *p_physical_type = maybe_get(physical_type); + if (p_physical_type && + p_physical_type->storage == StorageClassPhysicalStorageBuffer && + p_physical_type->basetype == to_unsigned_basetype(64)) + { + if (p_physical_type->vecsize > 1) + expr += ".x"; + + expr = join("((", type_to_glsl(*type), ")", expr, ")"); + } +} + // Override for MSL-specific syntax instructions void CompilerMSL::emit_instruction(const Instruction &instruction) { @@ -14362,18 +14395,31 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in // size (eg. short shift right becomes int), which means chaining integer ops // together may introduce size variations that SPIR-V doesn't know about. 
    if (same_size_cast && !integral_cast)
-    {
         return "as_type<" + type_to_glsl(out_type) + ">";
-    }
     else
-    {
         return type_to_glsl(out_type);
-    }
 }
 
-bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
+bool CompilerMSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
 {
-    return false;
+    auto &out_type = get<SPIRType>(result_type);
+    auto &in_type = expression_type(op0);
+    bool uvec2_to_ptr = (in_type.basetype == SPIRType::UInt && in_type.vecsize == 2 &&
+                         out_type.pointer && out_type.storage == StorageClassPhysicalStorageBuffer);
+    bool ptr_to_uvec2 = (in_type.pointer && in_type.storage == StorageClassPhysicalStorageBuffer &&
+                         out_type.basetype == SPIRType::UInt && out_type.vecsize == 2);
+    string expr;
+
+    // Casting between uvec2 and buffer storage pointer per GL_EXT_buffer_reference_uvec2
+    if (uvec2_to_ptr)
+        expr = join("((", type_to_glsl(out_type), ")as_type<uint64_t>(", to_unpacked_expression(op0), "))");
+    else if (ptr_to_uvec2)
+        expr = join("as_type<", type_to_glsl(out_type), ">((uint64_t)", to_unpacked_expression(op0), ")");
+    else
+        return false;
+
+    emit_op(result_type, id, expr, should_forward(op0));
+    return true;
 }
 
 // Returns an MSL string identifying the name of a SPIR-V builtin.
@@ -15050,9 +15096,18 @@ uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_p
     if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
     {
         uint32_t type_size = 8 * (type.vecsize == 3 ? 4 : type.vecsize);
+
+        // Work our way through potentially layered arrays,
+        // stopping when we hit a pointer that is not also an array.
         size_t dim_cnt = type.array.size();
-        for (uint32_t dim_idx = 0; dim_idx < dim_cnt; dim_idx++)
-            type_size *= to_array_size_literal(type, dim_idx);
+        int32_t dim_idx = dim_cnt - 1;
+        auto *p_type = &type;
+        while (!type_is_pointer(*p_type) && dim_idx >= 0)
+        {
+            type_size *= to_array_size_literal(*p_type, dim_idx);
+            p_type = &get<SPIRType>(p_type->parent_type);
+            dim_idx--;
+        }
 
         return type_size;
     }
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index b1abd12a..c0317c7a 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -986,6 +986,8 @@ protected:
     void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
                                                 bool &is_packed) override;
     void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
+    void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;
+
     bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
     bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
     bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);

From 064a697b18c135514d08b580793da77fb37061ef Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Fri, 1 Jul 2022 17:09:35 -0400
Subject: [PATCH 4/4] MSL: Fix implicit conversion precision build error.

---
 spirv_msl.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index f849adff..3b3d8a2b 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -15099,8 +15099,7 @@ uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_p
 
         // Work our way through potentially layered arrays,
         // stopping when we hit a pointer that is not also an array.
-        size_t dim_cnt = type.array.size();
-        int32_t dim_idx = dim_cnt - 1;
+        int32_t dim_idx = (int32_t)type.array.size() - 1;
         auto *p_type = &type;
         while (!type_is_pointer(*p_type) && dim_idx >= 0)
         {