Merged in cts-shared-composite-variables (pull request #1)

Fix shared composite variables tests Approved-by: Steven Winston
2022-08-08 00:35:49 +00:00 · 2022-08-08 00:35:49 +00:00 · a9cadd4982
commit a9cadd4982
parent 0cccd0a65a fc4a12fd4f
19 changed files with 11126 additions and 13 deletions
--- a/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp
+++ b/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp
--- a/reference/opt/shaders-msl/comp/shared-matrix-cast.comp
+++ b/reference/opt/shaders-msl/comp/shared-matrix-cast.comp
--- a/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp
+++ b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp
--- a/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp
+++ b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp
--- a/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp
+++ b/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp
@ -0,0 +1,63 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct S1
+{
+    int3 a;
+    uint2 b;
+    short4 c;
+    uint d;
+};
+
+struct block
+{
+    uint passed;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
+
+kernel void main0(device block& _132 [[buffer(0)]])
+{
+    threadgroup S1 s1;
+    s1.a = int3(6, 8, 8);
+    s1.b = uint2(4u);
+    s1.c = short4(bool4(false, false, false, true));
+    s1.d = 6u;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
+    bool _144 = all(int3(6, 8, 8) == s1.a);
+    bool _108;
+    if (_144)
+    {
+        _108 = all(uint2(4u) == s1.b);
+    }
+    else
+    {
+        _108 = _144;
+    }
+    bool _117;
+    if (_108)
+    {
+        _117 = all(bool4(false, false, false, true) == bool4(s1.c));
+    }
+    else
+    {
+        _117 = _108;
+    }
+    bool _126;
+    if (_117)
+    {
+        _126 = 6u == s1.d;
+    }
+    else
+    {
+        _126 = _117;
+    }
+    if (_126)
+    {
+        _132.passed++;
+    }
+}
+
--- a/reference/shaders-msl/comp/shared-matrix-array-of-array.comp
+++ b/reference/shaders-msl/comp/shared-matrix-array-of-array.comp
--- a/reference/shaders-msl/comp/shared-matrix-cast.comp
+++ b/reference/shaders-msl/comp/shared-matrix-cast.comp
--- a/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp
+++ b/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp
--- a/reference/shaders-msl/comp/shared-matrix-nested-struct.comp
+++ b/reference/shaders-msl/comp/shared-matrix-nested-struct.comp
--- a/reference/shaders-msl/comp/shared-struct-bool-cast.comp
+++ b/reference/shaders-msl/comp/shared-struct-bool-cast.comp
@ -0,0 +1,110 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct S1
+{
+    int3 a;
+    uint2 b;
+    short4 c;
+    uint d;
+};
+
+struct block
+{
+    uint passed;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
+
+static inline __attribute__((always_inline))
+bool compare_ivec3(thread const int3& a, thread const int3& b)
+{
+    return all(a == b);
+}
+
+static inline __attribute__((always_inline))
+bool compare_uvec2(thread const uint2& a, thread const uint2& b)
+{
+    return all(a == b);
+}
+
+static inline __attribute__((always_inline))
+bool compare_bvec4(thread const bool4& a, thread const bool4& b)
+{
+    return all(a == b);
+}
+
+static inline __attribute__((always_inline))
+bool compare_uint(thread const uint& a, thread const uint& b)
+{
+    return a == b;
+}
+
+kernel void main0(device block& _132 [[buffer(0)]])
+{
+    threadgroup S1 s1;
+    s1.a = int3(6, 8, 8);
+    s1.b = uint2(4u);
+    s1.c = short4(bool4(false, false, false, true));
+    s1.d = 6u;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
+    bool allOk = true;
+    bool _99;
+    if (allOk)
+    {
+        int3 param = int3(6, 8, 8);
+        int3 param_1 = s1.a;
+        _99 = compare_ivec3(param, param_1);
+    }
+    else
+    {
+        _99 = allOk;
+    }
+    allOk = _99;
+    bool _108;
+    if (allOk)
+    {
+        uint2 param_2 = uint2(4u);
+        uint2 param_3 = s1.b;
+        _108 = compare_uvec2(param_2, param_3);
+    }
+    else
+    {
+        _108 = allOk;
+    }
+    allOk = _108;
+    bool _117;
+    if (allOk)
+    {
+        bool4 param_4 = bool4(false, false, false, true);
+        bool4 param_5 = bool4(s1.c);
+        _117 = compare_bvec4(param_4, param_5);
+    }
+    else
+    {
+        _117 = allOk;
+    }
+    allOk = _117;
+    bool _126;
+    if (allOk)
+    {
+        uint param_6 = 6u;
+        uint param_7 = s1.d;
+        _126 = compare_uint(param_6, param_7);
+    }
+    else
+    {
+        _126 = allOk;
+    }
+    allOk = _126;
+    if (allOk)
+    {
+        _132.passed++;
+    }
+}
+
--- a/shaders-msl/comp/shared-matrix-array-of-array.comp
+++ b/shaders-msl/comp/shared-matrix-array-of-array.comp
@ -0,0 +1,65 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std140, binding = 0) buffer block { highp uint passed; };
+struct S1 {
+	mediump mat4x3 a[2];
+	lowp float b;
+	lowp vec2 c[3];
+};
+struct S2 {
+	highp ivec4 a;
+	bool b[3][1][3];
+};
+
+bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }
+bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }
+bool compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }
+bool compare_mat4x3   (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); }
+bool compare_ivec4    (highp ivec4 a, highp ivec4 b)  { return a == b; }
+bool compare_bool     (bool a, bool b)                { return a == b; }
+
+shared S1 s1;
+shared S2 s2;
+
+void main (void) {
+	s1.a[0] = mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0);
+	s1.a[1] = mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0);
+	s1.b = 7.0;
+	s1.c[0] = vec2(-5.0, -4.0);
+	s1.c[1] = vec2(3.0, -5.0);
+	s1.c[2] = vec2(-3.0, -1.0);
+	s2.a = ivec4(1, 0, -3, 1);
+	s2.b[0][0][0] = true;
+	s2.b[0][0][1] = false;
+	s2.b[0][0][2] = false;
+	s2.b[1][0][0] = true;
+	s2.b[1][0][1] = false;
+	s2.b[1][0][2] = true;
+	s2.b[2][0][0] = false;
+	s2.b[2][0][1] = true;
+	s2.b[2][0][2] = true;
+
+	barrier();
+	memoryBarrier();
+	bool allOk = true;
+	allOk = allOk && compare_mat4x3(mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0), s1.a[0]);
+	allOk = allOk && compare_mat4x3(mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0), s1.a[1]);
+	allOk = allOk && compare_float(7.0, s1.b);
+	allOk = allOk && compare_vec2(vec2(-5.0, -4.0), s1.c[0]);
+	allOk = allOk && compare_vec2(vec2(3.0, -5.0), s1.c[1]);
+	allOk = allOk && compare_vec2(vec2(-3.0, -1.0), s1.c[2]);
+	allOk = allOk && compare_ivec4(ivec4(1, 0, -3, 1), s2.a);
+	allOk = allOk && compare_bool(true, s2.b[0][0][0]);
+	allOk = allOk && compare_bool(false, s2.b[0][0][1]);
+	allOk = allOk && compare_bool(false, s2.b[0][0][2]);
+	allOk = allOk && compare_bool(true, s2.b[1][0][0]);
+	allOk = allOk && compare_bool(false, s2.b[1][0][1]);
+	allOk = allOk && compare_bool(true, s2.b[1][0][2]);
+	allOk = allOk && compare_bool(false, s2.b[2][0][0]);
+	allOk = allOk && compare_bool(true, s2.b[2][0][1]);
+	allOk = allOk && compare_bool(true, s2.b[2][0][2]);
+	if (allOk)
+		passed++;
+
+}
--- a/shaders-msl/comp/shared-matrix-cast.comp
+++ b/shaders-msl/comp/shared-matrix-cast.comp
@ -0,0 +1,33 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std140, binding = 0) buffer block { highp uint passed; };
+struct S1 {
+	mediump vec4 a;
+	highp mat3x2 b;
+	bvec4 c;
+};
+
+bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }
+bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }
+bool compare_vec4     (highp vec4 a, highp vec4 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }
+bool compare_mat3x2   (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }
+bool compare_bvec4    (bvec4 a, bvec4 b)              { return a == b; }
+
+shared S1 s1;
+
+void main (void) {
+	s1.a = vec4(1.0, -5.0, -9.0, -5.0);
+	s1.b = mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0);
+	s1.c = bvec4(false, true, false, false);
+
+	barrier();
+	memoryBarrier();
+	bool allOk = true;
+	allOk = allOk && compare_vec4(vec4(1.0, -5.0, -9.0, -5.0), s1.a);
+	allOk = allOk && compare_mat3x2(mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0), s1.b);
+	allOk = allOk && compare_bvec4(bvec4(false, true, false, false), s1.c);
+	if (allOk)
+		passed++;
+
+}
--- a/shaders-msl/comp/shared-matrix-nested-struct-array.comp
+++ b/shaders-msl/comp/shared-matrix-nested-struct-array.comp
@ -0,0 +1,87 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std140, binding = 0) buffer block { highp uint passed; };
+struct sA
+{
+	mediump mat2x3 mA;
+};
+struct sB
+{
+	mediump mat2 mA;
+	mediump mat3x2 mB;
+	highp uvec3 mC;
+};
+struct sC
+{
+	sA mA;
+	sB mB;
+};
+struct sD
+{
+	sC mA;
+};
+struct sE
+{
+	lowp mat3x2 mA;
+	lowp mat4x3 mB;
+};
+struct sF
+{
+	sE mA;
+};
+struct sG
+{
+	sF mA;
+};
+struct sH
+{
+	bvec3 mA[2];
+};
+struct S1 {
+	sD a;
+	sG b;
+	sH c[2];
+};
+
+bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }
+bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }
+bool compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }
+bool compare_mat2     (highp mat2 a, highp mat2 b)    { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); }
+bool compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); }
+bool compare_mat3x2   (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }
+bool compare_mat4x3   (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); }
+bool compare_uvec3    (highp uvec3 a, highp uvec3 b)  { return a == b; }
+bool compare_bvec3    (bvec3 a, bvec3 b)              { return a == b; }
+
+shared S1 s1;
+
+void main (void) {
+	s1.a.mA.mA.mA = mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0);
+	s1.a.mA.mB.mA = mat2(9.0, -4.0, -6.0, -1.0);
+	s1.a.mA.mB.mB = mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0);
+	s1.a.mA.mB.mC = uvec3(3u, 1u, 5u);
+	s1.b.mA.mA.mA = mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0);
+	s1.b.mA.mA.mB = mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0);
+	s1.c[0].mA[0] = bvec3(true, false, false);
+	s1.c[0].mA[1] = bvec3(true, false, false);
+	s1.c[1].mA[0] = bvec3(false, false, false);
+	s1.c[1].mA[1] = bvec3(false, false, false);
+
+	barrier();
+	memoryBarrier();
+	bool allOk = true;
+	allOk = allOk && compare_mat2x3(mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0), s1.a.mA.mA.mA);
+	allOk = allOk && compare_mat2(mat2(9.0, -4.0, -6.0, -1.0), s1.a.mA.mB.mA);
+	allOk = allOk && compare_mat3x2(mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0), s1.a.mA.mB.mB);
+	allOk = allOk && compare_uvec3(uvec3(3u, 1u, 5u), s1.a.mA.mB.mC);
+	allOk = allOk && compare_mat3x2(mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0), s1.b.mA.mA.mA);
+	allOk = allOk && compare_mat4x3(mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0), s1.b.mA.mA.mB);
+	allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[0]);
+	allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[1]);
+	allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[0]);
+	allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[1]);
+	if (allOk)
+		passed++;
+
+}
--- a/shaders-msl/comp/shared-matrix-nested-struct.comp
+++ b/shaders-msl/comp/shared-matrix-nested-struct.comp
@ -0,0 +1,141 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std140, binding = 0) buffer block { highp uint passed; };
+struct sA
+{
+	highp mat4 mA;
+	bvec3 mB;
+	bvec4 mC;
+};
+struct sB
+{
+	bvec2 mA;
+};
+struct sC
+{
+	highp float mA;
+	mediump uvec4 mB;
+	mediump float mC;
+};
+struct sD
+{
+	sA mA;
+	sB mB;
+	sC mC;
+};
+struct sE
+{
+	sD mA;
+};
+struct sF
+{
+	lowp uvec3 mA;
+	bool mB;
+};
+struct sG
+{
+	sF mA;
+	highp mat3x2 mB;
+};
+struct sH
+{
+	sG mA;
+	mediump vec2 mB;
+};
+struct sI
+{
+	mediump mat2 mA;
+	bvec3 mB;
+	bvec4 mC;
+};
+struct sJ
+{
+	sI mA;
+	bvec3 mB;
+};
+struct sK
+{
+	bvec2 mA;
+	sJ mB;
+	mediump ivec2 mC;
+};
+struct S1 {
+	lowp uint a;
+	mediump vec4 b;
+};
+struct S2 {
+	sE a;
+	highp ivec3 b;
+	sH c;
+	sK d;
+};
+
+bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }
+bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }
+bool compare_vec4     (highp vec4 a, highp vec4 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }
+bool compare_mat2     (highp mat2 a, highp mat2 b)    { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); }
+bool compare_mat3x2   (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }
+bool compare_mat4     (highp mat4 a, highp mat4 b)    { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); }
+bool compare_ivec2    (highp ivec2 a, highp ivec2 b)  { return a == b; }
+bool compare_ivec3    (highp ivec3 a, highp ivec3 b)  { return a == b; }
+bool compare_uint     (highp uint a, highp uint b)    { return a == b; }
+bool compare_uvec3    (highp uvec3 a, highp uvec3 b)  { return a == b; }
+bool compare_uvec4    (highp uvec4 a, highp uvec4 b)  { return a == b; }
+bool compare_bool     (bool a, bool b)                { return a == b; }
+bool compare_bvec2    (bvec2 a, bvec2 b)              { return a == b; }
+bool compare_bvec3    (bvec3 a, bvec3 b)              { return a == b; }
+bool compare_bvec4    (bvec4 a, bvec4 b)              { return a == b; }
+
+shared S1 s1;
+shared S2 s2;
+
+void main (void) {
+	s1.a = 0u;
+	s1.b = vec4(8.0, 8.0, 0.0, -4.0);
+	s2.a.mA.mA.mA = mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0);
+	s2.a.mA.mA.mB = bvec3(true, false, false);
+	s2.a.mA.mA.mC = bvec4(true, true, true, false);
+	s2.a.mA.mB.mA = bvec2(true, true);
+	s2.a.mA.mC.mA = 7.0;
+	s2.a.mA.mC.mB = uvec4(8u, 6u, 2u, 0u);
+	s2.a.mA.mC.mC = -9.0;
+	s2.b = ivec3(1, -4, 0);
+	s2.c.mA.mA.mA = uvec3(4u, 9u, 1u);
+	s2.c.mA.mA.mB = false;
+	s2.c.mA.mB = mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0);
+	s2.c.mB = vec2(-6.0, -9.0);
+	s2.d.mA = bvec2(true, false);
+	s2.d.mB.mA.mA = mat2(-2.0, 3.0, 7.0, 2.0);
+	s2.d.mB.mA.mB = bvec3(false, false, false);
+	s2.d.mB.mA.mC = bvec4(false, false, false, true);
+	s2.d.mB.mB = bvec3(true, false, false);
+	s2.d.mC = ivec2(-9, 0);
+
+	barrier();
+	memoryBarrier();
+	bool allOk = true;
+	allOk = allOk && compare_uint(0u, s1.a);
+	allOk = allOk && compare_vec4(vec4(8.0, 8.0, 0.0, -4.0), s1.b);
+	allOk = allOk && compare_mat4(mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0), s2.a.mA.mA.mA);
+	allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.a.mA.mA.mB);
+	allOk = allOk && compare_bvec4(bvec4(true, true, true, false), s2.a.mA.mA.mC);
+	allOk = allOk && compare_bvec2(bvec2(true, true), s2.a.mA.mB.mA);
+	allOk = allOk && compare_float(7.0, s2.a.mA.mC.mA);
+	allOk = allOk && compare_uvec4(uvec4(8u, 6u, 2u, 0u), s2.a.mA.mC.mB);
+	allOk = allOk && compare_float(-9.0, s2.a.mA.mC.mC);
+	allOk = allOk && compare_ivec3(ivec3(1, -4, 0), s2.b);
+	allOk = allOk && compare_uvec3(uvec3(4u, 9u, 1u), s2.c.mA.mA.mA);
+	allOk = allOk && compare_bool(false, s2.c.mA.mA.mB);
+	allOk = allOk && compare_mat3x2(mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0), s2.c.mA.mB);
+	allOk = allOk && compare_vec2(vec2(-6.0, -9.0), s2.c.mB);
+	allOk = allOk && compare_bvec2(bvec2(true, false), s2.d.mA);
+	allOk = allOk && compare_mat2(mat2(-2.0, 3.0, 7.0, 2.0), s2.d.mB.mA.mA);
+	allOk = allOk && compare_bvec3(bvec3(false, false, false), s2.d.mB.mA.mB);
+	allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s2.d.mB.mA.mC);
+	allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.d.mB.mB);
+	allOk = allOk && compare_ivec2(ivec2(-9, 0), s2.d.mC);
+	if (allOk)
+		passed++;
+
+}
--- a/shaders-msl/comp/shared-struct-bool-cast.comp
+++ b/shaders-msl/comp/shared-struct-bool-cast.comp
@ -0,0 +1,35 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std140, binding = 0) buffer block { highp uint passed; };
+struct S1 {
+	mediump ivec3 a;
+	highp uvec2 b;
+	bvec4 c;
+	mediump uint d;
+};
+
+bool compare_ivec3    (highp ivec3 a, highp ivec3 b)  { return a == b; }
+bool compare_uint     (highp uint a, highp uint b)    { return a == b; }
+bool compare_uvec2    (highp uvec2 a, highp uvec2 b)  { return a == b; }
+bool compare_bvec4    (bvec4 a, bvec4 b)              { return a == b; }
+
+shared S1 s1;
+
+void main (void) {
+	s1.a = ivec3(6, 8, 8);
+	s1.b = uvec2(4u, 4u);
+	s1.c = bvec4(false, false, false, true);
+	s1.d = 6u;
+
+	barrier();
+	memoryBarrier();
+	bool allOk = true;
+	allOk = allOk && compare_ivec3(ivec3(6, 8, 8), s1.a);
+	allOk = allOk && compare_uvec2(uvec2(4u, 4u), s1.b);
+	allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s1.c);
+	allOk = allOk && compare_uint(6u, s1.d);
+	if (allOk)
+		passed++;
+
+}
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -1636,6 +1636,12 @@ enum ExtendedDecorations
 	// results of interpolation can.
 	SPIRVCrossDecorationInterpolantComponentExpr,

+	// Apply to any struct type that is used in the Workgroup storage class.
+	// This causes matrices in MSL prior to Metal 3.0 to be emitted using a special
+	// class that is convertible to the standard matrix type, to work around the
+	// lack of constructors in the 'threadgroup' address space.
+	SPIRVCrossDecorationWorkgroupStruct,
+
 	SPIRVCrossDecorationCount
 };

--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -10741,9 +10741,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (expr_type.vecsize > type.vecsize)
 			expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));

+		if (forward && ptr_expression)
+			ptr_expression->need_transpose = old_need_transpose;
+
 		// We might need to cast in order to load from a builtin.
 		cast_from_variable_load(ptr, expr, type);

+		if (forward && ptr_expression)
+			ptr_expression->need_transpose = false;
+
 		// We might be trying to load a gl_Position[N], where we should be
 		// doing float4[](gl_in[i].gl_Position, ...) instead.
 		// Similar workarounds are required for input arrays in tessellation.
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@ -1966,6 +1966,13 @@ void CompilerMSL::mark_packable_structs()
 			    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
 				mark_as_packable(type);
 		}
+
+		if (var.storage == StorageClassWorkgroup)
+		{
+			auto *type = &this->get<SPIRType>(var.basetype);
+			if (type->basetype == SPIRType::Struct)
+				mark_as_workgroup_struct(*type);
+		}
 	});

 	// Physical storage buffer pointers can appear outside of the context of a variable, if the address
@ -2008,6 +2015,38 @@ void CompilerMSL::mark_as_packable(SPIRType &type)
 	}
 }

+// If the specified type is a struct, it and any nested structs
+// are marked as used with workgroup storage using the SPIRVCrossDecorationWorkgroupStruct decoration.
+void CompilerMSL::mark_as_workgroup_struct(SPIRType &type)
+{
+	// If this is not the base type (eg. it's a pointer or array), tunnel down
+	if (type.parent_type)
+	{
+		mark_as_workgroup_struct(get<SPIRType>(type.parent_type));
+		return;
+	}
+
+	// Handle possible recursion when a struct contains a pointer to its own type nested somewhere.
+	if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct))
+	{
+		set_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct);
+
+		// Recurse
+		uint32_t mbr_cnt = uint32_t(type.member_types.size());
+		for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
+		{
+			uint32_t mbr_type_id = type.member_types[mbr_idx];
+			auto &mbr_type = get<SPIRType>(mbr_type_id);
+			mark_as_workgroup_struct(mbr_type);
+			if (mbr_type.type_alias)
+			{
+				auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
+				mark_as_workgroup_struct(mbr_type_alias);
+			}
+		}
+	}
+}
+
 // If a shader input exists at the location, it is marked as being used by this shader
 void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
                                                  StorageClass storage, bool fallback)
@ -4828,6 +4867,10 @@ void CompilerMSL::add_typedef_line(const string &line)
 // Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared
 void CompilerMSL::emit_custom_templates()
 {
+	static const string address_spaces[] = {
+		"thread", "constant", "device", "threadgroup", "threadgroup_imageblock", "ray_data", "object_data"
+	};
+
 	for (const auto &spv_func : spv_function_implementations)
 	{
 		switch (spv_func)
@ -4873,6 +4916,122 @@ void CompilerMSL::emit_custom_templates()
 			statement("");
 			break;

+		case SPVFuncImplStorageMatrix:
+			statement("template<typename T, int Cols, int Rows=Cols>");
+			statement("struct spvStorageMatrix");
+			begin_scope();
+			statement("vec<T, Rows> columns[Cols];");
+			statement("");
+			for (size_t method_idx = 0; method_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++method_idx)
+			{
+				// Some address spaces require particular features.
+				if (method_idx == 4) // threadgroup_imageblock
+					statement("#ifdef __HAVE_IMAGEBLOCKS__");
+				else if (method_idx == 5) // ray_data
+					statement("#ifdef __HAVE_RAYTRACING__");
+				else if (method_idx == 6) // object_data
+					statement("#ifdef __HAVE_MESH__");
+				const string &method_as = address_spaces[method_idx];
+				statement("spvStorageMatrix() ", method_as, " = default;");
+				if (method_idx != 1) // constant
+				{
+					statement(method_as, " spvStorageMatrix& operator=(initializer_list<vec<T, Rows>> cols) ",
+					          method_as);
+					begin_scope();
+					statement("size_t i;");
+					statement("thread vec<T, Rows>* col;");
+					statement("for (i = 0, col = cols.begin(); i < Cols; ++i, ++col)");
+					statement("    columns[i] = *col;");
+					statement("return *this;");
+					end_scope();
+				}
+				statement("");
+				for (size_t param_idx = 0; param_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++param_idx)
+				{
+					if (param_idx != method_idx)
+					{
+						if (param_idx == 4) // threadgroup_imageblock
+							statement("#ifdef __HAVE_IMAGEBLOCKS__");
+						else if (param_idx == 5) // ray_data
+							statement("#ifdef __HAVE_RAYTRACING__");
+						else if (param_idx == 6) // object_data
+							statement("#ifdef __HAVE_MESH__");
+					}
+					const string &param_as = address_spaces[param_idx];
+					statement("spvStorageMatrix(const ", param_as, " matrix<T, Cols, Rows>& m) ", method_as);
+					begin_scope();
+					statement("for (size_t i = 0; i < Cols; ++i)");
+					statement("    columns[i] = m.columns[i];");
+					end_scope();
+					statement("spvStorageMatrix(const ", param_as, " spvStorageMatrix& m) ", method_as, " = default;");
+					if (method_idx != 1) // constant
+					{
+						statement(method_as, " spvStorageMatrix& operator=(const ", param_as,
+						          " matrix<T, Cols, Rows>& m) ", method_as);
+						begin_scope();
+						statement("for (size_t i = 0; i < Cols; ++i)");
+						statement("    columns[i] = m.columns[i];");
+						statement("return *this;");
+						end_scope();
+						statement(method_as, " spvStorageMatrix& operator=(const ", param_as, " spvStorageMatrix& m) ",
+						          method_as, " = default;");
+					}
+					if (param_idx != method_idx && param_idx >= 4)
+						statement("#endif");
+					statement("");
+				}
+				statement("operator matrix<T, Cols, Rows>() const ", method_as);
+				begin_scope();
+				statement("matrix<T, Cols, Rows> m;");
+				statement("for (int i = 0; i < Cols; ++i)");
+				statement("    m.columns[i] = columns[i];");
+				statement("return m;");
+				end_scope();
+				statement("");
+				statement("vec<T, Rows> operator[](size_t idx) const ", method_as);
+				begin_scope();
+				statement("return columns[idx];");
+				end_scope();
+				if (method_idx != 1) // constant
+				{
+					statement(method_as, " vec<T, Rows>& operator[](size_t idx) ", method_as);
+					begin_scope();
+					statement("return columns[idx];");
+					end_scope();
+				}
+				if (method_idx >= 4)
+					statement("#endif");
+				statement("");
+			}
+			end_scope_decl();
+			statement("");
+			statement("template<typename T, int Cols, int Rows>");
+			statement("matrix<T, Rows, Cols> transpose(spvStorageMatrix<T, Cols, Rows> m)");
+			begin_scope();
+			statement("return transpose(matrix<T, Cols, Rows>(m));");
+			end_scope();
+			statement("");
+			statement("typedef spvStorageMatrix<half, 2, 2> spvStorage_half2x2;");
+			statement("typedef spvStorageMatrix<half, 2, 3> spvStorage_half2x3;");
+			statement("typedef spvStorageMatrix<half, 2, 4> spvStorage_half2x4;");
+			statement("typedef spvStorageMatrix<half, 3, 2> spvStorage_half3x2;");
+			statement("typedef spvStorageMatrix<half, 3, 3> spvStorage_half3x3;");
+			statement("typedef spvStorageMatrix<half, 3, 4> spvStorage_half3x4;");
+			statement("typedef spvStorageMatrix<half, 4, 2> spvStorage_half4x2;");
+			statement("typedef spvStorageMatrix<half, 4, 3> spvStorage_half4x3;");
+			statement("typedef spvStorageMatrix<half, 4, 4> spvStorage_half4x4;");
+			statement("typedef spvStorageMatrix<float, 2, 2> spvStorage_float2x2;");
+			statement("typedef spvStorageMatrix<float, 2, 3> spvStorage_float2x3;");
+			statement("typedef spvStorageMatrix<float, 2, 4> spvStorage_float2x4;");
+			statement("typedef spvStorageMatrix<float, 3, 2> spvStorage_float3x2;");
+			statement("typedef spvStorageMatrix<float, 3, 3> spvStorage_float3x3;");
+			statement("typedef spvStorageMatrix<float, 3, 4> spvStorage_float3x4;");
+			statement("typedef spvStorageMatrix<float, 4, 2> spvStorage_float4x2;");
+			statement("typedef spvStorageMatrix<float, 4, 3> spvStorage_float4x3;");
+			statement("typedef spvStorageMatrix<float, 4, 4> spvStorage_float4x4;");
+			statement("");
+			break;
+
 		default:
 			break;
 		}
@ -10872,12 +11031,23 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
 		else if (!is_scalar(physical_type)) // scalar type is already packed.
 			pack_pfx = "packed_";
 	}
-	else if (row_major)
+	else if (is_matrix(physical_type))
 	{
-		// Need to declare type with flipped vecsize/columns.
-		row_major_physical_type = physical_type;
-		swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
-		declared_type = &row_major_physical_type;
+		if (!msl_options.supports_msl_version(3, 0) &&
+		    has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct))
+		{
+			pack_pfx = "spvStorage_";
+			add_spv_func_and_recompile(SPVFuncImplStorageMatrix);
+			// The pack prefix causes problems with array<T> wrappers.
+			is_using_builtin_array = true;
+		}
+		if (row_major)
+		{
+			// Need to declare type with flipped vecsize/columns.
+			row_major_physical_type = physical_type;
+			swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
+			declared_type = &row_major_physical_type;
+		}
 	}

 	// Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
@ -10907,8 +11077,8 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
 		array_type = type_to_array_glsl(physical_type);
 	}

-	auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
-	                   member_attribute_qualifier(type, index), array_type, ";");
+	auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id, true), " ", qualifier,
+	                   to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";");

 	is_using_builtin_array = false;
 	return result;
@ -13581,7 +13751,7 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id)
 // The optional id parameter indicates the object whose type we are trying
 // to find the description for. It is optional. Most type descriptions do not
 // depend on a specific object's use of that type.
-string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
+string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
 {
 	string type_name;

@ -13671,9 +13841,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		// Need to special-case threadgroup booleans. They are supposed to be logical
 		// storage, but MSL compilers will sometimes crash if you use threadgroup bool.
 		// Workaround this by using 16-bit types instead and fixup on load-store to this data.
-		// FIXME: We have no sane way of working around this problem if a struct member is boolean
-		// and that struct is used as a threadgroup variable, but ... sigh.
-		if ((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup)
+		if ((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup || member)
 			type_name = "short";
 		else
 			type_name = "bool";
@ -13735,7 +13903,24 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)

 	// Matrix?
 	if (type.columns > 1)
+	{
+		auto *var = maybe_get_backing_variable(id);
+		if (var && var->basevariable)
+			var = &get<SPIRVariable>(var->basevariable);
+
+		// Need to special-case threadgroup matrices. Due to an oversight, Metal's
+		// matrix struct prior to Metal 3 lacks constructors in the threadgroup AS,
+		// preventing us from default-constructing or initializing matrices in threadgroup storage.
+		// Work around this by using our own type as storage.
+		if (((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup) &&
+		    !msl_options.supports_msl_version(3, 0))
+		{
+			add_spv_func_and_recompile(SPVFuncImplStorageMatrix);
+			type_name = "spvStorage_" + type_name;
+		}
+
 		type_name += to_string(type.columns) + "x";
+	}

 	// Vector or Matrix?
 	if (type.vecsize > 1)
@ -13765,6 +13950,11 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	}
 }

+string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
+{
+	return type_to_glsl(type, id, false);
+}
+
 string CompilerMSL::type_to_array_glsl(const SPIRType &type)
 {
 	// Allow Metal to use the array<T> template to make arrays a value type
@ -15786,13 +15976,40 @@ void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t

 void CompilerMSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
 {
+	bool is_packed = has_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypePacked);
+	auto *source_expr = maybe_get<SPIRExpression>(source_id);
 	auto *var = maybe_get_backing_variable(source_id);
+	const SPIRType *var_type, *phys_type;
+	if (uint32_t phys_id = get_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypeID))
+		phys_type = &get<SPIRType>(phys_id);
+	else
+		phys_type = &expr_type;
 	if (var)
+	{
 		source_id = var->self;
+		var_type = &get_variable_data_type(*var);
+	}

 	// Type fixups for workgroup variables if they are booleans.
-	if (var && var->storage == StorageClassWorkgroup && expr_type.basetype == SPIRType::Boolean)
+	if (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct) &&
+	    expr_type.basetype == SPIRType::Boolean)
 		expr = join(type_to_glsl(expr_type), "(", expr, ")");
+	// Type fixups for workgroup variables if they are matrices.
+	// Don't do fixup for packed types; those are handled specially.
+	// FIXME: Maybe use a type like spvStorageMatrix for packed matrices?
+	if (!msl_options.supports_msl_version(3, 0) && var &&
+	    (var->storage == StorageClassWorkgroup ||
+	     (var_type->basetype == SPIRType::Struct &&
+	      has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) &&
+	    expr_type.columns > 1)
+	{
+		SPIRType matrix_type = *phys_type;
+		if (source_expr && source_expr->need_transpose)
+			swap(matrix_type.vecsize, matrix_type.columns);
+		matrix_type.array.clear();
+		matrix_type.array_size_literal.clear();
+		expr = join(type_to_glsl(matrix_type), "(", expr, ")");
+	}

 	// Only interested in standalone builtin variables in the switch below.
 	if (!has_decoration(source_id, DecorationBuiltIn))
@ -15885,17 +16102,42 @@ void CompilerMSL::cast_from_variable_load(uint32_t source_id, std::string &expr,

 void CompilerMSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
 {
+	bool is_packed = has_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypePacked);
+	auto *target_expr = maybe_get<SPIRExpression>(target_id);
 	auto *var = maybe_get_backing_variable(target_id);
+	const SPIRType *var_type, *phys_type;
+	if (uint32_t phys_id = get_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypeID))
+		phys_type = &get<SPIRType>(phys_id);
+	else
+		phys_type = &expr_type;
 	if (var)
+	{
 		target_id = var->self;
+		var_type = &get_variable_data_type(*var);
+	}

 	// Type fixups for workgroup variables if they are booleans.
-	if (var && var->storage == StorageClassWorkgroup && expr_type.basetype == SPIRType::Boolean)
+	if (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct) &&
+	    expr_type.basetype == SPIRType::Boolean)
 	{
 		auto short_type = expr_type;
 		short_type.basetype = SPIRType::Short;
 		expr = join(type_to_glsl(short_type), "(", expr, ")");
 	}
+	// Type fixups for workgroup variables if they are matrices.
+	// Don't do fixup for packed types; those are handled specially.
+	// FIXME: Maybe use a type like spvStorageMatrix for packed matrices?
+	if (!msl_options.supports_msl_version(3, 0) && var &&
+	    (var->storage == StorageClassWorkgroup ||
+	     (var_type->basetype == SPIRType::Struct &&
+	      has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) &&
+	    expr_type.columns > 1)
+	{
+		SPIRType matrix_type = *phys_type;
+		if (target_expr && target_expr->need_transpose)
+			swap(matrix_type.vecsize, matrix_type.columns);
+		expr = join("spvStorage_", type_to_glsl(matrix_type), "(", expr, ")");
+	}

 	// Only interested in standalone builtin variables.
 	if (!has_decoration(target_id, DecorationBuiltIn))
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@ -665,6 +665,7 @@ protected:
 		SPVFuncImplQuantizeToF16,
 		SPVFuncImplCubemapTo2DArrayFace,
 		SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
+		SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices
 		SPVFuncImplInverse4x4,
 		SPVFuncImplInverse3x3,
 		SPVFuncImplInverse2x2,
@ -736,6 +737,7 @@ protected:
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                        const std::string &qualifier = "", uint32_t base_offset = 0) override;
 	void emit_struct_padding_target(const SPIRType &type) override;
+	std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member);
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	void emit_block_hints(const SPIRBlock &block) override;

@ -796,6 +798,7 @@ protected:
 	void extract_global_variables_from_functions();
 	void mark_packable_structs();
 	void mark_as_packable(SPIRType &type);
+	void mark_as_workgroup_struct(SPIRType &type);

 	std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars;
 	void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,