This commit is contained in:
Bill Hollings 2017-01-24 13:58:22 -08:00
commit e3dac18633
42 changed files with 661 additions and 157 deletions

View File

@ -24,3 +24,4 @@ script:
- make -j2
- PATH=./glslang/StandAlone:./SPIRV-Tools/tools:$PATH
- ./test_shaders.py shaders
- ./test_shaders.py --metal shaders-msl

View File

@ -93,6 +93,9 @@ if (${PYTHONINTERP_FOUND})
add_test(NAME spirv-cross-test
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py
${CMAKE_CURRENT_SOURCE_DIR}/shaders)
add_test(NAME spirv-cross-test-metal
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --metal
${CMAKE_CURRENT_SOURCE_DIR}/shaders-msl)
endif()
else()
message(WARNING "Testing disabled. Could not find python3. If you have python3 installed try running "

View File

@ -171,6 +171,8 @@ In these cases, run `./test_shaders.py shaders --update` to update the reference
Always make sure you are running up to date glslangValidator as well as SPIRV-Tools when updating reference files.
In short, the master branch should always be able to run `./test_shaders.py shaders` without failure.
SPIRV-Cross uses Travis CI to test all pull requests, so it is not strictly needed to perform testing yourself if you have problems running it locally.
However, a pull request which does not pass testing on Travis will not be accepted.
When adding support for new features to SPIRV-Cross, a new shader and reference file should be added which covers usage of the new shader features in question.
@ -205,6 +207,10 @@ The current reference output is contained in reference/.
See `./test_shaders.py --help` for more.
### Metal backend
To test the roundtrip path GLSL -> SPIR-V -> MSL, `--metal` can be added, e.g. `./test_shaders.py --metal shaders-msl`.
### Updating regression tests
When legitimate changes are found, use `--update` flag to update regression files.

View File

@ -671,8 +671,12 @@ int main(int argc, char *argv[])
res = compiler->get_shader_resources();
if (args.flatten_ubo)
{
for (auto &ubo : res.uniform_buffers)
compiler->flatten_buffer_block(ubo.id);
for (auto &ubo : res.push_constant_buffers)
compiler->flatten_buffer_block(ubo.id);
}
auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs);
auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr);

View File

@ -0,0 +1,32 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Uniform data: a single model-view-projection matrix.
struct UBO
{
    float4x4 uMVP;
};

// Vertex inputs fetched via [[stage_in]].
struct main0_in
{
    float4 aVertex [[attribute(0)]];
    // BUGFIX: was [[attribute(0)]], duplicating aVertex's index.
    // Metal requires each stage_in member to have a unique attribute index.
    float3 aNormal [[attribute(1)]];
};

// Vertex outputs / rasterizer data.
struct main0_out
{
    float3 vNormal [[user(locn0)]];
    float4 gl_Position [[position]];
    // NOTE(review): no [[point_size]] qualifier here — presumably unused by
    // the pipeline; confirm against the consuming test before relying on it.
    float gl_PointSize;
};

vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
{
    main0_out out = {};
    out.gl_Position = _16.uMVP * in.aVertex;
    out.vNormal = in.aNormal;
    out.gl_Position.y = -(out.gl_Position.y); // Invert Y-axis for Metal
    return out;
}

View File

@ -1,12 +1,12 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
vec4 in_data[];
} _23;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _45;

View File

@ -1,12 +1,12 @@
#version 310 es
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
float in_data[];
} _22;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
float out_data[];
} _38;

View File

@ -1,13 +1,13 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
mat4 mvp;
vec4 in_data[];
} _28;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _52;

View File

@ -1,7 +1,7 @@
#version 310 es
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer Distribution
layout(binding = 0, std430) readonly buffer Distribution
{
vec2 distribution[];
} _190;
@ -11,7 +11,7 @@ layout(binding = 2, std140) uniform UBO
vec4 uModTime;
} _218;
layout(binding = 1, std430) buffer HeightmapFFT
layout(binding = 1, std430) writeonly buffer HeightmapFFT
{
uint heights[];
} _276;

View File

@ -9,17 +9,17 @@ struct Foo
vec4 d;
};
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) readonly buffer SSBO2
{
vec4 data[];
} indata;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) writeonly buffer SSBO
{
vec4 data[];
} outdata;
layout(binding = 2, std430) buffer SSBO3
layout(binding = 2, std430) readonly buffer SSBO3
{
Foo foos[];
} foobar;

View File

@ -1,7 +1,7 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) writeonly buffer SSBO
{
vec4 out_data[];
} _27;

View File

@ -1,13 +1,13 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
mat4 mvp;
vec4 in_data[];
} _24;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _177;

View File

@ -1,7 +1,7 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
mat3 out_data[];
} _22;

View File

@ -1,12 +1,12 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
vec4 in_data[];
} _23;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _33;

View File

@ -1,12 +1,12 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
vec4 in_data[];
} _23;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _35;

View File

@ -0,0 +1,27 @@
// Compute shader: copies sums of two readonly input blocks into a writeonly
// output block. The readonly/writeonly/restrict qualifiers on the blocks are
// propagated from per-member decorations.
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
// Output block: write-only, no aliasing (restrict).
layout(binding = 2, std430) restrict writeonly buffer SSBO2
{
vec4 data4;
vec4 data5;
} _10;
// First input block: read-only.
layout(binding = 0, std430) readonly buffer SSBO0
{
vec4 data0;
vec4 data1;
} _15;
// Second input block: read/write capable, but only read here.
layout(binding = 1, std430) restrict buffer SSBO1
{
vec4 data2;
vec4 data3;
} _21;
void main()
{
_10.data4 = _15.data0 + _21.data2;
_10.data5 = _15.data1 + _21.data3;
}

View File

@ -1,7 +1,7 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _27;

View File

@ -1,12 +1,12 @@
#version 310 es
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
float in_data[];
} _22;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
float out_data[];
} _44;

View File

@ -6,12 +6,12 @@ struct Foo
mat4 m;
};
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
Foo out_data[];
} _23;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
Foo in_data[];
} _30;

View File

@ -1,13 +1,13 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
layout(binding = 0, std430) readonly buffer SSBO
{
mat4 mvp;
vec4 in_data[];
} _24;
layout(binding = 1, std430) buffer SSBO2
layout(binding = 1, std430) writeonly buffer SSBO2
{
vec4 out_data[];
} _89;

View File

@ -1,10 +1,12 @@
#version 310 es
uniform vec4 UBO[14];
uniform vec4 UBO[56];
in vec4 aVertex;
void main()
{
gl_Position = (mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex) + UBO[13];
vec4 a4 = UBO[23];
vec4 offset = (UBO[50] + UBO[45]) + vec4(UBO[54].x);
gl_Position = ((mat4(UBO[40], UBO[41], UBO[42], UBO[43]) * aVertex) + UBO[55]) + offset;
}

View File

@ -0,0 +1,19 @@
// Vertex shader with a uniform block flattened into a plain vec4 array:
// UBO[14] holds 14 vec4 "words"; members are addressed by word index and
// component swizzle instead of by name.
#version 310 es

uniform vec4 UBO[14];

out vec4 oA;
out vec4 oB;
out vec4 oC;
out vec4 oD;
out vec4 oE;

void main()
{
gl_Position = vec4(0.0);
// Whole-vec4 loads read one word each; scalar gathers pull one component
// from several consecutive words (e.g. a column of a row-major matrix).
oA = UBO[1];
oB = vec4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y);
oC = UBO[9];
oD = vec4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x);
oE = vec4(UBO[1].z, UBO[6].y, UBO[9].z, UBO[12].y);
}

View File

@ -0,0 +1,13 @@
// Vertex shader with a push-constant block flattened into PushMe[6]:
// words 0-3 hold a mat4 (MVP), word 4 packs a mat2 (xy = column 0,
// zw = column 1), and word 5 carries a scalar in .z.
#version 310 es

uniform vec4 PushMe[6];

layout(location = 1) in vec4 Pos;
layout(location = 0) out vec2 vRot;
layout(location = 0) in vec2 Rot;

void main()
{
gl_Position = mat4(PushMe[0], PushMe[1], PushMe[2], PushMe[3]) * Pos;
// mat2 reconstructed from one packed vec4; scalar broadcast via vec2().
vRot = (mat2(PushMe[4].xy, PushMe[4].zw) * Rot) + vec2(PushMe[5].z);
}

View File

@ -1,10 +1,11 @@
#version 310 es
uniform vec4 UBO[8];
uniform vec4 UBO[12];
in vec4 aVertex;
void main()
{
vec2 v = mat4x2(UBO[8].xy, UBO[9].xy, UBO[10].xy, UBO[11].xy) * aVertex;
gl_Position = (mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex) + (aVertex * mat4(UBO[4], UBO[5], UBO[6], UBO[7]));
}

View File

@ -0,0 +1,25 @@
// Vertex shader with a struct-typed, row-major UBO member flattened into
// UBO[8]: each mat3x4 is stored as four vec4 words whose .xyz parts form the
// columns of the transposed (mat4x3) matrix.
#version 310 es

struct Foo
{
mat3x4 MVP0;
mat3x4 MVP1;
};

uniform vec4 UBO[8];

layout(location = 0) in vec4 v0;
layout(location = 1) in vec4 v1;
layout(location = 0) out vec3 V0;
layout(location = 1) out vec3 V1;

void main()
{
Foo f;
// Generated code materializes the whole struct once per member read, then
// extracts a single member from the temporary.
f.MVP0 = Foo(transpose(mat4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(mat4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))).MVP0;
f.MVP1 = Foo(transpose(mat4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(mat4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))).MVP1;
vec3 a = v0 * f.MVP0;
vec3 b = v1 * f.MVP1;
V0 = a;
V1 = b;
}

View File

@ -0,0 +1,14 @@
// Fragment shader with three flattened uniform blocks of different basic
// types (ivec4, uvec4, vec4). Each flattened array keeps its block's common
// basic type and precision; everything is converted to vec4 before summing.
#version 310 es
precision mediump float;
precision highp int;

uniform mediump ivec4 UBO1[2];
uniform mediump uvec4 UBO2[2];
uniform vec4 UBO0[2];

layout(location = 0) out vec4 FragColor;

void main()
{
FragColor = ((((vec4(UBO1[0]) + vec4(UBO1[1])) + vec4(UBO2[0])) + vec4(UBO2[1])) + UBO0[0]) + UBO0[1];
}

View File

@ -0,0 +1,15 @@
// Basic vertex shader test input: transforms the vertex by a UBO-provided
// MVP matrix and passes the normal through unchanged.
#version 310 es

layout(std140) uniform UBO
{
    // FIX: dropped the redundant "uniform" qualifier on the block member —
    // storage qualifiers on interface-block members are invalid in
    // GLSL ES 3.10, and the sibling shaders in this change already omit it.
    mat4 uMVP;
};

in vec4 aVertex;
in vec3 aNormal;
out vec3 vNormal;

void main()
{
    gl_Position = uMVP * aVertex;
    vNormal = aNormal;
}

View File

@ -0,0 +1,26 @@
// Compute shader test input using anonymous (unnamed-instance) buffer
// blocks, so members are referenced directly by name. Exercises readonly /
// restrict / writeonly block qualifiers.
#version 310 es
layout(local_size_x = 1) in;

// Read-only inputs.
layout(binding = 0, std430) readonly buffer SSBO0
{
vec4 data0;
vec4 data1;
};

// Read/write, non-aliasing; only read here.
layout(binding = 1, std430) restrict buffer SSBO1
{
vec4 data2;
vec4 data3;
};

// Write-only, non-aliasing outputs.
layout(binding = 2, std430) restrict writeonly buffer SSBO2
{
vec4 data4;
vec4 data5;
};

void main()
{
data4 = data0 + data2;
data5 = data1 + data3;
}

View File

@ -2,9 +2,10 @@
layout(std140) uniform UBO
{
uniform mat4 uMVP;
vec4 A4[5][4][2];
mat4 uMVP;
vec4 A1[2];
vec4 A2[2][2];
vec4 A2[2][3];
float A3[3];
vec4 Offset;
};
@ -12,5 +13,7 @@ in vec4 aVertex;
void main()
{
gl_Position = uMVP * aVertex + Offset;
vec4 a4 = A4[2][3][1]; // 2 * (4 * 2) + 3 * 2 + 1 = 16 + 6 + 1 = 23.
vec4 offset = A2[1][1] + A1[1] + A3[2];
gl_Position = uMVP * aVertex + Offset + offset;
}

View File

@ -2,7 +2,7 @@
layout(std140) uniform UBO
{
uniform mat4 uMVP;
mat4 uMVP;
};
in vec4 aVertex;
in vec3 aNormal;

View File

@ -0,0 +1,25 @@
// Vertex shader test input mixing column-major and row-major matrix layouts
// (square and non-square) in one std140 block, to exercise MatrixStride
// handling and row-major access in the compiler under test.
#version 310 es

layout(std140) uniform UBO
{
layout(column_major) mat4 M1C;
layout(row_major) mat4 M1R;
layout(column_major) mat2x4 M2C;
layout(row_major) mat2x4 M2R;
};

out vec4 oA;
out vec4 oB;
out vec4 oC;
out vec4 oD;
out vec4 oE;

void main()
{
gl_Position = vec4(0.0);
// Column loads from each layout variant, plus scalar element accesses.
oA = M1C[1];
oB = M1R[1];
oC = M2C[1];
oD = M2R[0];
oE = vec4(M1C[1][2], M1R[1][2], M2C[1][2], M2R[1][2]);
}

View File

@ -0,0 +1,17 @@
// Vertex shader test input: std430 push-constant block containing a mat2
// (MatrixStride 8) and a float array (ArrayStride 4), to verify that
// constant indexing works with non-16-byte strides.
#version 310 es

layout(push_constant, std430) uniform PushMe
{
mat4 MVP;
mat2 Rot; // The MatrixStride will be 8 here.
float Arr[4];
} registers;

layout(location = 0) in vec2 Rot;
layout(location = 1) in vec4 Pos;
layout(location = 0) out vec2 vRot;

void main()
{
gl_Position = registers.MVP * Pos;
// vec2 + float uses GLSL scalar broadcasting.
vRot = registers.Rot * Rot + registers.Arr[2]; // Constant access should work even if array stride is just 4 here.
}

View File

@ -4,11 +4,13 @@ layout(std140) uniform UBO
{
layout(column_major) mat4 uMVPR;
layout(row_major) mat4 uMVPC;
layout(row_major) mat2x4 uMVP;
};
in vec4 aVertex;
void main()
{
vec2 v = aVertex * uMVP;
gl_Position = uMVPR * aVertex + uMVPC * aVertex;
}

View File

@ -0,0 +1,26 @@
// Vertex shader test input: a row_major-qualified struct member inside a
// std140 block. The row_major layout applies to the matrix members of Foo
// (presumably propagated to MVP0/MVP1 — confirm against the SPIR-V
// decorations the front end emits).
#version 310 es

struct Foo
{
mat3x4 MVP0;
mat3x4 MVP1;
};

layout(std140, binding = 0) uniform UBO
{
layout(row_major) Foo foo;
};

layout(location = 0) in vec4 v0;
layout(location = 1) in vec4 v1;
layout(location = 0) out vec3 V0;
layout(location = 1) out vec3 V1;

void main()
{
// Copy the whole struct out, then multiply vectors on the left
// (vec4 * mat3x4 yields vec3).
Foo f = foo;
vec3 a = v0 * f.MVP0;
vec3 b = v1 * f.MVP1;
V0 = a;
V1 = b;
}

View File

@ -4,27 +4,27 @@
layout(std140) uniform UBO
{
// 16b boundary
uniform vec4 A;
vec4 A;
// 16b boundary
uniform vec2 B0;
uniform vec2 B1;
vec2 B0;
vec2 B1;
// 16b boundary
uniform float C0;
float C0;
// 16b boundary (vec3 is aligned to 16b)
uniform vec3 C1;
vec3 C1;
// 16b boundary
uniform vec3 D0;
uniform float D1;
vec3 D0;
float D1;
// 16b boundary
uniform float E0;
uniform float E1;
uniform float E2;
uniform float E3;
float E0;
float E1;
float E2;
float E3;
// 16b boundary
uniform float F0;
uniform vec2 F1;
float F0;
vec2 F1;
// 16b boundary (vec2 before us is aligned to 8b)
uniform float F2;
float F2;
};
out vec4 oA, oB, oC, oD, oE, oF;

View File

@ -0,0 +1,27 @@
// Fragment shader test input: three anonymous uniform blocks of different
// basic types (float, int, uint), used to test flattening of mixed types.
// NOTE(review): all three blocks declare binding = 0 — presumably the
// bindings are ignored/remapped by the flattening path under test; confirm,
// since identical bindings would collide in a normal GL pipeline.
#version 310 es
precision mediump float;

layout(std140, binding = 0) uniform UBO0
{
vec4 a;
vec4 b;
};

layout(std140, binding = 0) uniform UBO1
{
ivec4 c;
ivec4 d;
};

layout(std140, binding = 0) uniform UBO2
{
uvec4 e;
uvec4 f;
};

layout(location = 0) out vec4 FragColor;

void main()
{
FragColor = vec4(c) + vec4(d) + vec4(e) + vec4(f) + a + b;
}

View File

@ -265,6 +265,10 @@ struct SPIRType : IVariant
// Since we cannot rely on OpName to be equal, we need to figure out aliases.
uint32_t type_alias = 0;
// Denotes the type which this type is based on.
// Allows the backend to traverse how a complex type is built up during access chains.
uint32_t parent_type = 0;
// Used in backends to avoid emitting members with conflicting names.
std::unordered_set<std::string> member_name_cache;
};
@ -904,6 +908,7 @@ struct Meta
uint32_t binding = 0;
uint32_t offset = 0;
uint32_t array_stride = 0;
uint32_t matrix_stride = 0;
uint32_t input_attachment = 0;
uint32_t spec_id = 0;
bool builtin = false;

View File

@ -842,6 +842,10 @@ void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration dec
dec.spec_id = argument;
break;
case DecorationMatrixStride:
dec.matrix_stride = argument;
break;
default:
break;
}
@ -961,6 +965,10 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
dec.array_stride = argument;
break;
case DecorationMatrixStride:
dec.matrix_stride = argument;
break;
case DecorationBinding:
dec.binding = argument;
break;
@ -1020,6 +1028,10 @@ uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
return dec.input_attachment;
case DecorationSpecId:
return dec.spec_id;
case DecorationArrayStride:
return dec.array_stride;
case DecorationMatrixStride:
return dec.matrix_stride;
default:
return 1;
}
@ -1271,6 +1283,7 @@ void Compiler::parse(const Instruction &instruction)
vecbase = base;
vecbase.vecsize = vecsize;
vecbase.self = id;
vecbase.parent_type = ops[1];
break;
}
@ -1285,6 +1298,7 @@ void Compiler::parse(const Instruction &instruction)
matrixbase = base;
matrixbase.columns = colcount;
matrixbase.self = id;
matrixbase.parent_type = ops[1];
break;
}
@ -1302,6 +1316,7 @@ void Compiler::parse(const Instruction &instruction)
arraybase.array_size_literal.push_back(literal);
arraybase.array.push_back(literal ? c->scalar() : ops[2]);
arraybase.parent_type = ops[1];
// Do NOT set arraybase.self!
break;
}
@ -1316,6 +1331,7 @@ void Compiler::parse(const Instruction &instruction)
arraybase = base;
arraybase.array.push_back(0);
arraybase.array_size_literal.push_back(true);
arraybase.parent_type = ops[1];
// Do NOT set arraybase.self!
break;
}
@ -1371,6 +1387,8 @@ void Compiler::parse(const Instruction &instruction)
if (ptrbase.storage == StorageClassAtomicCounter)
ptrbase.basetype = SPIRType::AtomicCounter;
ptrbase.parent_type = ops[2];
// Do NOT set ptrbase.self!
break;
}
@ -2045,6 +2063,17 @@ uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_
SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
}
uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
{
// Decoration must be set in valid SPIR-V, otherwise throw.
// MatrixStride is part of OpMemberDecorate.
auto &dec = meta[type.self].members[index];
if (dec.decoration_flags & (1ull << DecorationMatrixStride))
return dec.matrix_stride;
else
SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
}
size_t Compiler::get_declared_struct_size(const SPIRType &type) const
{
uint32_t last = uint32_t(type.member_types.size() - 1);
@ -2067,7 +2096,7 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui
case SPIRType::Image:
case SPIRType::SampledImage:
case SPIRType::Sampler:
SPIRV_CROSS_THROW("Querying size for object with opaque size.\n");
SPIRV_CROSS_THROW("Querying size for object with opaque size.");
default:
break;
@ -2084,22 +2113,26 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui
}
else
{
size_t component_size = type.width / 8;
unsigned vecsize = type.vecsize;
unsigned columns = type.columns;
// Vectors.
if (columns == 1)
{
size_t component_size = type.width / 8;
return vecsize * component_size;
}
else
{
// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
if ((flags & (1ull << DecorationRowMajor)) && columns == 3)
columns = 4;
else if ((flags & (1ull << DecorationColMajor)) && vecsize == 3)
vecsize = 4;
uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);
return vecsize * columns * component_size;
// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
if (flags & (1ull << DecorationRowMajor))
return matrix_stride * vecsize;
else if (flags & (1ull << DecorationColMajor))
return matrix_stride * columns;
else
SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
}
}
}
@ -3008,3 +3041,48 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
this->get<SPIRVariable>(loop_variable.first).loop_variable = true;
}
}
uint64_t Compiler::get_buffer_block_flags(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
assert(type.basetype == SPIRType::Struct);
// Some flags like non-writable, non-readable are actually found
// as member decorations. If all members have a decoration set, propagate
// the decoration up as a regular variable decoration.
uint64_t base_flags = meta[var.self].decoration.decoration_flags;
if (type.member_types.empty())
return base_flags;
uint64_t all_members_flag_mask = ~(0ull);
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
all_members_flag_mask &= get_member_decoration_mask(type.self, i);
return base_flags | all_members_flag_mask;
}
bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
{
if (type.basetype == SPIRType::Struct)
{
base_type = SPIRType::Unknown;
for (auto &member_type : type.member_types)
{
SPIRType::BaseType member_base;
if (!get_common_basic_type(get<SPIRType>(member_type), member_base))
return false;
if (base_type == SPIRType::Unknown)
base_type = member_base;
else if (base_type != member_base)
return false;
}
return true;
}
else
{
base_type = type.basetype;
return true;
}
}

View File

@ -459,6 +459,7 @@ protected:
uint32_t type_struct_member_offset(const SPIRType &type, uint32_t index) const;
uint32_t type_struct_member_array_stride(const SPIRType &type, uint32_t index) const;
uint32_t type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const;
bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const;
@ -576,6 +577,9 @@ protected:
ShaderResources get_shader_resources(const std::unordered_set<uint32_t> *active_variables) const;
VariableTypeRemapCallback variable_remap_callback;
uint64_t get_buffer_block_flags(const SPIRVariable &var);
bool get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type);
};
}

View File

@ -18,6 +18,7 @@
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>
#include <utility>
using namespace spv;
using namespace spirv_cross;
@ -974,7 +975,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
{
if (options.vulkan_semantics)
if (flattened_buffer_blocks.count(var.self))
emit_buffer_block_flattened(var);
else if (options.vulkan_semantics)
emit_push_constant_block_vulkan(var);
else
emit_push_constant_block_glsl(var);
@ -1016,17 +1019,11 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
if (flattened_buffer_blocks.count(var.self))
{
emit_buffer_block_flattened(var);
}
else if (is_legacy())
{
emit_buffer_block_legacy(var);
}
else
{
emit_buffer_block_native(var);
}
}
void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
@ -1050,8 +1047,12 @@ void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
uint64_t flags = get_buffer_block_flags(var);
bool ssbo = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0;
bool is_restrict = (meta[var.self].decoration.decoration_flags & (1ull << DecorationRestrict)) != 0;
bool is_restrict = ssbo && (flags & (1ull << DecorationRestrict)) != 0;
bool is_writeonly = ssbo && (flags & (1ull << DecorationNonReadable)) != 0;
bool is_readonly = ssbo && (flags & (1ull << DecorationNonWritable)) != 0;
add_resource_name(var.self);
@ -1065,7 +1066,9 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
else
resource_names.insert(buffer_name);
statement(layout_for_variable(var), is_restrict ? "restrict " : "", ssbo ? "buffer " : "uniform ", buffer_name);
statement(layout_for_variable(var), is_restrict ? "restrict " : "", is_writeonly ? "writeonly " : "",
is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ", buffer_name);
begin_scope();
type.member_name_cache.clear();
@ -1092,7 +1095,21 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
auto buffer_name = to_name(type.self, false);
size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
statement("uniform vec4 ", buffer_name, "[", buffer_size, "];");
SPIRType::BaseType basic_type;
if (get_common_basic_type(type, basic_type))
{
SPIRType tmp;
tmp.basetype = basic_type;
tmp.vecsize = 4;
if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
auto flags = get_buffer_block_flags(var);
statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
buffer_size, "];");
}
else
SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
}
void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
@ -3287,40 +3304,37 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
}
string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
bool *need_transpose)
bool *out_need_transpose)
{
if (flattened_buffer_blocks.count(base))
{
if (need_transpose)
flattened_access_chain_offset(base, indices, count, 0, need_transpose);
uint32_t matrix_stride;
bool need_transpose;
flattened_access_chain_offset(base, indices, count, 0, &need_transpose, &matrix_stride);
return flattened_access_chain(base, indices, count, target_type, 0);
if (out_need_transpose)
*out_need_transpose = target_type.columns > 1 && need_transpose;
return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
}
else
{
return access_chain(base, indices, count, false, false, need_transpose);
return access_chain(base, indices, count, false, false, out_need_transpose);
}
}
std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset)
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
bool need_transpose)
{
if (!target_type.array.empty())
{
SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
}
else if (target_type.basetype == SPIRType::Struct)
{
return flattened_access_chain_struct(base, indices, count, target_type, offset);
}
else if (target_type.columns > 1)
{
return flattened_access_chain_matrix(base, indices, count, target_type, offset);
}
return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
else
{
return flattened_access_chain_vector_scalar(base, indices, count, target_type, offset);
}
return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
}
std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
@ -3328,7 +3342,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
{
std::string expr;
expr += type_to_glsl(target_type);
expr += type_to_glsl_constructor(target_type);
expr += "(";
for (size_t i = 0; i < target_type.member_types.size(); ++i)
@ -3337,9 +3351,26 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
expr += ", ";
const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
uint32_t member_offset = type_struct_member_offset(target_type, uint32_t(i));
uint32_t member_offset = type_struct_member_offset(target_type, i);
expr += flattened_access_chain(base, indices, count, member_type, offset + member_offset);
// The access chain terminates at the struct, so we need to find matrix strides and row-major information
// ahead of time.
bool need_transpose = false;
uint32_t matrix_stride = 0;
if (member_type.columns > 1)
{
need_transpose = (combined_decoration_for_member(target_type, i) & (1ull << DecorationRowMajor)) != 0;
matrix_stride = type_struct_member_matrix_stride(target_type, i);
}
auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
need_transpose);
// Cannot forward transpositions, so resolve them here.
if (need_transpose)
expr += convert_row_major_matrix(tmp);
else
expr += tmp;
}
expr += ")";
@ -3348,19 +3379,26 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
}
std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset)
const SPIRType &target_type, uint32_t offset,
uint32_t matrix_stride, bool need_transpose)
{
assert(matrix_stride);
SPIRType tmp_type = target_type;
if (need_transpose)
swap(tmp_type.vecsize, tmp_type.columns);
std::string expr;
expr += type_to_glsl(target_type);
expr += type_to_glsl_constructor(tmp_type);
expr += "(";
for (uint32_t i = 0; i < target_type.columns; ++i)
for (uint32_t i = 0; i < tmp_type.columns; i++)
{
if (i != 0)
expr += ", ";
expr += flattened_access_chain_vector_scalar(base, indices, count, target_type, offset + i * 16);
expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
/* need_transpose= */ false);
}
expr += ")";
@ -3368,41 +3406,85 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
return expr;
}
std::string CompilerGLSL::flattened_access_chain_vector_scalar(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset)
std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset,
uint32_t matrix_stride, bool need_transpose)
{
if (target_type.basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Access chains that use non-floating-point base types can not be flattened");
auto result = flattened_access_chain_offset(base, indices, count, offset);
assert(result.second % 4 == 0);
uint32_t index = result.second / 4;
auto buffer_name = to_name(expression_type(base).self);
std::string expr;
if (need_transpose)
{
std::string expr;
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1*k1+N2*k2+... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
if (target_type.vecsize > 1)
{
expr += type_to_glsl_constructor(target_type);
expr += "(";
}
expr += vector_swizzle(target_type.vecsize, index % 4);
for (uint32_t i = 0; i < target_type.vecsize; ++i)
{
if (i != 0)
expr += ", ";
return expr;
uint32_t component_offset = result.second + i * matrix_stride;
assert(component_offset % (target_type.width / 8) == 0);
uint32_t index = component_offset / (target_type.width / 8);
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
expr += vector_swizzle(1, index % 4);
}
if (target_type.vecsize > 1)
{
expr += ")";
}
return expr;
}
else
{
assert(result.second % (target_type.width / 8) == 0);
uint32_t index = result.second / (target_type.width / 8);
std::string expr;
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
expr += vector_swizzle(target_type.vecsize, index % 4);
return expr;
}
}
std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
uint32_t count, uint32_t offset,
bool *need_transpose)
bool *need_transpose,
uint32_t *out_matrix_stride)
{
const auto *type = &expression_type(base);
uint32_t type_size = 0;
// For resolving array accesses, etc, keep a local copy for poking.
SPIRType temp;
// This holds the type of the current pointer which we are traversing through.
// We always start out from a struct type which is the block.
// This is primarily used to reflect the array strides and matrix strides later.
// For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set
// accordingly as members of structs are accessed.
assert(type->basetype == SPIRType::Struct);
uint32_t type_id = 0;
uint32_t matrix_stride = 0;
std::string expr;
bool row_major_matrix_needs_conversion = false;
@ -3414,26 +3496,41 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
// Arrays
if (!type->array.empty())
{
// We have to modify the type, so keep a local copy.
if (&temp != type)
temp = *type;
type = &temp;
// Here, the type_id will be a type ID for the array type itself.
uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
if (!array_stride)
SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
uint32_t array_size = temp.array.back();
temp.array.pop_back();
auto *constant = maybe_get<SPIRConstant>(index);
if (constant)
{
// Constant array access.
offset += constant->scalar() * array_stride;
}
else
{
// Dynamic array access.
// FIXME: This will need to change if we support other flattening types than 32-bit.
const uint32_t word_stride = 16;
if (array_stride % word_stride)
{
SPIRV_CROSS_THROW(
"Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
"Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
"This cannot be flattened. Try using std140 layout instead.");
}
assert(type_size > 0);
assert(type_size % array_size == 0);
expr += to_expression(index);
expr += " * ";
expr += convert_to_string(array_stride / word_stride);
expr += " + ";
}
uint32_t array_stride = type_size / array_size;
assert(array_stride % 16 == 0);
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(parent_type);
type_id = parent_type;
expr += to_expression(index);
expr += " * ";
expr += convert_to_string(array_stride / 16);
expr += " + ";
type_size = array_stride;
// Type ID now refers to the array type with one less dimension.
}
// For structs, the index refers to a constant, which indexes into the members.
// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
@ -3445,30 +3542,32 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
SPIRV_CROSS_THROW("Member index is out of bounds!");
offset += type_struct_member_offset(*type, index);
type_id = type->member_types[index];
type_size = uint32_t(get_declared_struct_member_size(*type, index));
row_major_matrix_needs_conversion =
(combined_decoration_for_member(*type, index) & (1ull << DecorationRowMajor)) != 0;
auto &struct_type = *type;
type = &get<SPIRType>(type->member_types[index]);
if (type->columns > 1)
{
matrix_stride = type_struct_member_matrix_stride(struct_type, index);
row_major_matrix_needs_conversion =
(combined_decoration_for_member(struct_type, index) & (1ull << DecorationRowMajor)) != 0;
}
else
row_major_matrix_needs_conversion = false;
}
// Matrix -> Vector
else if (type->columns > 1)
{
if (row_major_matrix_needs_conversion)
SPIRV_CROSS_THROW("Matrix indexing is not supported for flattened row major matrices!");
if (ids[index].get_type() != TypeConstant)
SPIRV_CROSS_THROW("Cannot flatten dynamic matrix indexing!");
index = get<SPIRConstant>(index).scalar();
offset += index * (row_major_matrix_needs_conversion ? type->width / 8 : matrix_stride);
offset += index * 16;
// We have to modify the type, so keep a local copy.
if (&temp != type)
temp = *type;
type = &temp;
temp.columns = 1;
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(type->parent_type);
type_id = parent_type;
}
// Vector -> Scalar
else if (type->vecsize > 1)
@ -3477,14 +3576,11 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
SPIRV_CROSS_THROW("Cannot flatten dynamic vector indexing!");
index = get<SPIRConstant>(index).scalar();
offset += index * (row_major_matrix_needs_conversion ? matrix_stride : type->width / 8);
offset += index * 4;
// We have to modify the type, so keep a local copy.
if (&temp != type)
temp = *type;
type = &temp;
temp.vecsize = 1;
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(type->parent_type);
type_id = parent_type;
}
else
SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
@ -3492,6 +3588,8 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
if (need_transpose)
*need_transpose = row_major_matrix_needs_conversion;
if (out_matrix_stride)
*out_matrix_stride = matrix_stride;
return std::make_pair(expr, offset);
}

View File

@ -139,8 +139,9 @@ public:
void require_extension(const std::string &ext);
// Legacy GLSL compatibility method.
// Takes a uniform or storage buffer variable and flattens it into a vec4 array[N]; array instead.
// For this to work, all types in the block must not be integers or vector of integers.
// Takes a uniform or push constant variable and flattens it into a (i|u)vec4 array[N]; array instead.
// For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but
// mixing int and float is not.
// The name of the uniform array will be the same as the interface block name.
void flatten_buffer_block(uint32_t id);
@ -319,16 +320,20 @@ protected:
bool *need_transpose = nullptr);
std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset);
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
bool need_transpose);
std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset);
std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset);
std::string flattened_access_chain_vector_scalar(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset);
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
bool need_transpose);
std::string flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
bool need_transpose);
std::pair<std::string, uint32_t> flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
uint32_t count, uint32_t offset,
bool *need_transpose = nullptr);
bool *need_transpose = nullptr,
uint32_t *matrix_stride = nullptr);
const char *index_to_swizzle(uint32_t index);
std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr);

View File

@ -66,6 +66,19 @@ def validate_shader(shader, vulkan):
else:
subprocess.check_call(['glslangValidator', shader])
def cross_compile_msl(shader):
spirv_f, spirv_path = tempfile.mkstemp()
msl_f, msl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
os.close(spirv_f)
os.close(msl_f)
subprocess.check_call(['glslangValidator', '-V', '-o', spirv_path, shader])
spirv_cross_path = './spirv-cross'
subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--metal'])
subprocess.check_call(['spirv-val', spirv_path])
# TODO: Add optional validation of the MSL output.
return (spirv_path, msl_path)
def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo):
spirv_f, spirv_path = tempfile.mkstemp()
glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
@ -212,20 +225,30 @@ def test_shader(stats, shader, update, keep):
a.append(str(i))
print(','.join(a), file = stats)
def test_shaders_helper(stats, shader_dir, update, malisc, keep):
def test_shader_msl(stats, shader, update, keep):
joined_path = os.path.join(shader[0], shader[1])
print('Testing MSL shader:', joined_path)
spirv, msl = cross_compile_msl(joined_path)
regression_check(shader, msl, update, keep)
os.remove(spirv)
def test_shaders_helper(stats, shader_dir, update, malisc, keep, backend):
for root, dirs, files in os.walk(os.path.join(shader_dir)):
for i in files:
path = os.path.join(root, i)
relpath = os.path.relpath(path, shader_dir)
test_shader(stats, (shader_dir, relpath), update, keep)
if backend == 'metal':
test_shader_msl(stats, (shader_dir, relpath), update, keep)
else:
test_shader(stats, (shader_dir, relpath), update, keep)
def test_shaders(shader_dir, update, malisc, keep):
def test_shaders(shader_dir, update, malisc, keep, backend):
if malisc:
with open('stats.csv', 'w') as stats:
print('Shader,OrigRegs,OrigUniRegs,OrigALUShort,OrigLSShort,OrigTEXShort,OrigALULong,OrigLSLong,OrigTEXLong,CrossRegs,CrossUniRegs,CrossALUShort,CrossLSShort,CrossTEXShort,CrossALULong,CrossLSLong,CrossTEXLong', file = stats)
test_shaders_helper(stats, shader_dir, update, malisc, keep)
test_shaders_helper(stats, shader_dir, update, malisc, keep, backend)
else:
test_shaders_helper(None, shader_dir, update, malisc, keep)
test_shaders_helper(None, shader_dir, update, malisc, keep, backend)
def main():
parser = argparse.ArgumentParser(description = 'Script for regression testing.')
@ -240,13 +263,16 @@ def main():
parser.add_argument('--malisc',
action = 'store_true',
help = 'Use malisc offline compiler to determine static cycle counts before and after spirv-cross.')
parser.add_argument('--metal',
action = 'store_true',
help = 'Test Metal backend.')
args = parser.parse_args()
if not args.folder:
sys.stderr.write('Need shader folder.\n')
sys.exit(1)
test_shaders(args.folder, args.update, args.malisc, args.keep)
test_shaders(args.folder, args.update, args.malisc, args.keep, 'metal' if args.metal else 'glsl')
if args.malisc:
print('Stats in stats.csv!')
print('Tests completed!')