From 016b1d86e99a7e086805d350c8eb203da4ad35f3 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 10:07:38 +0100
Subject: [PATCH 01/17] Emit readonly, writeonly for SSBOs.

---
 reference/shaders/comp/basic.comp             |  4 +--
 reference/shaders/comp/culling.comp           |  4 +--
 reference/shaders/comp/dowhile.comp           |  4 +--
 reference/shaders/comp/generate_height.comp   |  4 +--
 .../shaders/comp/inout-struct.invalid.comp    |  6 ++---
 reference/shaders/comp/insert.comp            |  2 +-
 reference/shaders/comp/loop.comp              |  4 +--
 reference/shaders/comp/mat3.comp              |  2 +-
 reference/shaders/comp/mod.comp               |  4 +--
 reference/shaders/comp/modf.comp              |  4 +--
 reference/shaders/comp/read-write-only.comp   | 27 +++++++++++++++++++
 reference/shaders/comp/return.comp            |  2 +-
 reference/shaders/comp/shared.comp            |  8 +++---
 reference/shaders/comp/struct-layout.comp     |  4 +--
 reference/shaders/comp/torture-loop.comp      |  4 +--
 shaders/comp/read-write-only.comp             | 26 ++++++++++++++++++
 spirv_cross.cpp                               | 15 +++++++++++
 spirv_cross.hpp                               |  2 ++
 spirv_glsl.cpp                                | 10 +++++--
 19 files changed, 106 insertions(+), 30 deletions(-)
 create mode 100644 reference/shaders/comp/read-write-only.comp
 create mode 100644 shaders/comp/read-write-only.comp

diff --git a/reference/shaders/comp/basic.comp b/reference/shaders/comp/basic.comp
index ca2503bd..14850899 100644
--- a/reference/shaders/comp/basic.comp
+++ b/reference/shaders/comp/basic.comp
@@ -1,12 +1,12 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     vec4 in_data[];
 } _23;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _45;
diff --git a/reference/shaders/comp/culling.comp b/reference/shaders/comp/culling.comp
index cd284b96..fd83bfcb 100644
--- a/reference/shaders/comp/culling.comp
+++ b/reference/shaders/comp/culling.comp
@@ -1,12 +1,12 @@
 #version 310 es
 layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     float in_data[];
 } _22;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     float out_data[];
 } _38;
diff --git a/reference/shaders/comp/dowhile.comp b/reference/shaders/comp/dowhile.comp
index 16ba4001..e717961a 100644
--- a/reference/shaders/comp/dowhile.comp
+++ b/reference/shaders/comp/dowhile.comp
@@ -1,13 +1,13 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     mat4 mvp;
     vec4 in_data[];
 } _28;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _52;
diff --git a/reference/shaders/comp/generate_height.comp b/reference/shaders/comp/generate_height.comp
index a2128dd8..1367e951 100644
--- a/reference/shaders/comp/generate_height.comp
+++ b/reference/shaders/comp/generate_height.comp
@@ -1,7 +1,7 @@
 #version 310 es
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer Distribution
+layout(binding = 0, std430) readonly buffer Distribution
 {
     vec2 distribution[];
 } _190;
@@ -11,7 +11,7 @@ layout(binding = 2, std140) uniform UBO
     vec4 uModTime;
 } _218;
 
-layout(binding = 1, std430) buffer HeightmapFFT
+layout(binding = 1, std430) writeonly buffer HeightmapFFT
 {
     uint heights[];
 } _276;
diff --git a/reference/shaders/comp/inout-struct.invalid.comp b/reference/shaders/comp/inout-struct.invalid.comp
index 1aaa48f2..eec37a19 100644
--- a/reference/shaders/comp/inout-struct.invalid.comp
+++ b/reference/shaders/comp/inout-struct.invalid.comp
@@ -9,17 +9,17 @@ struct Foo
     vec4 d;
 };
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) readonly buffer SSBO2
 {
     vec4 data[];
 } indata;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) writeonly buffer SSBO
 {
     vec4 data[];
 } outdata;
 
-layout(binding = 2, std430) buffer SSBO3
+layout(binding = 2, std430) readonly buffer SSBO3
 {
     Foo foos[];
 } foobar;
diff --git a/reference/shaders/comp/insert.comp b/reference/shaders/comp/insert.comp
index 6c10020c..cbe1e27f 100644
--- a/reference/shaders/comp/insert.comp
+++ b/reference/shaders/comp/insert.comp
@@ -1,7 +1,7 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) writeonly buffer SSBO
 {
     vec4 out_data[];
 } _27;
diff --git a/reference/shaders/comp/loop.comp b/reference/shaders/comp/loop.comp
index 9853acaa..049a3066 100644
--- a/reference/shaders/comp/loop.comp
+++ b/reference/shaders/comp/loop.comp
@@ -1,13 +1,13 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     mat4 mvp;
     vec4 in_data[];
 } _24;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _177;
diff --git a/reference/shaders/comp/mat3.comp b/reference/shaders/comp/mat3.comp
index dc302396..2b050f5d 100644
--- a/reference/shaders/comp/mat3.comp
+++ b/reference/shaders/comp/mat3.comp
@@ -1,7 +1,7 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     mat3 out_data[];
 } _22;
diff --git a/reference/shaders/comp/mod.comp b/reference/shaders/comp/mod.comp
index dfb9cf4c..4be0c5f7 100644
--- a/reference/shaders/comp/mod.comp
+++ b/reference/shaders/comp/mod.comp
@@ -1,12 +1,12 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     vec4 in_data[];
 } _23;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _33;
diff --git a/reference/shaders/comp/modf.comp b/reference/shaders/comp/modf.comp
index 721d812f..c92149bf 100644
--- a/reference/shaders/comp/modf.comp
+++ b/reference/shaders/comp/modf.comp
@@ -1,12 +1,12 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     vec4 in_data[];
 } _23;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _35;
diff --git a/reference/shaders/comp/read-write-only.comp b/reference/shaders/comp/read-write-only.comp
new file mode 100644
index 00000000..06227ee2
--- /dev/null
+++ b/reference/shaders/comp/read-write-only.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 2, std430) restrict writeonly buffer SSBO2
+{
+    vec4 data4;
+    vec4 data5;
+} _10;
+
+layout(binding = 0, std430) readonly buffer SSBO0
+{
+    vec4 data0;
+    vec4 data1;
+} _15;
+
+layout(binding = 1, std430) restrict buffer SSBO1
+{
+    vec4 data2;
+    vec4 data3;
+} _21;
+
+void main()
+{
+    _10.data4 = _15.data0 + _21.data2;
+    _10.data5 = _15.data1 + _21.data3;
+}
+
diff --git a/reference/shaders/comp/return.comp b/reference/shaders/comp/return.comp
index 20d61d25..4be20e93 100644
--- a/reference/shaders/comp/return.comp
+++ b/reference/shaders/comp/return.comp
@@ -1,7 +1,7 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _27;
diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp
index e2ff6045..9ffcc636 100644
--- a/reference/shaders/comp/shared.comp
+++ b/reference/shaders/comp/shared.comp
@@ -1,15 +1,15 @@
 #version 310 es
 layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     float in_data[];
 } _22;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     float out_data[];
-} _44;
+} _43;
 
 shared float sShared[4];
 
@@ -20,6 +20,6 @@ void main()
     sShared[gl_LocalInvocationIndex] = idata;
     memoryBarrierShared();
     barrier();
-    _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
+    _43.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
 }
 
diff --git a/reference/shaders/comp/struct-layout.comp b/reference/shaders/comp/struct-layout.comp
index 1cbf5dfb..4feea8be 100644
--- a/reference/shaders/comp/struct-layout.comp
+++ b/reference/shaders/comp/struct-layout.comp
@@ -6,12 +6,12 @@ struct Foo
     mat4 m;
 };
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     Foo out_data[];
 } _23;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     Foo in_data[];
 } _30;
diff --git a/reference/shaders/comp/torture-loop.comp b/reference/shaders/comp/torture-loop.comp
index ae183190..645af5c3 100644
--- a/reference/shaders/comp/torture-loop.comp
+++ b/reference/shaders/comp/torture-loop.comp
@@ -1,13 +1,13 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) readonly buffer SSBO
 {
     mat4 mvp;
     vec4 in_data[];
 } _24;
 
-layout(binding = 1, std430) buffer SSBO2
+layout(binding = 1, std430) writeonly buffer SSBO2
 {
     vec4 out_data[];
 } _89;
diff --git a/shaders/comp/read-write-only.comp b/shaders/comp/read-write-only.comp
new file mode 100644
index 00000000..b224b6f1
--- /dev/null
+++ b/shaders/comp/read-write-only.comp
@@ -0,0 +1,26 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(binding = 0, std430) readonly buffer SSBO0
+{
+   vec4 data0;
+   vec4 data1;
+};
+
+layout(binding = 1, std430) restrict buffer SSBO1
+{
+   vec4 data2;
+   vec4 data3;
+};
+
+layout(binding = 2, std430) restrict writeonly buffer SSBO2
+{
+   vec4 data4;
+   vec4 data5;
+};
+
+void main()
+{
+   data4 = data0 + data2;
+   data5 = data1 + data3;
+}
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index 0dc35a39..f91e5b3d 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -3008,3 +3008,18 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 		this->get<SPIRVariable>(loop_variable.first).loop_variable = true;
 	}
 }
+
+uint64_t Compiler::get_buffer_block_flags(const SPIRVariable &var)
+{
+	auto &type = get<SPIRType>(var.basetype);
+
+	// Some flags like non-writable, non-readable are actually found
+	// as member decorations. If all members have a decoration set, propagate
+	// the decoration up as a regular variable decoration.
+	uint64_t base_flags = meta[var.self].decoration.decoration_flags;
+	uint64_t all_members_flag_mask = 0;
+	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
+		all_members_flag_mask |= ~get_member_decoration_mask(type.self, i);
+
+	return base_flags | (~all_members_flag_mask);
+}
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index aa6534da..b919050b 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -576,6 +576,8 @@ protected:
 	ShaderResources get_shader_resources(const std::unordered_set<uint32_t> *active_variables) const;
 
 	VariableTypeRemapCallback variable_remap_callback;
+
+	uint64_t get_buffer_block_flags(const SPIRVariable &var);
 };
 }
 
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 0c47b0c6..315e7e8f 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -1050,8 +1050,12 @@ void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
 void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
 {
 	auto &type = get<SPIRType>(var.basetype);
+
+	uint64_t flags = get_buffer_block_flags(var);
 	bool ssbo = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0;
-	bool is_restrict = (meta[var.self].decoration.decoration_flags & (1ull << DecorationRestrict)) != 0;
+	bool is_restrict = ssbo && (flags & (1ull << DecorationRestrict)) != 0;
+	bool is_writeonly = ssbo && (flags & (1ull << DecorationNonReadable)) != 0;
+	bool is_readonly = ssbo && (flags & (1ull << DecorationNonWritable)) != 0;
 
 	add_resource_name(var.self);
 
@@ -1065,7 +1069,9 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
 	else
 		resource_names.insert(buffer_name);
 
-	statement(layout_for_variable(var), is_restrict ? "restrict " : "", ssbo ? "buffer " : "uniform ", buffer_name);
+	statement(layout_for_variable(var), is_restrict ? "restrict " : "", is_writeonly ? "writeonly " : "",
+	          is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ", buffer_name);
+
 	begin_scope();
 
 	type.member_name_cache.clear();

From 7f787f09dc47f88544d9ea1740850e37546bd071 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 10:27:14 +0100
Subject: [PATCH 02/17] Add flattening support for push constants.

---
 main.cpp       |  4 ++++
 spirv_glsl.cpp | 22 +++++-----------------
 spirv_glsl.hpp |  5 +++--
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/main.cpp b/main.cpp
index 286fbe80..c3443fa7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -671,8 +671,12 @@ int main(int argc, char *argv[])
 		res = compiler->get_shader_resources();
 
 	if (args.flatten_ubo)
+	{
 		for (auto &ubo : res.uniform_buffers)
 			compiler->flatten_buffer_block(ubo.id);
+		for (auto &ubo : res.push_constant_buffers)
+			compiler->flatten_buffer_block(ubo.id);
+	}
 
 	auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs);
 	auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr);
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 315e7e8f..0ea3f4b1 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -974,7 +974,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 
 void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
 {
-	if (options.vulkan_semantics)
+	if (flattened_buffer_blocks.count(var.self))
+		emit_buffer_block_flattened(var);
+	else if (options.vulkan_semantics)
 		emit_push_constant_block_vulkan(var);
 	else
 		emit_push_constant_block_glsl(var);
@@ -1016,17 +1018,11 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
 void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
 {
 	if (flattened_buffer_blocks.count(var.self))
-	{
 		emit_buffer_block_flattened(var);
-	}
 	else if (is_legacy())
-	{
 		emit_buffer_block_legacy(var);
-	}
 	else
-	{
 		emit_buffer_block_native(var);
-	}
 }
 
 void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
@@ -3312,21 +3308,13 @@ std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *
                                                  const SPIRType &target_type, uint32_t offset)
 {
 	if (!target_type.array.empty())
-	{
 		SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
-	}
 	else if (target_type.basetype == SPIRType::Struct)
-	{
 		return flattened_access_chain_struct(base, indices, count, target_type, offset);
-	}
 	else if (target_type.columns > 1)
-	{
 		return flattened_access_chain_matrix(base, indices, count, target_type, offset);
-	}
 	else
-	{
 		return flattened_access_chain_vector_scalar(base, indices, count, target_type, offset);
-	}
 }
 
 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
@@ -3334,7 +3322,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 {
 	std::string expr;
 
-	expr += type_to_glsl(target_type);
+	expr += type_to_glsl_constructor(target_type);
 	expr += "(";
 
 	for (size_t i = 0; i < target_type.member_types.size(); ++i)
@@ -3358,7 +3346,7 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
 {
 	std::string expr;
 
-	expr += type_to_glsl(target_type);
+	expr += type_to_glsl_constructor(target_type);
 	expr += "(";
 
 	for (uint32_t i = 0; i < target_type.columns; ++i)
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 5eef0365..b19d166c 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -139,8 +139,9 @@ public:
 	void require_extension(const std::string &ext);
 
 	// Legacy GLSL compatibility method.
-	// Takes a uniform or storage buffer variable and flattens it into a vec4 array[N]; array instead.
-	// For this to work, all types in the block must not be integers or vector of integers.
+	// Takes a uniform or push constant variable and flattens it into a (i|u)vec4 array[N]; array instead.
+	// For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but
+	// mixing int and float is not.
 	// The name of the uniform array will be the same as the interface block name.
 	void flatten_buffer_block(uint32_t id);
 

From d3cad993473d575c43b4600f24d25a70430388ba Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 11:30:33 +0100
Subject: [PATCH 03/17] Cleanups for flattened access chains.

---
 spirv_common.hpp |  5 +++++
 spirv_cross.cpp  | 49 +++++++++++++++++++++++++++++++++++-------
 spirv_cross.hpp  |  1 +
 spirv_glsl.cpp   | 56 ++++++++++++++++++++----------------------------
 4 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/spirv_common.hpp b/spirv_common.hpp
index 8af88180..16285088 100644
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@@ -265,6 +265,10 @@ struct SPIRType : IVariant
 	// Since we cannot rely on OpName to be equal, we need to figure out aliases.
 	uint32_t type_alias = 0;
 
+	// Denotes the type which this type is based on.
+	// Allows the backend to traverse how a complex type is built up during access chains.
+	uint32_t parent_type = 0;
+
 	// Used in backends to avoid emitting members with conflicting names.
 	std::unordered_set<std::string> member_name_cache;
 };
@@ -904,6 +908,7 @@ struct Meta
 		uint32_t binding = 0;
 		uint32_t offset = 0;
 		uint32_t array_stride = 0;
+		uint32_t matrix_stride = 0;
 		uint32_t input_attachment = 0;
 		uint32_t spec_id = 0;
 		bool builtin = false;
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index f91e5b3d..64b1f396 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -842,6 +842,10 @@ void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration dec
 		dec.spec_id = argument;
 		break;
 
+	case DecorationMatrixStride:
+		dec.matrix_stride = argument;
+		break;
+
 	default:
 		break;
 	}
@@ -961,6 +965,10 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
 		dec.array_stride = argument;
 		break;
 
+	case DecorationMatrixStride:
+		dec.matrix_stride = argument;
+		break;
+
 	case DecorationBinding:
 		dec.binding = argument;
 		break;
@@ -1020,6 +1028,10 @@ uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
 		return dec.input_attachment;
 	case DecorationSpecId:
 		return dec.spec_id;
+	case DecorationArrayStride:
+		return dec.array_stride;
+	case DecorationMatrixStride:
+		return dec.matrix_stride;
 	default:
 		return 1;
 	}
@@ -1271,6 +1283,7 @@ void Compiler::parse(const Instruction &instruction)
 		vecbase = base;
 		vecbase.vecsize = vecsize;
 		vecbase.self = id;
+		vecbase.parent_type = ops[1];
 		break;
 	}
 
@@ -1285,6 +1298,7 @@ void Compiler::parse(const Instruction &instruction)
 		matrixbase = base;
 		matrixbase.columns = colcount;
 		matrixbase.self = id;
+		matrixbase.parent_type = ops[1];
 		break;
 	}
 
@@ -1302,6 +1316,7 @@ void Compiler::parse(const Instruction &instruction)
 
 		arraybase.array_size_literal.push_back(literal);
 		arraybase.array.push_back(literal ? c->scalar() : ops[2]);
+		arraybase.parent_type = ops[1];
 		// Do NOT set arraybase.self!
 		break;
 	}
@@ -1316,6 +1331,7 @@ void Compiler::parse(const Instruction &instruction)
 		arraybase = base;
 		arraybase.array.push_back(0);
 		arraybase.array_size_literal.push_back(true);
+		arraybase.parent_type = ops[1];
 		// Do NOT set arraybase.self!
 		break;
 	}
@@ -1371,6 +1387,8 @@ void Compiler::parse(const Instruction &instruction)
 		if (ptrbase.storage == StorageClassAtomicCounter)
 			ptrbase.basetype = SPIRType::AtomicCounter;
 
+		ptrbase.parent_type = ops[2];
+
 		// Do NOT set ptrbase.self!
 		break;
 	}
@@ -2045,6 +2063,17 @@ uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_
 		SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
 }
 
+uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
+{
+	// Decoration must be set in valid SPIR-V, otherwise throw.
+	// MatrixStride is part of OpMemberDecorate.
+	auto &dec = meta[type.self].members[index];
+	if (dec.decoration_flags & (1ull << DecorationMatrixStride))
+		return dec.matrix_stride;
+	else
+		SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
+}
+
 size_t Compiler::get_declared_struct_size(const SPIRType &type) const
 {
 	uint32_t last = uint32_t(type.member_types.size() - 1);
@@ -2067,7 +2096,7 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui
 	case SPIRType::Image:
 	case SPIRType::SampledImage:
 	case SPIRType::Sampler:
-		SPIRV_CROSS_THROW("Querying size for object with opaque size.\n");
+		SPIRV_CROSS_THROW("Querying size for object with opaque size.");
 
 	default:
 		break;
@@ -2084,22 +2113,26 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui
 	}
 	else
 	{
-		size_t component_size = type.width / 8;
 		unsigned vecsize = type.vecsize;
 		unsigned columns = type.columns;
 
 		// Vectors.
 		if (columns == 1)
+		{
+			size_t component_size = type.width / 8;
 			return vecsize * component_size;
+		}
 		else
 		{
-			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
-			if ((flags & (1ull << DecorationRowMajor)) && columns == 3)
-				columns = 4;
-			else if ((flags & (1ull << DecorationColMajor)) && vecsize == 3)
-				vecsize = 4;
+			uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);
 
-			return vecsize * columns * component_size;
+			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
+			if (flags & (1ull << DecorationRowMajor))
+				return matrix_stride * vecsize;
+			else if (flags & (1ull << DecorationColMajor))
+				return matrix_stride * columns;
+			else
+				SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
 		}
 	}
 }
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index b919050b..a52c2da9 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -459,6 +459,7 @@ protected:
 
 	uint32_t type_struct_member_offset(const SPIRType &type, uint32_t index) const;
 	uint32_t type_struct_member_array_stride(const SPIRType &type, uint32_t index) const;
+	uint32_t type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const;
 
 	bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const;
 
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 0ea3f4b1..a2dda172 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -3393,10 +3393,8 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
                                                                              bool *need_transpose)
 {
 	const auto *type = &expression_type(base);
-	uint32_t type_size = 0;
-
-	// For resolving array accesses, etc, keep a local copy for poking.
-	SPIRType temp;
+	uint32_t current_type = type->self;
+	uint32_t matrix_stride = 0;
 
 	std::string expr;
 	bool row_major_matrix_needs_conversion = false;
@@ -3408,26 +3406,18 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 		// Arrays
 		if (!type->array.empty())
 		{
-			// We have to modify the type, so keep a local copy.
-			if (&temp != type)
-				temp = *type;
-			type = &temp;
-
-			uint32_t array_size = temp.array.back();
-			temp.array.pop_back();
-
-			assert(type_size > 0);
-			assert(type_size % array_size == 0);
-
-			uint32_t array_stride = type_size / array_size;
-			assert(array_stride % 16 == 0);
+			uint32_t array_stride = get_decoration(current_type, DecorationArrayStride);
+			if (!array_stride)
+				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
 
 			expr += to_expression(index);
 			expr += " * ";
 			expr += convert_to_string(array_stride / 16);
 			expr += " + ";
 
-			type_size = array_stride;
+			uint32_t parent_type = type->parent_type;
+			type = &get<SPIRType>(parent_type);
+			current_type = parent_type;
 		}
 		// For structs, the index refers to a constant, which indexes into the members.
 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
@@ -3440,10 +3430,16 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 
 			offset += type_struct_member_offset(*type, index);
 
-			type_size = uint32_t(get_declared_struct_member_size(*type, index));
 			row_major_matrix_needs_conversion =
 			    (combined_decoration_for_member(*type, index) & (1ull << DecorationRowMajor)) != 0;
+
+			current_type = type->member_types[index];
+
+			auto &struct_type = *type;
 			type = &get<SPIRType>(type->member_types[index]);
+
+			if (type->columns > 1)
+				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
 		}
 		// Matrix -> Vector
 		else if (type->columns > 1)
@@ -3455,14 +3451,11 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 				SPIRV_CROSS_THROW("Cannot flatten dynamic matrix indexing!");
 
 			index = get<SPIRConstant>(index).scalar();
+			offset += index * matrix_stride;
 
-			offset += index * 16;
-
-			// We have to modify the type, so keep a local copy.
-			if (&temp != type)
-				temp = *type;
-			type = &temp;
-			temp.columns = 1;
+			uint32_t parent_type = type->parent_type;
+			type = &get<SPIRType>(type->parent_type);
+			current_type = parent_type;
 		}
 		// Vector -> Scalar
 		else if (type->vecsize > 1)
@@ -3471,14 +3464,11 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 				SPIRV_CROSS_THROW("Cannot flatten dynamic vector indexing!");
 
 			index = get<SPIRConstant>(index).scalar();
+			offset += index * type->width / 8;
 
-			offset += index * 4;
-
-			// We have to modify the type, so keep a local copy.
-			if (&temp != type)
-				temp = *type;
-			type = &temp;
-			temp.vecsize = 1;
+			uint32_t parent_type = type->parent_type;
+			type = &get<SPIRType>(type->parent_type);
+			current_type = parent_type;
 		}
 		else
 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");

From 9540979c55f779283880d75f36ed8b5ef5c83fc9 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 12:29:20 +0100
Subject: [PATCH 04/17] Support int and uint as flattened UBO types.

---
 .../flatten/push-constant.flatten.vert        | 13 ++++
 reference/shaders/flatten/types.flatten.frag  | 14 ++++
 shaders/flatten/push-constant.flatten.vert    | 16 ++++
 shaders/flatten/types.flatten.frag            | 27 +++++++
 spirv_cross.cpp                               | 25 +++++++
 spirv_cross.hpp                               |  1 +
 spirv_glsl.cpp                                | 74 +++++++++++++++----
 spirv_glsl.hpp                                |  2 +-
 8 files changed, 157 insertions(+), 15 deletions(-)
 create mode 100644 reference/shaders/flatten/push-constant.flatten.vert
 create mode 100644 reference/shaders/flatten/types.flatten.frag
 create mode 100644 shaders/flatten/push-constant.flatten.vert
 create mode 100644 shaders/flatten/types.flatten.frag

diff --git a/reference/shaders/flatten/push-constant.flatten.vert b/reference/shaders/flatten/push-constant.flatten.vert
new file mode 100644
index 00000000..09f2e9da
--- /dev/null
+++ b/reference/shaders/flatten/push-constant.flatten.vert
@@ -0,0 +1,13 @@
+#version 310 es
+
+uniform vec4 PushMe[5];
+layout(location = 1) in vec4 Pos;
+layout(location = 0) out vec2 vRot;
+layout(location = 0) in vec2 Rot;
+
+void main()
+{
+    gl_Position = mat4(PushMe[0], PushMe[1], PushMe[2], PushMe[3]) * Pos;
+    vRot = mat2(PushMe[4].xy, PushMe[4].zw) * Rot;
+}
+
diff --git a/reference/shaders/flatten/types.flatten.frag b/reference/shaders/flatten/types.flatten.frag
new file mode 100644
index 00000000..a74327d9
--- /dev/null
+++ b/reference/shaders/flatten/types.flatten.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+uniform mediump ivec4 UBO1[2];
+uniform mediump uvec4 UBO2[2];
+uniform vec4 UBO0[2];
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = ((((vec4(UBO1[0]) + vec4(UBO1[1])) + vec4(UBO2[0])) + vec4(UBO2[1])) + UBO0[0]) + UBO0[1];
+}
+
diff --git a/shaders/flatten/push-constant.flatten.vert b/shaders/flatten/push-constant.flatten.vert
new file mode 100644
index 00000000..8a7fe3af
--- /dev/null
+++ b/shaders/flatten/push-constant.flatten.vert
@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(push_constant, std430) uniform PushMe
+{
+   mat4 MVP;
+   mat2 Rot; // The MatrixStride will be 8 here.
+} registers;
+
+layout(location = 0) in vec2 Rot;
+layout(location = 1) in vec4 Pos;
+layout(location = 0) out vec2 vRot;
+void main()
+{
+   gl_Position = registers.MVP * Pos;
+   vRot = registers.Rot * Rot;
+}
diff --git a/shaders/flatten/types.flatten.frag b/shaders/flatten/types.flatten.frag
new file mode 100644
index 00000000..faab5b7e
--- /dev/null
+++ b/shaders/flatten/types.flatten.frag
@@ -0,0 +1,27 @@
+#version 310 es
+precision mediump float;
+
+layout(std140, binding = 0) uniform UBO0
+{
+   vec4 a;
+   vec4 b;
+};
+
+layout(std140, binding = 0) uniform UBO1
+{
+   ivec4 c;
+   ivec4 d;
+};
+
+layout(std140, binding = 0) uniform UBO2
+{
+   uvec4 e;
+   uvec4 f;
+};
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+   FragColor = vec4(c) + vec4(d) + vec4(e) + vec4(f) + a + b; 
+}
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index 64b1f396..f9deb267 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -3056,3 +3056,28 @@ uint64_t Compiler::get_buffer_block_flags(const SPIRVariable &var)
 
 	return base_flags | (~all_members_flag_mask);
 }
+
+bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
+{
+	if (type.basetype == SPIRType::Struct)
+	{
+		base_type = SPIRType::Unknown;
+		for (auto &member_type : type.member_types)
+		{
+			SPIRType::BaseType member_base;
+			if (!get_common_basic_type(get<SPIRType>(member_type), member_base))
+				return false;
+
+			if (base_type == SPIRType::Unknown)
+				base_type = member_base;
+			else if (base_type != member_base)
+				return false;
+		}
+		return true;
+	}
+	else
+	{
+		base_type = type.basetype;
+		return true;
+	}
+}
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index a52c2da9..fab5489f 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -579,6 +579,7 @@ protected:
 	VariableTypeRemapCallback variable_remap_callback;
 
 	uint64_t get_buffer_block_flags(const SPIRVariable &var);
+	bool get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type);
 };
 }
 
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index a2dda172..33886a4d 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -1094,7 +1094,33 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
 	auto buffer_name = to_name(type.self, false);
 	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
 
-	statement("uniform vec4 ", buffer_name, "[", buffer_size, "];");
+	SPIRType::BaseType basic_type;
+	if (get_common_basic_type(type, basic_type))
+	{
+		SPIRType tmp;
+		tmp.basetype = basic_type;
+
+		const char *flat_type = nullptr;
+		switch (basic_type)
+		{
+		case SPIRType::Float:
+			flat_type = "vec4 ";
+			break;
+		case SPIRType::Int:
+			flat_type = "ivec4 ";
+			break;
+		case SPIRType::UInt:
+			flat_type = "uvec4 ";
+			break;
+		default:
+			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
+		}
+
+		auto flags = get_buffer_block_flags(var);
+		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), flat_type, buffer_name, "[", buffer_size, "];");
+	}
+	else
+		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
 }
 
 void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
@@ -3346,15 +3372,18 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
 {
 	std::string expr;
 
+	uint32_t matrix_stride = 0;
+	flattened_access_chain_offset(base, indices, count, offset, nullptr, &matrix_stride);
+
 	expr += type_to_glsl_constructor(target_type);
 	expr += "(";
 
-	for (uint32_t i = 0; i < target_type.columns; ++i)
+	for (uint32_t i = 0; i < target_type.columns; i++)
 	{
 		if (i != 0)
 			expr += ", ";
 
-		expr += flattened_access_chain_vector_scalar(base, indices, count, target_type, offset + i * 16);
+		expr += flattened_access_chain_vector_scalar(base, indices, count, target_type, offset + i * matrix_stride);
 	}
 
 	expr += ")";
@@ -3365,13 +3394,10 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
 std::string CompilerGLSL::flattened_access_chain_vector_scalar(uint32_t base, const uint32_t *indices, uint32_t count,
                                                                const SPIRType &target_type, uint32_t offset)
 {
-	if (target_type.basetype != SPIRType::Float)
-		SPIRV_CROSS_THROW("Access chains that use non-floating-point base types can not be flattened");
-
 	auto result = flattened_access_chain_offset(base, indices, count, offset);
 
-	assert(result.second % 4 == 0);
-	uint32_t index = result.second / 4;
+	assert(result.second % (target_type.width / 8) == 0);
+	uint32_t index = 8 * result.second / target_type.width;
 
 	auto buffer_name = to_name(expression_type(base).self);
 
@@ -3379,7 +3405,7 @@ std::string CompilerGLSL::flattened_access_chain_vector_scalar(uint32_t base, co
 
 	expr += buffer_name;
 	expr += "[";
-	expr += result.first; // this is a series of N1*k1+N2*k2+... that is either empty or ends with a +
+	expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
 	expr += convert_to_string(index / 4);
 	expr += "]";
 
@@ -3390,7 +3416,7 @@ std::string CompilerGLSL::flattened_access_chain_vector_scalar(uint32_t base, co
 
 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
                                                                              uint32_t count, uint32_t offset,
-                                                                             bool *need_transpose)
+                                                                             bool *need_transpose, uint32_t *out_matrix_stride)
 {
 	const auto *type = &expression_type(base);
 	uint32_t current_type = type->self;
@@ -3410,10 +3436,28 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 			if (!array_stride)
 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
 
-			expr += to_expression(index);
-			expr += " * ";
-			expr += convert_to_string(array_stride / 16);
-			expr += " + ";
+			auto *constant = maybe_get<SPIRConstant>(index);
+			if (constant)
+			{
+				// Constant array access.
+				offset += constant->scalar() * array_stride;
+			}
+			else
+			{
+				// Dynamic array access.
+				// FIXME: This will need to change if we support other flattening types than 32-bit.
+				const uint32_t word_stride = 16;
+				if (array_stride % word_stride)
+				{
+					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
+						                  "Likely culprit here is a float or vec2 array inside a push constant block. This cannot be flattened.");
+				}
+
+				expr += to_expression(index);
+				expr += " * ";
+				expr += convert_to_string(array_stride / word_stride);
+				expr += " + ";
+			}
 
 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(parent_type);
@@ -3476,6 +3520,8 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 
 	if (need_transpose)
 		*need_transpose = row_major_matrix_needs_conversion;
+	if (out_matrix_stride)
+		*out_matrix_stride = matrix_stride;
 
 	return std::make_pair(expr, offset);
 }
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index b19d166c..ba641347 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -329,7 +329,7 @@ protected:
 	                                                 const SPIRType &target_type, uint32_t offset);
 	std::pair<std::string, uint32_t> flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
 	                                                               uint32_t count, uint32_t offset,
-	                                                               bool *need_transpose = nullptr);
+	                                                               bool *need_transpose = nullptr, uint32_t *matrix_stride = nullptr);
 
 	const char *index_to_swizzle(uint32_t index);
 	std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr);

From 69af27d8beaac94898a01dd09240a685e501359b Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 12:35:57 +0100
Subject: [PATCH 05/17] Expand array flatten test a little.

---
 reference/shaders/flatten/array.flatten.vert | 5 +++--
 shaders/flatten/array.flatten.vert           | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/reference/shaders/flatten/array.flatten.vert b/reference/shaders/flatten/array.flatten.vert
index 3eefa4e8..ba4f2771 100644
--- a/reference/shaders/flatten/array.flatten.vert
+++ b/reference/shaders/flatten/array.flatten.vert
@@ -1,10 +1,11 @@
 #version 310 es
 
-uniform vec4 UBO[14];
+uniform vec4 UBO[16];
 in vec4 aVertex;
 
 void main()
 {
-    gl_Position = (mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex) + UBO[13];
+    vec4 offset = (UBO[10] + UBO[5]) + vec4(UBO[14].x);
+    gl_Position = ((mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex) + UBO[15]) + offset;
 }
 
diff --git a/shaders/flatten/array.flatten.vert b/shaders/flatten/array.flatten.vert
index a48f2ec0..2d09ef30 100644
--- a/shaders/flatten/array.flatten.vert
+++ b/shaders/flatten/array.flatten.vert
@@ -4,7 +4,7 @@ layout(std140) uniform UBO
 {
     uniform mat4 uMVP;
     vec4 A1[2];
-    vec4 A2[2][2];
+    vec4 A2[2][3];
     float A3[3];
     vec4 Offset;
 };
@@ -12,5 +12,6 @@ in vec4 aVertex;
 
 void main()
 {
-    gl_Position = uMVP * aVertex + Offset;
+    vec4 offset = A2[1][1] + A1[1] + A3[2];
+    gl_Position = uMVP * aVertex + Offset + offset;
 }

From d1dcced1cbc0467ebce078a034d8e949a827732b Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 12:39:16 +0100
Subject: [PATCH 06/17] Fixups to the flatten tests.

---
 .../flatten/push-constant.flatten.vert        |  4 +--
 shaders/flatten/array.flatten.vert            |  2 +-
 shaders/flatten/basic.flatten.vert            |  2 +-
 shaders/flatten/push-constant.flatten.vert    |  3 +-
 shaders/flatten/swizzle.flatten.vert          | 28 +++++++++----------
 5 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/reference/shaders/flatten/push-constant.flatten.vert b/reference/shaders/flatten/push-constant.flatten.vert
index 09f2e9da..216c1f9d 100644
--- a/reference/shaders/flatten/push-constant.flatten.vert
+++ b/reference/shaders/flatten/push-constant.flatten.vert
@@ -1,6 +1,6 @@
 #version 310 es
 
-uniform vec4 PushMe[5];
+uniform vec4 PushMe[6];
 layout(location = 1) in vec4 Pos;
 layout(location = 0) out vec2 vRot;
 layout(location = 0) in vec2 Rot;
@@ -8,6 +8,6 @@ layout(location = 0) in vec2 Rot;
 void main()
 {
     gl_Position = mat4(PushMe[0], PushMe[1], PushMe[2], PushMe[3]) * Pos;
-    vRot = mat2(PushMe[4].xy, PushMe[4].zw) * Rot;
+    vRot = (mat2(PushMe[4].xy, PushMe[4].zw) * Rot) + vec2(PushMe[5].z);
 }
 
diff --git a/shaders/flatten/array.flatten.vert b/shaders/flatten/array.flatten.vert
index 2d09ef30..db58ef44 100644
--- a/shaders/flatten/array.flatten.vert
+++ b/shaders/flatten/array.flatten.vert
@@ -2,7 +2,7 @@
 
 layout(std140) uniform UBO
 {
-    uniform mat4 uMVP;
+    mat4 uMVP;
     vec4 A1[2];
     vec4 A2[2][3];
     float A3[3];
diff --git a/shaders/flatten/basic.flatten.vert b/shaders/flatten/basic.flatten.vert
index 801724f3..f4e9bb39 100644
--- a/shaders/flatten/basic.flatten.vert
+++ b/shaders/flatten/basic.flatten.vert
@@ -2,7 +2,7 @@
 
 layout(std140) uniform UBO
 {
-    uniform mat4 uMVP;
+    mat4 uMVP;
 };
 in vec4 aVertex;
 in vec3 aNormal;
diff --git a/shaders/flatten/push-constant.flatten.vert b/shaders/flatten/push-constant.flatten.vert
index 8a7fe3af..c7b1b42e 100644
--- a/shaders/flatten/push-constant.flatten.vert
+++ b/shaders/flatten/push-constant.flatten.vert
@@ -4,6 +4,7 @@ layout(push_constant, std430) uniform PushMe
 {
    mat4 MVP;
    mat2 Rot; // The MatrixStride will be 8 here.
+   float Arr[4];
 } registers;
 
 layout(location = 0) in vec2 Rot;
@@ -12,5 +13,5 @@ layout(location = 0) out vec2 vRot;
 void main()
 {
    gl_Position = registers.MVP * Pos;
-   vRot = registers.Rot * Rot;
+   vRot = registers.Rot * Rot + registers.Arr[2]; // Constant access should work even if array stride is just 4 here.
 }
diff --git a/shaders/flatten/swizzle.flatten.vert b/shaders/flatten/swizzle.flatten.vert
index 6cdc68ef..cd2ddd84 100644
--- a/shaders/flatten/swizzle.flatten.vert
+++ b/shaders/flatten/swizzle.flatten.vert
@@ -4,27 +4,27 @@
 layout(std140) uniform UBO
 {
     // 16b boundary
-    uniform vec4 A;
+    vec4 A;
     // 16b boundary
-    uniform vec2 B0;
-    uniform vec2 B1;
+    vec2 B0;
+    vec2 B1;
     // 16b boundary
-    uniform float C0;
+    float C0;
     // 16b boundary (vec3 is aligned to 16b)
-    uniform vec3 C1;
+    vec3 C1;
     // 16b boundary
-    uniform vec3 D0;
-    uniform float D1;
+    vec3 D0;
+    float D1;
     // 16b boundary
-    uniform float E0;
-    uniform float E1;
-    uniform float E2;
-    uniform float E3;
+    float E0;
+    float E1;
+    float E2;
+    float E3;
     // 16b boundary
-    uniform float F0;
-    uniform vec2 F1;
+    float F0;
+    vec2 F1;
     // 16b boundary (vec2 before us is aligned to 8b)
-    uniform float F2;
+    float F2;
 };
 
 out vec4 oA, oB, oC, oD, oE, oF;

From 87f3c579451ef6dbc7260f0efb449bb417f47d9a Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 12:47:26 +0100
Subject: [PATCH 07/17] Formatting.

---
 spirv_glsl.cpp | 12 ++++++++----
 spirv_glsl.hpp |  3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 33886a4d..f24542b1 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -1117,7 +1117,8 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
 		}
 
 		auto flags = get_buffer_block_flags(var);
-		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), flat_type, buffer_name, "[", buffer_size, "];");
+		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), flat_type, buffer_name, "[", buffer_size,
+		          "];");
 	}
 	else
 		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
@@ -3416,7 +3417,8 @@ std::string CompilerGLSL::flattened_access_chain_vector_scalar(uint32_t base, co
 
 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
                                                                              uint32_t count, uint32_t offset,
-                                                                             bool *need_transpose, uint32_t *out_matrix_stride)
+                                                                             bool *need_transpose,
+                                                                             uint32_t *out_matrix_stride)
 {
 	const auto *type = &expression_type(base);
 	uint32_t current_type = type->self;
@@ -3449,8 +3451,10 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 				const uint32_t word_stride = 16;
 				if (array_stride % word_stride)
 				{
-					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
-						                  "Likely culprit here is a float or vec2 array inside a push constant block. This cannot be flattened.");
+					SPIRV_CROSS_THROW(
+					    "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
+					    "Likely culprit here is a float or vec2 array inside a push constant block. This cannot be "
+					    "flattened.");
 				}
 
 				expr += to_expression(index);
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index ba641347..4bfd401e 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -329,7 +329,8 @@ protected:
 	                                                 const SPIRType &target_type, uint32_t offset);
 	std::pair<std::string, uint32_t> flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
 	                                                               uint32_t count, uint32_t offset,
-	                                                               bool *need_transpose = nullptr, uint32_t *matrix_stride = nullptr);
+	                                                               bool *need_transpose = nullptr,
+	                                                               uint32_t *matrix_stride = nullptr);
 
 	const char *index_to_swizzle(uint32_t index);
 	std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr);

From d93dc38415b82e9c0c9bc1272c59fbdc5fb2cba0 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 12:53:17 +0100
Subject: [PATCH 08/17] Use correct glslang revision for reference output ...

---
 reference/shaders/comp/shared.comp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp
index 9ffcc636..d0987a65 100644
--- a/reference/shaders/comp/shared.comp
+++ b/reference/shaders/comp/shared.comp
@@ -9,7 +9,7 @@ layout(binding = 0, std430) readonly buffer SSBO
 layout(binding = 1, std430) writeonly buffer SSBO2
 {
     float out_data[];
-} _43;
+} _44;
 
 shared float sShared[4];
 
@@ -20,6 +20,6 @@ void main()
     sShared[gl_LocalInvocationIndex] = idata;
     memoryBarrierShared();
     barrier();
-    _43.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
+    _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
 }
 

From 3eb2e52c4e470673d7e73981faa6ec19af086c4b Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sat, 21 Jan 2017 13:49:32 +0100
Subject: [PATCH 09/17] Fix bugs with row-major matrices inside flattened UBOs.

---
 .../shaders/flatten/rowmajor.flatten.vert     |  3 +-
 .../flatten/struct.rowmajor.flatten.vert      | 25 ++++++++++
 shaders/flatten/rowmajor.flatten.vert         |  2 +
 shaders/flatten/struct.rowmajor.flatten.vert  | 26 ++++++++++
 spirv_glsl.cpp                                | 50 +++++++++++++++----
 5 files changed, 94 insertions(+), 12 deletions(-)
 create mode 100644 reference/shaders/flatten/struct.rowmajor.flatten.vert
 create mode 100644 shaders/flatten/struct.rowmajor.flatten.vert

diff --git a/reference/shaders/flatten/rowmajor.flatten.vert b/reference/shaders/flatten/rowmajor.flatten.vert
index 56277f41..01d7c78e 100644
--- a/reference/shaders/flatten/rowmajor.flatten.vert
+++ b/reference/shaders/flatten/rowmajor.flatten.vert
@@ -1,10 +1,11 @@
 #version 310 es
 
-uniform vec4 UBO[8];
+uniform vec4 UBO[12];
 in vec4 aVertex;
 
 void main()
 {
+    vec2 v = mat4x2(UBO[8].xy, UBO[9].xy, UBO[10].xy, UBO[11].xy) * aVertex;
     gl_Position = (mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex) + (aVertex * mat4(UBO[4], UBO[5], UBO[6], UBO[7]));
 }
 
diff --git a/reference/shaders/flatten/struct.rowmajor.flatten.vert b/reference/shaders/flatten/struct.rowmajor.flatten.vert
new file mode 100644
index 00000000..a40aea08
--- /dev/null
+++ b/reference/shaders/flatten/struct.rowmajor.flatten.vert
@@ -0,0 +1,25 @@
+#version 310 es
+
+struct Foo
+{
+    mat3x4 MVP0;
+    mat3x4 MVP1;
+};
+
+uniform vec4 UBO[8];
+layout(location = 0) in vec4 v0;
+layout(location = 1) in vec4 v1;
+layout(location = 0) out vec3 V0;
+layout(location = 1) out vec3 V1;
+
+void main()
+{
+    Foo f;
+    f.MVP0 = Foo(transpose(mat4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(mat4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))).MVP0;
+    f.MVP1 = Foo(transpose(mat4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(mat4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))).MVP1;
+    vec3 a = v0 * f.MVP0;
+    vec3 b = v1 * f.MVP1;
+    V0 = a;
+    V1 = b;
+}
+
diff --git a/shaders/flatten/rowmajor.flatten.vert b/shaders/flatten/rowmajor.flatten.vert
index ae73a213..449cd409 100644
--- a/shaders/flatten/rowmajor.flatten.vert
+++ b/shaders/flatten/rowmajor.flatten.vert
@@ -4,11 +4,13 @@ layout(std140) uniform UBO
 {
     layout(column_major) mat4 uMVPR;
     layout(row_major) mat4 uMVPC;
+    layout(row_major) mat2x4 uMVP;
 };
 
 in vec4 aVertex;
 
 void main()
 {
+	vec2 v = aVertex * uMVP;
 	gl_Position = uMVPR * aVertex + uMVPC * aVertex;
 }
diff --git a/shaders/flatten/struct.rowmajor.flatten.vert b/shaders/flatten/struct.rowmajor.flatten.vert
new file mode 100644
index 00000000..231389b8
--- /dev/null
+++ b/shaders/flatten/struct.rowmajor.flatten.vert
@@ -0,0 +1,26 @@
+#version 310 es
+
+struct Foo
+{
+   mat3x4 MVP0;
+   mat3x4 MVP1;
+};
+
+layout(std140, binding = 0) uniform UBO
+{
+   layout(row_major) Foo foo;
+};
+
+layout(location = 0) in vec4 v0;
+layout(location = 1) in vec4 v1;
+layout(location = 0) out vec3 V0;
+layout(location = 1) out vec3 V1;
+
+void main()
+{
+   Foo f = foo;
+   vec3 a = v0 * f.MVP0;
+   vec3 b = v1 * f.MVP1;
+   V0 = a;
+   V1 = b;
+}
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index f24542b1..04d3d520 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -18,6 +18,7 @@
 #include "GLSL.std.450.h"
 #include <algorithm>
 #include <assert.h>
+#include <utility>
 
 using namespace spv;
 using namespace spirv_cross;
@@ -3352,15 +3353,30 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 	expr += type_to_glsl_constructor(target_type);
 	expr += "(";
 
+	// Need to have an extra indices for the member so we can find matrix strides.
+	vector<uint32_t> indices_copy(indices, indices + count);
+	indices_copy.push_back(0);
+
 	for (size_t i = 0; i < target_type.member_types.size(); ++i)
 	{
 		if (i != 0)
 			expr += ", ";
 
 		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
-		uint32_t member_offset = type_struct_member_offset(target_type, uint32_t(i));
 
-		expr += flattened_access_chain(base, indices, count, member_type, offset + member_offset);
+		// The indices should be IDs, but to avoid creating new dummy SPIRConstant here,
+		// use MSB to indicate literals.
+		indices_copy.back() = uint32_t(i) | 0x80000000u;
+
+		bool need_transpose = false;
+		if (member_type.columns > 1)
+			flattened_access_chain_offset(base, indices_copy.data(), count + 1, offset, &need_transpose);
+
+		auto tmp = flattened_access_chain(base, indices_copy.data(), count + 1, member_type, offset);
+		if (need_transpose)
+			expr += convert_row_major_matrix(tmp);
+		else
+			expr += tmp;
 	}
 
 	expr += ")";
@@ -3374,17 +3390,24 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
 	std::string expr;
 
 	uint32_t matrix_stride = 0;
-	flattened_access_chain_offset(base, indices, count, offset, nullptr, &matrix_stride);
+	bool need_transpose = false;
+	flattened_access_chain_offset(base, indices, count, offset, &need_transpose, &matrix_stride);
 
-	expr += type_to_glsl_constructor(target_type);
+	assert(matrix_stride);
+
+	SPIRType tmp_type = target_type;
+	if (need_transpose)
+		swap(tmp_type.vecsize, tmp_type.columns);
+
+	expr += type_to_glsl_constructor(tmp_type);
 	expr += "(";
 
-	for (uint32_t i = 0; i < target_type.columns; i++)
+	for (uint32_t i = 0; i < tmp_type.columns; i++)
 	{
 		if (i != 0)
 			expr += ", ";
 
-		expr += flattened_access_chain_vector_scalar(base, indices, count, target_type, offset + i * matrix_stride);
+		expr += flattened_access_chain_vector_scalar(base, indices, count, tmp_type, offset + i * matrix_stride);
 	}
 
 	expr += ")";
@@ -3471,23 +3494,28 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
 		else if (type->basetype == SPIRType::Struct)
 		{
-			index = get<SPIRConstant>(index).scalar();
+			// The indices should be IDs, but to avoid creating new dummy SPIRConstant here,
+			// use MSB to indicate literals.
+			// See flattened_access_chain_struct.
+			index = (index & 0x80000000u) ? (index & 0x7fffffffu) : get<SPIRConstant>(index).scalar();
 
 			if (index >= type->member_types.size())
 				SPIRV_CROSS_THROW("Member index is out of bounds!");
 
 			offset += type_struct_member_offset(*type, index);
-
-			row_major_matrix_needs_conversion =
-			    (combined_decoration_for_member(*type, index) & (1ull << DecorationRowMajor)) != 0;
-
 			current_type = type->member_types[index];
 
 			auto &struct_type = *type;
 			type = &get<SPIRType>(type->member_types[index]);
 
 			if (type->columns > 1)
+			{
 				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
+				row_major_matrix_needs_conversion =
+				    (combined_decoration_for_member(struct_type, index) & (1ull << DecorationRowMajor)) != 0;
+			}
+			else
+				row_major_matrix_needs_conversion = false;
 		}
 		// Matrix -> Vector
 		else if (type->columns > 1)

From a35073ad78718af8077d82191a6d29b151de4356 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sun, 22 Jan 2017 08:49:11 +0100
Subject: [PATCH 10/17] Add test for flattened 3-dimensional arrays.

---
 reference/shaders/flatten/array.flatten.vert | 7 ++++---
 shaders/flatten/array.flatten.vert           | 2 ++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/reference/shaders/flatten/array.flatten.vert b/reference/shaders/flatten/array.flatten.vert
index ba4f2771..d7b60fd1 100644
--- a/reference/shaders/flatten/array.flatten.vert
+++ b/reference/shaders/flatten/array.flatten.vert
@@ -1,11 +1,12 @@
 #version 310 es
 
-uniform vec4 UBO[16];
+uniform vec4 UBO[56];
 in vec4 aVertex;
 
 void main()
 {
-    vec4 offset = (UBO[10] + UBO[5]) + vec4(UBO[14].x);
-    gl_Position = ((mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex) + UBO[15]) + offset;
+    vec4 a4 = UBO[23];
+    vec4 offset = (UBO[50] + UBO[45]) + vec4(UBO[54].x);
+    gl_Position = ((mat4(UBO[40], UBO[41], UBO[42], UBO[43]) * aVertex) + UBO[55]) + offset;
 }
 
diff --git a/shaders/flatten/array.flatten.vert b/shaders/flatten/array.flatten.vert
index db58ef44..a1f1f100 100644
--- a/shaders/flatten/array.flatten.vert
+++ b/shaders/flatten/array.flatten.vert
@@ -2,6 +2,7 @@
 
 layout(std140) uniform UBO
 {
+    vec4 A4[5][4][2];
     mat4 uMVP;
     vec4 A1[2];
     vec4 A2[2][3];
@@ -12,6 +13,7 @@ in vec4 aVertex;
 
 void main()
 {
+    vec4 a4 = A4[2][3][1]; // 2 * (4 * 2) + 3 * 2 + 1 = 16 + 6 + 1 = 23.
     vec4 offset = A2[1][1] + A1[1] + A3[2];
     gl_Position = uMVP * aVertex + Offset + offset;
 }

From 8a80e62fb4868c8792a0a140797eb0f5386f9de0 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sun, 22 Jan 2017 08:51:24 +0100
Subject: [PATCH 11/17] Make get_buffer_block_flags clearer.

Fix empty struct case.
---
 spirv_cross.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index f9deb267..6302939d 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -3045,16 +3045,21 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 uint64_t Compiler::get_buffer_block_flags(const SPIRVariable &var)
 {
 	auto &type = get<SPIRType>(var.basetype);
+	assert(type.basetype == SPIRType::Struct);
 
 	// Some flags like non-writable, non-readable are actually found
 	// as member decorations. If all members have a decoration set, propagate
 	// the decoration up as a regular variable decoration.
 	uint64_t base_flags = meta[var.self].decoration.decoration_flags;
-	uint64_t all_members_flag_mask = 0;
-	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
-		all_members_flag_mask |= ~get_member_decoration_mask(type.self, i);
 
-	return base_flags | (~all_members_flag_mask);
+	if (type.member_types.empty())
+		return base_flags;
+
+	uint64_t all_members_flag_mask = ~(0ull);
+	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
+		all_members_flag_mask &= get_member_decoration_mask(type.self, i);
+
+	return base_flags | all_members_flag_mask;
 }
 
 bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)

From efba610718aad8e4924edc58dc13ed375b966996 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sun, 22 Jan 2017 08:53:52 +0100
Subject: [PATCH 12/17] Cleanup emit_buffer_block_flattened.

---
 spirv_glsl.cpp | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 04d3d520..a95a12f3 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -1100,25 +1100,12 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
 	{
 		SPIRType tmp;
 		tmp.basetype = basic_type;
-
-		const char *flat_type = nullptr;
-		switch (basic_type)
-		{
-		case SPIRType::Float:
-			flat_type = "vec4 ";
-			break;
-		case SPIRType::Int:
-			flat_type = "ivec4 ";
-			break;
-		case SPIRType::UInt:
-			flat_type = "uvec4 ";
-			break;
-		default:
+		tmp.vecsize = 4;
+		if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
 			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
-		}
 
 		auto flags = get_buffer_block_flags(var);
-		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), flat_type, buffer_name, "[", buffer_size,
+		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", buffer_size,
 		          "];");
 	}
 	else

From fe12fff3240e64c820df62c5b108d87a9c1dd41d Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sun, 22 Jan 2017 09:06:15 +0100
Subject: [PATCH 13/17] Comment type ID a bit better.

---
 spirv_glsl.cpp | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index a95a12f3..e7b14be7 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -1105,8 +1105,8 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
 			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
 
 		auto flags = get_buffer_block_flags(var);
-		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", buffer_size,
-		          "];");
+		statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
+		          buffer_size, "];");
 	}
 	else
 		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
@@ -3431,7 +3431,15 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
                                                                              uint32_t *out_matrix_stride)
 {
 	const auto *type = &expression_type(base);
-	uint32_t current_type = type->self;
+
+	// This holds the type of the current pointer which we are traversing through.
+	// We always start out from a struct type which is the block.
+	// This is primarily used to reflect the array strides and matrix strides later.
+	// For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set
+	// accordingly as members of structs are accessed.
+	assert(type->basetype == SPIRType::Struct);
+	uint32_t type_id = 0;
+
 	uint32_t matrix_stride = 0;
 
 	std::string expr;
@@ -3444,7 +3452,8 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 		// Arrays
 		if (!type->array.empty())
 		{
-			uint32_t array_stride = get_decoration(current_type, DecorationArrayStride);
+			// Here, the type_id will be a type ID for the array type itself.
+			uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
 			if (!array_stride)
 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
 
@@ -3463,8 +3472,8 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 				{
 					SPIRV_CROSS_THROW(
 					    "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
-					    "Likely culprit here is a float or vec2 array inside a push constant block. This cannot be "
-					    "flattened.");
+					    "Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
+					    "This cannot be flattened. Try using std140 layout instead.");
 				}
 
 				expr += to_expression(index);
@@ -3475,7 +3484,9 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 
 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(parent_type);
-			current_type = parent_type;
+			type_id = parent_type;
+
+			// Type ID now refers to the array type with one less dimension.
 		}
 		// For structs, the index refers to a constant, which indexes into the members.
 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
@@ -3490,7 +3501,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 				SPIRV_CROSS_THROW("Member index is out of bounds!");
 
 			offset += type_struct_member_offset(*type, index);
-			current_type = type->member_types[index];
+			type_id = type->member_types[index];
 
 			auto &struct_type = *type;
 			type = &get<SPIRType>(type->member_types[index]);
@@ -3518,7 +3529,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 
 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(type->parent_type);
-			current_type = parent_type;
+			type_id = parent_type;
 		}
 		// Vector -> Scalar
 		else if (type->vecsize > 1)
@@ -3531,7 +3542,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 
 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(type->parent_type);
-			current_type = parent_type;
+			type_id = parent_type;
 		}
 		else
 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");

From d87249a2ebf15678d7813aad14f3cf42ed592f43 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Sun, 22 Jan 2017 09:21:22 +0100
Subject: [PATCH 14/17] Remove hack for accessing matrix decorations in
 flattened structs.

---
 spirv_glsl.cpp | 46 ++++++++++++++++++++++++----------------------
 spirv_glsl.hpp |  6 ++++--
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index e7b14be7..8b071349 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -3320,14 +3320,21 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
 }
 
 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
-                                                 const SPIRType &target_type, uint32_t offset)
+                                                 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
+                                                 bool need_transpose)
 {
 	if (!target_type.array.empty())
 		SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
 	else if (target_type.basetype == SPIRType::Struct)
 		return flattened_access_chain_struct(base, indices, count, target_type, offset);
 	else if (target_type.columns > 1)
-		return flattened_access_chain_matrix(base, indices, count, target_type, offset);
+	{
+		// Matrix stride might have been set to something non-zero already by the caller.
+		// If not, we should find this information before flattening the matrix access.
+		if (matrix_stride == 0)
+			flattened_access_chain_offset(base, indices, count, offset, &need_transpose, &matrix_stride);
+		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
+	}
 	else
 		return flattened_access_chain_vector_scalar(base, indices, count, target_type, offset);
 }
@@ -3340,26 +3347,28 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 	expr += type_to_glsl_constructor(target_type);
 	expr += "(";
 
-	// Need to have an extra indices for the member so we can find matrix strides.
-	vector<uint32_t> indices_copy(indices, indices + count);
-	indices_copy.push_back(0);
-
 	for (size_t i = 0; i < target_type.member_types.size(); ++i)
 	{
 		if (i != 0)
 			expr += ", ";
 
 		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
+		uint32_t member_offset = type_struct_member_offset(target_type, i);
 
-		// The indices should be IDs, but to avoid creating new dummy SPIRConstant here,
-		// use MSB to indicate literals.
-		indices_copy.back() = uint32_t(i) | 0x80000000u;
-
+		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
+		// ahead of time.
 		bool need_transpose = false;
+		uint32_t matrix_stride = 0;
 		if (member_type.columns > 1)
-			flattened_access_chain_offset(base, indices_copy.data(), count + 1, offset, &need_transpose);
+		{
+			need_transpose = (combined_decoration_for_member(target_type, i) & (1ull << DecorationRowMajor)) != 0;
+			matrix_stride = type_struct_member_matrix_stride(target_type, i);
+		}
 
-		auto tmp = flattened_access_chain(base, indices_copy.data(), count + 1, member_type, offset);
+		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
+		                                  need_transpose);
+
+		// Cannot forward transpositions, so resolve them here.
 		if (need_transpose)
 			expr += convert_row_major_matrix(tmp);
 		else
@@ -3372,16 +3381,12 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 }
 
 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
-                                                        const SPIRType &target_type, uint32_t offset)
+                                                        const SPIRType &target_type, uint32_t offset,
+                                                        uint32_t matrix_stride, bool need_transpose)
 {
 	std::string expr;
 
-	uint32_t matrix_stride = 0;
-	bool need_transpose = false;
-	flattened_access_chain_offset(base, indices, count, offset, &need_transpose, &matrix_stride);
-
 	assert(matrix_stride);
-
 	SPIRType tmp_type = target_type;
 	if (need_transpose)
 		swap(tmp_type.vecsize, tmp_type.columns);
@@ -3492,10 +3497,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
 		else if (type->basetype == SPIRType::Struct)
 		{
-			// The indices should be IDs, but to avoid creating new dummy SPIRConstant here,
-			// use MSB to indicate literals.
-			// See flattened_access_chain_struct.
-			index = (index & 0x80000000u) ? (index & 0x7fffffffu) : get<SPIRConstant>(index).scalar();
+			index = get<SPIRConstant>(index).scalar();
 
 			if (index >= type->member_types.size())
 				SPIRV_CROSS_THROW("Member index is out of bounds!");
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 4bfd401e..b3ed6082 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -320,11 +320,13 @@ protected:
 	                         bool *need_transpose = nullptr);
 
 	std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
-	                                   const SPIRType &target_type, uint32_t offset);
+	                                   const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride = 0,
+	                                   bool need_transpose = false);
 	std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                          const SPIRType &target_type, uint32_t offset);
 	std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
-	                                          const SPIRType &target_type, uint32_t offset);
+	                                          const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
+	                                          bool need_transpose);
 	std::string flattened_access_chain_vector_scalar(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                                 const SPIRType &target_type, uint32_t offset);
 	std::pair<std::string, uint32_t> flattened_access_chain_offset(uint32_t base, const uint32_t *indices,

From 1c28ec6885118c9558844edec6c76f144b229584 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com>
Date: Wed, 18 Jan 2017 19:05:57 +0100
Subject: [PATCH 15/17] Add basic setup for regression testing Metal output.

---
 .travis.yml                           |  1 +
 CMakeLists.txt                        |  3 +++
 README.md                             |  6 +++++
 reference/shaders-msl/vert/basic.vert | 32 ++++++++++++++++++++++
 shaders-msl/vert/basic.vert           | 15 +++++++++++
 test_shaders.py                       | 38 ++++++++++++++++++++++-----
 6 files changed, 89 insertions(+), 6 deletions(-)
 create mode 100644 reference/shaders-msl/vert/basic.vert
 create mode 100644 shaders-msl/vert/basic.vert

diff --git a/.travis.yml b/.travis.yml
index 593d472b..c3dfc6ca 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,3 +24,4 @@ script:
   - make -j2
   - PATH=./glslang/StandAlone:./SPIRV-Tools/tools:$PATH
   - ./test_shaders.py shaders
+  - ./test_shaders.py --metal shaders-msl
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ebf933e3..0532f962 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -93,6 +93,9 @@ if (${PYTHONINTERP_FOUND})
 	add_test(NAME spirv-cross-test
 		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py
 			${CMAKE_CURRENT_SOURCE_DIR}/shaders)
+	add_test(NAME spirv-cross-test-metal
+		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --metal
+			${CMAKE_CURRENT_SOURCE_DIR}/shaders-msl)
   endif()
 else()
   message(WARNING "Testing disabled. Could not find python3. If you have python3 installed try running "
diff --git a/README.md b/README.md
index 415f9523..edc0452a 100644
--- a/README.md
+++ b/README.md
@@ -171,6 +171,8 @@ In these cases, run `./test_shaders.py shaders --update` to update the reference
 Always make sure you are running up to date glslangValidator as well as SPIRV-Tools when updating reference files.
 
 In short, the master branch should always be able to run `./test_shaders.py shaders` without failure.
+SPIRV-Cross uses Travis CI to test all pull requests, so it is not strictly needed to perform testing yourself if you have problems running it locally.
+A pull request which does not pass testing on Travis will not be accepted however.
 
 When adding support for new features to SPIRV-Cross, a new shader and reference file should be added which covers usage of the new shader features in question.
 
@@ -205,6 +207,10 @@ The current reference output is contained in reference/.
 
 See `./test_shaders.py --help` for more.
 
+### Metal backend
+
+To test the roundtrip path GLSL -> SPIR-V -> MSL, `--metal` can be added, e.g. `./test_shaders.py --metal shaders-msl`.
+
 ### Updating regression tests
 
 When legitimate changes are found, use `--update` flag to update regression files.
diff --git a/reference/shaders-msl/vert/basic.vert b/reference/shaders-msl/vert/basic.vert
new file mode 100644
index 00000000..b895e104
--- /dev/null
+++ b/reference/shaders-msl/vert/basic.vert
@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4x4 uMVP;
+};
+
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(0)]];
+};
+
+struct main0_out
+{
+    float3 vNormal [[user(locn0)]];
+    float4 gl_Position [[position]];
+    float gl_PointSize;
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.gl_Position = _16.uMVP * in.aVertex;
+    out.vNormal = in.aNormal;
+    out.gl_Position.y = -(out.gl_Position.y);    // Invert Y-axis for Metal
+    return out;
+}
+
diff --git a/shaders-msl/vert/basic.vert b/shaders-msl/vert/basic.vert
new file mode 100644
index 00000000..801724f3
--- /dev/null
+++ b/shaders-msl/vert/basic.vert
@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(std140) uniform UBO
+{
+    uniform mat4 uMVP;
+};
+in vec4 aVertex;
+in vec3 aNormal;
+out vec3 vNormal;
+
+void main()
+{
+    gl_Position = uMVP * aVertex;
+    vNormal = aNormal;
+}
diff --git a/test_shaders.py b/test_shaders.py
index cd961db9..97d09bdf 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -66,6 +66,19 @@ def validate_shader(shader, vulkan):
     else:
         subprocess.check_call(['glslangValidator', shader])
 
+def cross_compile_msl(shader):
+    spirv_f, spirv_path = tempfile.mkstemp()
+    msl_f, msl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
+    os.close(spirv_f)
+    os.close(msl_f)
+    subprocess.check_call(['glslangValidator', '-V', '-o', spirv_path, shader])
+    spirv_cross_path = './spirv-cross'
+    subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--metal'])
+    subprocess.check_call(['spirv-val', spirv_path])
+
+    # TODO: Add optional validation of the MSL output.
+    return (spirv_path, msl_path)
+
 def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo):
     spirv_f, spirv_path = tempfile.mkstemp()
     glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
@@ -212,20 +225,30 @@ def test_shader(stats, shader, update, keep):
             a.append(str(i))
         print(','.join(a), file = stats)
 
-def test_shaders_helper(stats, shader_dir, update, malisc, keep):
+def test_shader_msl(stats, shader, update, keep):
+    joined_path = os.path.join(shader[0], shader[1])
+    print('Testing MSL shader:', joined_path)
+    spirv, msl = cross_compile_msl(joined_path)
+    regression_check(shader, msl, update, keep)
+    os.remove(spirv)
+
+def test_shaders_helper(stats, shader_dir, update, malisc, keep, backend):
     for root, dirs, files in os.walk(os.path.join(shader_dir)):
         for i in files:
             path = os.path.join(root, i)
             relpath = os.path.relpath(path, shader_dir)
-            test_shader(stats, (shader_dir, relpath), update, keep)
+            if backend == 'metal':
+                test_shader_msl(stats, (shader_dir, relpath), update, keep)
+            else:
+                test_shader(stats, (shader_dir, relpath), update, keep)
 
-def test_shaders(shader_dir, update, malisc, keep):
+def test_shaders(shader_dir, update, malisc, keep, backend):
     if malisc:
         with open('stats.csv', 'w') as stats:
             print('Shader,OrigRegs,OrigUniRegs,OrigALUShort,OrigLSShort,OrigTEXShort,OrigALULong,OrigLSLong,OrigTEXLong,CrossRegs,CrossUniRegs,CrossALUShort,CrossLSShort,CrossTEXShort,CrossALULong,CrossLSLong,CrossTEXLong', file = stats)
-            test_shaders_helper(stats, shader_dir, update, malisc, keep)
+            test_shaders_helper(stats, shader_dir, update, malisc, keep, backend)
     else:
-        test_shaders_helper(None, shader_dir, update, malisc, keep)
+        test_shaders_helper(None, shader_dir, update, malisc, keep, backend)
 
 def main():
     parser = argparse.ArgumentParser(description = 'Script for regression testing.')
@@ -240,13 +263,16 @@ def main():
     parser.add_argument('--malisc',
             action = 'store_true',
             help = 'Use malisc offline compiler to determine static cycle counts before and after spirv-cross.')
+    parser.add_argument('--metal',
+            action = 'store_true',
+            help = 'Test Metal backend.')
     args = parser.parse_args()
 
     if not args.folder:
         sys.stderr.write('Need shader folder.\n')
         sys.exit(1)
 
-    test_shaders(args.folder, args.update, args.malisc, args.keep)
+    test_shaders(args.folder, args.update, args.malisc, args.keep, 'metal' if args.metal else 'glsl')
     if args.malisc:
         print('Stats in stats.csv!')
     print('Tests completed!')

From ed04c95b0839c8bf04ca30cd0e48b82634a59b2c Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Tue, 24 Jan 2017 07:42:19 -0800
Subject: [PATCH 16/17] Implement flattening of row major matrix indexing

To extract a column from row-major matrix, we need to do a strided load one
component at a time. In this case flattened_access_chain_offset still returns
the offset to the first element, but the stride is equal to matrix stride
instead of vector stride.

For this to work, we need to pass matrix stride (and transpose flag) through,
similar to how matrix flattening works.

Additionally slightly clean up recursive flattened_access_chain structure -
specifically, instead of deciding mid-traversal that we need matrix stride
information, we can just pass the matrix stride through - for access chains
that end in matrix/vector this gets us what we need, and for access chains
that end in structs the flattened_access_chain_struct code will recompute
correct stride/transposition data to pass through further.
---
 .../shaders/flatten/matrixindex.flatten.vert  | 19 ++++
 shaders/flatten/matrixindex.flatten.vert      | 25 +++++
 spirv_glsl.cpp                                | 96 ++++++++++++-------
 spirv_glsl.hpp                                |  9 +-
 4 files changed, 112 insertions(+), 37 deletions(-)
 create mode 100644 reference/shaders/flatten/matrixindex.flatten.vert
 create mode 100644 shaders/flatten/matrixindex.flatten.vert

diff --git a/reference/shaders/flatten/matrixindex.flatten.vert b/reference/shaders/flatten/matrixindex.flatten.vert
new file mode 100644
index 00000000..571ddba7
--- /dev/null
+++ b/reference/shaders/flatten/matrixindex.flatten.vert
@@ -0,0 +1,19 @@
+#version 310 es
+
+uniform vec4 UBO[14];
+out vec4 oA;
+out vec4 oB;
+out vec4 oC;
+out vec4 oD;
+out vec4 oE;
+
+void main()
+{
+    gl_Position = vec4(0.0);
+    oA = UBO[1];
+    oB = vec4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y);
+    oC = UBO[9];
+    oD = vec4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x);
+    oE = vec4(UBO[1].z, float(UBO[6].y), UBO[9].z, float(UBO[12].y));
+}
+
diff --git a/shaders/flatten/matrixindex.flatten.vert b/shaders/flatten/matrixindex.flatten.vert
new file mode 100644
index 00000000..eb6ba2a2
--- /dev/null
+++ b/shaders/flatten/matrixindex.flatten.vert
@@ -0,0 +1,25 @@
+#version 310 es
+
+layout(std140) uniform UBO
+{
+    layout(column_major) mat4 M1C;
+    layout(row_major) mat4 M1R;
+    layout(column_major) mat2x4 M2C;
+    layout(row_major) mat2x4 M2R;
+};
+
+out vec4 oA;
+out vec4 oB;
+out vec4 oC;
+out vec4 oD;
+out vec4 oE;
+
+void main()
+{
+	gl_Position = vec4(0.0);
+	oA = M1C[1];
+	oB = M1R[1];
+	oC = M2C[1];
+	oD = M2R[0];
+	oE = vec4(M1C[1][2], M1R[1][2], M2C[1][2], M2R[1][2]);
+}
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 8b071349..da86c1a9 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -3304,18 +3304,22 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
 }
 
 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
-                                  bool *need_transpose)
+                                  bool *out_need_transpose)
 {
 	if (flattened_buffer_blocks.count(base))
 	{
-		if (need_transpose)
-			flattened_access_chain_offset(base, indices, count, 0, need_transpose);
+		uint32_t matrix_stride;
+		bool need_transpose;
+		flattened_access_chain_offset(base, indices, count, 0, &need_transpose, &matrix_stride);
 
-		return flattened_access_chain(base, indices, count, target_type, 0);
+		if (out_need_transpose)
+			*out_need_transpose = target_type.columns > 1 && need_transpose;
+
+		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
 	}
 	else
 	{
-		return access_chain(base, indices, count, false, false, need_transpose);
+		return access_chain(base, indices, count, false, false, out_need_transpose);
 	}
 }
 
@@ -3328,15 +3332,9 @@ std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *
 	else if (target_type.basetype == SPIRType::Struct)
 		return flattened_access_chain_struct(base, indices, count, target_type, offset);
 	else if (target_type.columns > 1)
-	{
-		// Matrix stride might have been set to something non-zero already by the caller.
-		// If not, we should find this information before flattening the matrix access.
-		if (matrix_stride == 0)
-			flattened_access_chain_offset(base, indices, count, offset, &need_transpose, &matrix_stride);
 		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
-	}
 	else
-		return flattened_access_chain_vector_scalar(base, indices, count, target_type, offset);
+		return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
 }
 
 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
@@ -3384,13 +3382,13 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
                                                         const SPIRType &target_type, uint32_t offset,
                                                         uint32_t matrix_stride, bool need_transpose)
 {
-	std::string expr;
-
 	assert(matrix_stride);
 	SPIRType tmp_type = target_type;
 	if (need_transpose)
 		swap(tmp_type.vecsize, tmp_type.columns);
 
+	std::string expr;
+
 	expr += type_to_glsl_constructor(tmp_type);
 	expr += "(";
 
@@ -3399,7 +3397,8 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
 		if (i != 0)
 			expr += ", ";
 
-		expr += flattened_access_chain_vector_scalar(base, indices, count, tmp_type, offset + i * matrix_stride);
+		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
+		                                      /* need_transpose= */ false);
 	}
 
 	expr += ")";
@@ -3407,27 +3406,61 @@ std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uin
 	return expr;
 }
 
-std::string CompilerGLSL::flattened_access_chain_vector_scalar(uint32_t base, const uint32_t *indices, uint32_t count,
-                                                               const SPIRType &target_type, uint32_t offset)
+std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
+                                                        const SPIRType &target_type, uint32_t offset,
+                                                        uint32_t matrix_stride, bool need_transpose)
 {
 	auto result = flattened_access_chain_offset(base, indices, count, offset);
 
-	assert(result.second % (target_type.width / 8) == 0);
-	uint32_t index = 8 * result.second / target_type.width;
-
 	auto buffer_name = to_name(expression_type(base).self);
 
-	std::string expr;
+	if (need_transpose)
+	{
+		std::string expr;
 
-	expr += buffer_name;
-	expr += "[";
-	expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
-	expr += convert_to_string(index / 4);
-	expr += "]";
+		expr += type_to_glsl_constructor(target_type);
+		expr += "(";
 
-	expr += vector_swizzle(target_type.vecsize, index % 4);
+		for (uint32_t i = 0; i < target_type.vecsize; ++i)
+		{
+			if (i != 0)
+				expr += ", ";
 
-	return expr;
+			uint32_t component_offset = result.second + i * matrix_stride;
+
+			assert(component_offset % (target_type.width / 8) == 0);
+			uint32_t index = component_offset / (target_type.width / 8);
+
+			expr += buffer_name;
+			expr += "[";
+			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
+			expr += convert_to_string(index / 4);
+			expr += "]";
+
+			expr += vector_swizzle(1, index % 4);
+		}
+
+		expr += ")";
+
+		return expr;
+	}
+	else
+	{
+		assert(result.second % (target_type.width / 8) == 0);
+		uint32_t index = result.second / (target_type.width / 8);
+
+		std::string expr;
+
+		expr += buffer_name;
+		expr += "[";
+		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
+		expr += convert_to_string(index / 4);
+		expr += "]";
+
+		expr += vector_swizzle(target_type.vecsize, index % 4);
+
+		return expr;
+	}
 }
 
 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
@@ -3520,14 +3553,11 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 		// Matrix -> Vector
 		else if (type->columns > 1)
 		{
-			if (row_major_matrix_needs_conversion)
-				SPIRV_CROSS_THROW("Matrix indexing is not supported for flattened row major matrices!");
-
 			if (ids[index].get_type() != TypeConstant)
 				SPIRV_CROSS_THROW("Cannot flatten dynamic matrix indexing!");
 
 			index = get<SPIRConstant>(index).scalar();
-			offset += index * matrix_stride;
+			offset += index * (row_major_matrix_needs_conversion ? type->width / 8 : matrix_stride);
 
 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(type->parent_type);
@@ -3540,7 +3570,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(uin
 				SPIRV_CROSS_THROW("Cannot flatten dynamic vector indexing!");
 
 			index = get<SPIRConstant>(index).scalar();
-			offset += index * type->width / 8;
+			offset += index * (row_major_matrix_needs_conversion ? matrix_stride : type->width / 8);
 
 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(type->parent_type);
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index b3ed6082..6e08c099 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -320,15 +320,16 @@ protected:
 	                         bool *need_transpose = nullptr);
 
 	std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
-	                                   const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride = 0,
-	                                   bool need_transpose = false);
+	                                   const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
+	                                   bool need_transpose);
 	std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                          const SPIRType &target_type, uint32_t offset);
 	std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                          const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
 	                                          bool need_transpose);
-	std::string flattened_access_chain_vector_scalar(uint32_t base, const uint32_t *indices, uint32_t count,
-	                                                 const SPIRType &target_type, uint32_t offset);
+	std::string flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
+	                                          const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
+	                                          bool need_transpose);
 	std::pair<std::string, uint32_t> flattened_access_chain_offset(uint32_t base, const uint32_t *indices,
 	                                                               uint32_t count, uint32_t offset,
 	                                                               bool *need_transpose = nullptr,

From 32a561a6c3ec62ae12b053674f193fd1bc82b134 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Tue, 24 Jan 2017 08:09:58 -0800
Subject: [PATCH 17/17] Remove redundant constructor calls for scalar types

---
 reference/shaders/flatten/matrixindex.flatten.vert |  2 +-
 spirv_glsl.cpp                                     | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/reference/shaders/flatten/matrixindex.flatten.vert b/reference/shaders/flatten/matrixindex.flatten.vert
index 571ddba7..4deeaf1d 100644
--- a/reference/shaders/flatten/matrixindex.flatten.vert
+++ b/reference/shaders/flatten/matrixindex.flatten.vert
@@ -14,6 +14,6 @@ void main()
     oB = vec4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y);
     oC = UBO[9];
     oD = vec4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x);
-    oE = vec4(UBO[1].z, float(UBO[6].y), UBO[9].z, float(UBO[12].y));
+    oE = vec4(UBO[1].z, UBO[6].y, UBO[9].z, UBO[12].y);
 }
 
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index da86c1a9..f35a1969 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -3418,8 +3418,11 @@ std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uin
 	{
 		std::string expr;
 
-		expr += type_to_glsl_constructor(target_type);
-		expr += "(";
+		if (target_type.vecsize > 1)
+		{
+			expr += type_to_glsl_constructor(target_type);
+			expr += "(";
+		}
 
 		for (uint32_t i = 0; i < target_type.vecsize; ++i)
 		{
@@ -3440,7 +3443,10 @@ std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uin
 			expr += vector_swizzle(1, index % 4);
 		}
 
-		expr += ")";
+		if (target_type.vecsize > 1)
+		{
+			expr += ")";
+		}
 
 		return expr;
 	}