MSL: Support scalar block layout.

Relaxed block layout relaxed the restrictions on vector alignment, allowing them to be aligned on scalar boundaries. Scalar block layout relaxes this further, allowing *any* member to be aligned on a scalar boundary. The requirement that a vector not improperly straddle a 16-byte boundary is also relaxed. I've also added a test showing that `std430` layout works with UBOs. I'm troubled by the dual meaning of the `Packed` extended decoration. In some instances (struct, `float[]`, and `vec2[]` members), it actually means the exact opposite, that the member needs extra padding. This is especially problematic for `vec2[]`, because now we need to distinguish the two cases by checking the array stride. I wonder if this should actually be split into two decorations.
2024-11-15 16:30:08 +00:00 · 2019-07-09 20:28:02 -05:00 · 2019-07-09 20:28:02 -05:00 · e5fa7edfd6
commit e5fa7edfd6
parent 8aa6731925
8 changed files with 580 additions and 27 deletions
--- a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp
+++ b/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp
@ -0,0 +1,156 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+typedef float3x2 packed_float2x3;
+
+struct S0
+{
+    packed_float2 a[1];
+    float b;
+};
+
+struct S1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct S2
+{
+    packed_float3 a[1];
+    float b;
+};
+
+struct S3
+{
+    packed_float2 a;
+    float b;
+};
+
+struct S4
+{
+    float2 c;
+};
+
+struct Content
+{
+    S0 m0s[1];
+    S1 m1s[1];
+    S2 m2s[1];
+    S0 m0;
+    S1 m1;
+    S2 m2;
+    S3 m3;
+    float m4;
+    S4 m3s[8];
+};
+
+struct SSBO1
+{
+    Content content;
+    Content content1[2];
+    Content content2;
+    float2x2 m0;
+    float2x2 m1;
+    packed_float2x3 m2[4];
+    float3x2 m3;
+    float2x2 m4;
+    float2x2 m5[9];
+    packed_float2x3 m6[4][2];
+    float3x2 m7;
+    float array[1];
+};
+
+struct S0_1
+{
+    float4 a[1];
+    float b;
+};
+
+struct S1_1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct S2_1
+{
+    float3 a[1];
+    float b;
+};
+
+struct S3_1
+{
+    float2 a;
+    float b;
+};
+
+struct S4_1
+{
+    float2 c;
+};
+
+struct Content_1
+{
+    S0_1 m0s[1];
+    S1_1 m1s[1];
+    S2_1 m2s[1];
+    S0_1 m0;
+    S1_1 m1;
+    S2_1 m2;
+    S3_1 m3;
+    float m4;
+    char _m8_pad[12];
+    /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8];
+};
+
+struct SSBO0
+{
+    Content_1 content;
+    Content_1 content1[2];
+    Content_1 content2;
+    float2x2 m0;
+    char _m4_pad[16];
+    float2x2 m1;
+    char _m5_pad[16];
+    float2x3 m2[4];
+    float3x2 m3;
+    char _m7_pad[24];
+    float2x2 m4;
+    char _m8_pad[16];
+    float2x2 m5[9];
+    float2x3 m6[4][2];
+    float3x2 m7;
+    float4 array[1];
+};
+
+kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
+{
+    ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy;
+    ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
+    ssbo_430.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a);
+    ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
+    ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
+    ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
+    ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0].xy;
+    ssbo_430.content.m0.b = ssbo_140.content.m0.b;
+    ssbo_430.content.m1.a = float3(ssbo_140.content.m1.a);
+    ssbo_430.content.m1.b = ssbo_140.content.m1.b;
+    ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
+    ssbo_430.content.m2.b = ssbo_140.content.m2.b;
+    ssbo_430.content.m3.a = ssbo_140.content.m3.a;
+    ssbo_430.content.m3.b = ssbo_140.content.m3.b;
+    ssbo_430.content.m4 = ssbo_140.content.m4;
+    ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
+    ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
+    ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
+    ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
+    ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
+    ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
+    ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
+    ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
+    ssbo_430.content.m1.a = float2x3(ssbo_430.m2[1]) * float2(ssbo_430.content.m0.a[0]);
+}
+
--- a/reference/opt/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag
+++ b/reference/opt/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag
@ -0,0 +1,36 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float a[1];
+    float2 b[2];
+};
+
+struct UBOEnhancedLayout
+{
+    float c[1];
+    float2 d[2];
+    char _m2_pad[9976];
+    float e;
+};
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    int vIndex [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _17 [[buffer(0)]], constant UBOEnhancedLayout& _30 [[buffer(1)]])
+{
+    main0_out out = {};
+    out.FragColor = (_17.a[in.vIndex] + _30.c[in.vIndex]) + _30.e;
+    return out;
+}
+
--- a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp
+++ b/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp
@ -0,0 +1,156 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+typedef float3x2 packed_float2x3;
+
+struct S0
+{
+    packed_float2 a[1];
+    float b;
+};
+
+struct S1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct S2
+{
+    packed_float3 a[1];
+    float b;
+};
+
+struct S3
+{
+    packed_float2 a;
+    float b;
+};
+
+struct S4
+{
+    float2 c;
+};
+
+struct Content
+{
+    S0 m0s[1];
+    S1 m1s[1];
+    S2 m2s[1];
+    S0 m0;
+    S1 m1;
+    S2 m2;
+    S3 m3;
+    float m4;
+    S4 m3s[8];
+};
+
+struct SSBO1
+{
+    Content content;
+    Content content1[2];
+    Content content2;
+    float2x2 m0;
+    float2x2 m1;
+    packed_float2x3 m2[4];
+    float3x2 m3;
+    float2x2 m4;
+    float2x2 m5[9];
+    packed_float2x3 m6[4][2];
+    float3x2 m7;
+    float array[1];
+};
+
+struct S0_1
+{
+    float4 a[1];
+    float b;
+};
+
+struct S1_1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct S2_1
+{
+    float3 a[1];
+    float b;
+};
+
+struct S3_1
+{
+    float2 a;
+    float b;
+};
+
+struct S4_1
+{
+    float2 c;
+};
+
+struct Content_1
+{
+    S0_1 m0s[1];
+    S1_1 m1s[1];
+    S2_1 m2s[1];
+    S0_1 m0;
+    S1_1 m1;
+    S2_1 m2;
+    S3_1 m3;
+    float m4;
+    char _m8_pad[12];
+    /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8];
+};
+
+struct SSBO0
+{
+    Content_1 content;
+    Content_1 content1[2];
+    Content_1 content2;
+    float2x2 m0;
+    char _m4_pad[16];
+    float2x2 m1;
+    char _m5_pad[16];
+    float2x3 m2[4];
+    float3x2 m3;
+    char _m7_pad[24];
+    float2x2 m4;
+    char _m8_pad[16];
+    float2x2 m5[9];
+    float2x3 m6[4][2];
+    float3x2 m7;
+    float4 array[1];
+};
+
+kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
+{
+    ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy;
+    ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
+    ssbo_430.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a);
+    ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
+    ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
+    ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
+    ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0].xy;
+    ssbo_430.content.m0.b = ssbo_140.content.m0.b;
+    ssbo_430.content.m1.a = float3(ssbo_140.content.m1.a);
+    ssbo_430.content.m1.b = ssbo_140.content.m1.b;
+    ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
+    ssbo_430.content.m2.b = ssbo_140.content.m2.b;
+    ssbo_430.content.m3.a = ssbo_140.content.m3.a;
+    ssbo_430.content.m3.b = ssbo_140.content.m3.b;
+    ssbo_430.content.m4 = ssbo_140.content.m4;
+    ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
+    ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
+    ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
+    ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
+    ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
+    ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
+    ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
+    ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
+    ssbo_430.content.m1.a = float2x3(ssbo_430.m2[1]) * float2(ssbo_430.content.m0.a[0]);
+}
+
--- a/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag
+++ b/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag
@ -0,0 +1,36 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float a[1];
+    float2 b[2];
+};
+
+struct UBOEnhancedLayout
+{
+    float c[1];
+    float2 d[2];
+    char _m2_pad[9976];
+    float e;
+};
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    int vIndex [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _17 [[buffer(0)]], constant UBOEnhancedLayout& _30 [[buffer(1)]])
+{
+    main0_out out = {};
+    out.FragColor = (_17.a[in.vIndex] + _30.c[in.vIndex]) + _30.e;
+    return out;
+}
+
--- a/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp
+++ b/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp
@ -0,0 +1,89 @@
+#version 310 es
+#extension GL_EXT_scalar_block_layout : require
+
+layout(local_size_x = 1) in;
+
+struct S0
+{
+    vec2 a[1];
+    float b;
+};
+
+struct S1
+{
+    vec3 a;
+    float b;
+};
+
+struct S2
+{
+    vec3 a[1];
+    float b;
+};
+
+struct S3
+{
+    vec2 a;
+    float b;
+};
+
+struct S4
+{
+	vec2 c;
+};
+
+struct Content
+{
+    S0 m0s[1];
+    S1 m1s[1];
+    S2 m2s[1];
+    S0 m0;
+    S1 m1;
+    S2 m2;
+    S3 m3;
+    float m4;
+
+	S4 m3s[8];
+};
+
+layout(binding = 1, scalar) restrict buffer SSBO1
+{
+    Content content;
+    Content content1[2];
+    Content content2;
+
+    layout(column_major) mat2 m0;
+    layout(column_major) mat2 m1;
+    layout(column_major) mat2x3 m2[4];
+    layout(column_major) mat3x2 m3;
+    layout(row_major) mat2 m4;
+    layout(row_major) mat2 m5[9];
+    layout(row_major) mat2x3 m6[4][2];
+    layout(row_major) mat3x2 m7;
+    float array[];
+} ssbo_430;
+
+layout(binding = 0, std140) restrict buffer SSBO0
+{
+    Content content;
+    Content content1[2];
+    Content content2;
+
+    layout(column_major) mat2 m0;
+    layout(column_major) mat2 m1;
+    layout(column_major) mat2x3 m2[4];
+    layout(column_major) mat3x2 m3;
+    layout(row_major) mat2 m4;
+    layout(row_major) mat2 m5[9];
+    layout(row_major) mat2x3 m6[4][2];
+    layout(row_major) mat3x2 m7;
+
+    float array[];
+} ssbo_140;
+
+void main()
+{
+    ssbo_430.content = ssbo_140.content;
+    ssbo_430.content.m1.a = ssbo_430.m2[1] * ssbo_430.content.m0.a[0];	// test packed matrix access
+}
+
--- a/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag
+++ b/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_EXT_scalar_block_layout : require
+
+layout(std430, binding = 0) uniform UBO
+{
+	float a[1];
+	vec2 b[2];
+};
+
+layout(std430, binding = 1) uniform UBOEnhancedLayout
+{
+	float c[1];
+	vec2 d[2];
+	layout(offset = 10000) float e;
+};
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out float FragColor;
+
+void main()
+{
+	FragColor = a[vIndex] + c[vIndex] + e;
+}
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@ -2447,7 +2447,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)

 // Returns whether the specified struct member supports a packable type
 // variation that is smaller than the unpacked variation of that type.
-bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
+bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset)
 {
 	// We've already marked it as packable
 	if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked))
@ -2470,16 +2470,66 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 		return true;
 	}

-	// Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
-	// This can happen for struct A { float a }; A a[]; in std140 layout.
-	// TODO: Emit a padded struct which can be used for this purpose.
-	if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct)
+	uint32_t mbr_offset_curr = base_offset + get_member_decoration(ib_type.self, index, DecorationOffset);
+	if (mbr_type.basetype == SPIRType::Struct)
 	{
+		// If this is a struct type, check if any of its members need packing.
+		for (uint32_t i = 0; i < mbr_type.member_types.size(); i++)
+		{
+			if (is_member_packable(mbr_type, i, mbr_offset_curr))
+			{
+				set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked);
+				set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType,
+				                               mbr_type.member_types[i]);
+			}
+		}
 		size_t declared_struct_size = get_declared_struct_size(mbr_type);
 		size_t alignment = get_declared_struct_member_alignment(ib_type, index);
 		declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1);
-		if (type_struct_member_array_stride(ib_type, index) > declared_struct_size)
-			return true;
+		// Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
+		// This can happen for struct A { float a }; A a[]; in std140 layout.
+		// TODO: Emit a padded struct which can be used for this purpose.
+		if (is_array(mbr_type))
+		{
+			size_t array_stride = type_struct_member_array_stride(ib_type, index);
+			if (array_stride > declared_struct_size)
+				return true;
+			if (array_stride < declared_struct_size)
+			{
+				// If the stride is *less* (i.e. more tightly packed), then
+				// we need to pack the members of the struct itself.
+				for (uint32_t i = 0; i < mbr_type.member_types.size(); i++)
+				{
+					if (is_member_packable(mbr_type, i, mbr_offset_curr + array_stride))
+					{
+						set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked);
+						set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType,
+						                               mbr_type.member_types[i]);
+					}
+				}
+			}
+		}
+		else
+		{
+			// Pack if there is not enough space between this member and next.
+			if (index < ib_type.member_types.size() - 1)
+			{
+				uint32_t mbr_offset_next =
+				    base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset);
+				if (declared_struct_size > mbr_offset_next - mbr_offset_curr)
+				{
+					for (uint32_t i = 0; i < mbr_type.member_types.size(); i++)
+					{
+						if (is_member_packable(mbr_type, i, mbr_offset_next))
+						{
+							set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked);
+							set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType,
+							                               mbr_type.member_types[i]);
+						}
+					}
+				}
+			}
+		}
 	}

 	// TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column.
@ -2490,9 +2540,10 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 	if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
 		return false;

-	// Only row-major matrices need to be packed.
-	if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
-		return false;
+	// Pack if the member's offset doesn't conform to the type's usual
+	// alignment. For example, a float3 at offset 4.
+	if (mbr_offset_curr % get_declared_struct_member_alignment(ib_type, index))
+		return true;

 	if (is_array(mbr_type))
 	{
@ -2509,16 +2560,11 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 	}
 	else
 	{
-		uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
-		// For vectors, pack if the member's offset doesn't conform to the
-		// type's usual alignment. For example, a float3 at offset 4.
-		if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size))
-			return true;
 		// Pack if there is not enough space between this member and next.
 		// If last member, only pack if it's a row-major matrix.
 		if (index < ib_type.member_types.size() - 1)
 		{
-			uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
+			uint32_t mbr_offset_next = base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset);
 			return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
 		}
 		else
@ -2546,16 +2592,21 @@ void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_exp
 	{
 		// Special handling when storing to a float[] or float2[] in std140 layout.

-		auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType));
+		uint32_t type_id = get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType);
+		auto &type = get<SPIRType>(type_id);
 		string lhs = to_dereferenced_expression(lhs_expression);
 		string rhs = to_pointer_expression(rhs_expression);
+		uint32_t stride = get_decoration(type_id, DecorationArrayStride);

-		// Unpack the expression so we can store to it with a float or float2.
-		// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
-		if (is_scalar(type) && is_array(type))
-			lhs = enclose_expression(lhs) + ".x";
-		else if (is_vector(type) && type.vecsize == 2 && is_array(type))
-			lhs = enclose_expression(lhs) + ".xy";
+		if (is_array(type) && stride == 4 * type.width / 8)
+		{
+			// Unpack the expression so we can store to it with a float or float2.
+			// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
+			if (is_scalar(type))
+				lhs = enclose_expression(lhs) + ".x";
+			else if (is_vector(type) && type.vecsize == 2)
+				lhs = enclose_expression(lhs) + ".xy";
+		}

 		if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
 			statement(lhs, " = ", rhs, ";");
@ -2568,13 +2619,18 @@ void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_exp
 string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id)
 {
 	const SPIRType *packed_type = nullptr;
+	uint32_t stride = 0;
 	if (packed_type_id)
+	{
 		packed_type = &get<SPIRType>(packed_type_id);
+		stride = get_decoration(packed_type_id, DecorationArrayStride);
+	}

 	// float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead.
-	if (packed_type && is_array(*packed_type) && is_scalar(*packed_type))
+	if (packed_type && is_array(*packed_type) && is_scalar(*packed_type) && stride == 4 * packed_type->width / 8)
 		return enclose_expression(expr_str) + ".x";
-	else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2)
+	else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2 &&
+	         stride == 4 * packed_type->width / 8)
 		return enclose_expression(expr_str) + ".xy";
 	else
 		return join(type_to_glsl(type), "(", expr_str, ")");
@ -5653,7 +5709,8 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
 			td_line += ";";
 			add_typedef_line(td_line);
 		}
-		else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
+		else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct &&
+		         type_struct_member_array_stride(type, index) == 4 * membertype.width / 8)
 		{
 			// A "packed" float array, but we pad here instead to 4-vector.
 			override_type = membertype;
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@ -511,7 +511,7 @@ protected:
 	size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const;
 	std::string to_component_argument(uint32_t id);
 	void align_struct(SPIRType &ib_type);
-	bool is_member_packable(SPIRType &ib_type, uint32_t index);
+	bool is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset = 0);
 	MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
 	std::string get_argument_address_space(const SPIRVariable &argument);
 	std::string get_type_address_space(const SPIRType &type, uint32_t id);