diff --git a/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp b/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
index 12547276..6728a4e2 100644
--- a/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
@@ -7,11 +7,16 @@ struct foo
 {
     uint bar;
     packed_float3 baz;
+    uchar quux;
+    packed_uchar4 blah;
+    packed_half2 wibble;
 };
 
-kernel void main0(device foo& _6 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
 {
-    _6.bar = gl_LocalInvocationID.x;
-    _6.baz = float3(gl_GlobalInvocationID);
+    _8.bar = gl_LocalInvocationID.x;
+    _8.baz = float3(gl_GlobalInvocationID);
+    _8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
+    _8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
 }
 
diff --git a/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp b/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
index 12547276..6728a4e2 100644
--- a/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
+++ b/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
@@ -7,11 +7,16 @@ struct foo
 {
     uint bar;
     packed_float3 baz;
+    uchar quux;
+    packed_uchar4 blah;
+    packed_half2 wibble;
 };
 
-kernel void main0(device foo& _6 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
 {
-    _6.bar = gl_LocalInvocationID.x;
-    _6.baz = float3(gl_GlobalInvocationID);
+    _8.bar = gl_LocalInvocationID.x;
+    _8.baz = float3(gl_GlobalInvocationID);
+    _8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
+    _8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
 }
 
diff --git a/shaders-msl/asm/comp/relaxed-block-layout.asm.comp b/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
index 2545cc7b..dd909426 100644
--- a/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
+++ b/shaders-msl/asm/comp/relaxed-block-layout.asm.comp
@@ -1,34 +1,54 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Khronos Glslang Reference Front End; 7
-; Bound: 29
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageBuffer8BitAccess
+               OpCapability UniformAndStorageBuffer8BitAccess
+               OpExtension "SPV_KHR_8bit_storage"
           %1 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID
+               OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID %gl_WorkGroupID %gl_NumWorkGroups
                OpExecutionMode %main LocalSize 1 1 1
                OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_16bit_storage"
+               OpSourceExtension "GL_EXT_shader_8bit_storage"
                OpName %main "main"
                OpName %foo "foo"
                OpMemberName %foo 0 "bar"
                OpMemberName %foo 1 "baz"
+               OpMemberName %foo 2 "quux"
+               OpMemberName %foo 3 "blah"
+               OpMemberName %foo 4 "wibble"
                OpName %_ ""
                OpName %gl_LocalInvocationID "gl_LocalInvocationID"
                OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+               OpName %gl_WorkGroupID "gl_WorkGroupID"
+               OpName %gl_NumWorkGroups "gl_NumWorkGroups"
                OpMemberDecorate %foo 0 Offset 0
                OpMemberDecorate %foo 1 Offset 4
+               OpMemberDecorate %foo 2 Offset 16
+               OpMemberDecorate %foo 3 Offset 17
+               OpMemberDecorate %foo 4 Offset 22
                OpDecorate %foo BufferBlock
                OpDecorate %_ DescriptorSet 0
                OpDecorate %_ Binding 0
                OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+               OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId
+               OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups
        %void = OpTypeVoid
           %3 = OpTypeFunction %void
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
     %v3float = OpTypeVector %float 3
-        %foo = OpTypeStruct %uint %v3float
+      %uchar = OpTypeInt 8 0
+    %v4uchar = OpTypeVector %uchar 4
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+        %foo = OpTypeStruct %uint %v3float %uchar %v4uchar %v2half
 %_ptr_Uniform_foo = OpTypePointer Uniform %foo
           %_ = OpVariable %_ptr_Uniform_foo Uniform
         %int = OpTypeInt 32 1
@@ -42,15 +62,47 @@
       %int_1 = OpConstant %int 1
 %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
 %_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_v4uchar = OpTypePointer Uniform %v4uchar
+     %v4uint = OpTypeVector %uint 4
+%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input
+      %int_4 = OpConstant %int 4
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+    %v2float = OpTypeVector %float 2
+%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input
+     %v2uint = OpTypeVector %uint 2
        %main = OpFunction %void None %3
           %5 = OpLabel
-         %19 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0
-         %20 = OpLoad %uint %19
-         %22 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
-               OpStore %22 %20
-         %25 = OpLoad %v3uint %gl_GlobalInvocationID
-         %26 = OpConvertUToF %v3float %25
-         %28 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1
-               OpStore %28 %26
+         %23 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0
+         %24 = OpLoad %uint %23
+         %26 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+               OpStore %26 %24
+         %29 = OpLoad %v3uint %gl_GlobalInvocationID
+         %30 = OpConvertUToF %v3float %29
+         %32 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1
+               OpStore %32 %30
+         %35 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3
+         %36 = OpLoad %v4uchar %35
+         %38 = OpUConvert %v4uint %36
+         %39 = OpVectorShuffle %v3uint %38 %38 0 1 2
+         %41 = OpLoad %v3uint %gl_WorkGroupID
+         %42 = OpIAdd %v3uint %39 %41
+         %43 = OpCompositeExtract %uint %42 0
+         %44 = OpCompositeExtract %uint %42 1
+         %45 = OpCompositeExtract %uint %42 2
+         %46 = OpCompositeConstruct %v4uint %43 %44 %45 %uint_0
+         %47 = OpUConvert %v4uchar %46
+         %48 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3
+               OpStore %48 %47
+         %51 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4
+         %52 = OpLoad %v2half %51
+         %54 = OpFConvert %v2float %52
+         %57 = OpLoad %v3uint %gl_NumWorkGroups
+         %58 = OpVectorShuffle %v2uint %57 %57 0 1
+         %59 = OpConvertUToF %v2float %58
+         %60 = OpFMul %v2float %54 %59
+         %61 = OpFConvert %v2half %60
+         %62 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4
+               OpStore %62 %61
                OpReturn
                OpFunctionEnd
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 4f7e518a..5e0ad23f 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -1231,8 +1231,8 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 
 	auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
 
-	// Only 3-element vectors or 3-row matrices need to be packed.
-	if (mbr_type.vecsize != 3)
+	// Only vectors or 3-row matrices need to be packed.
+	if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
 		return false;
 
 	// Only row-major matrices need to be packed.
@@ -1240,7 +1240,11 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 		return false;
 
 	uint32_t component_size = mbr_type.width / 8;
-	uint32_t unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
+	uint32_t unpacked_mbr_size;
+	if (mbr_type.vecsize == 3)
+		unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
+	else
+		unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
 	if (is_array(mbr_type))
 	{
 		// If member is an array, and the array stride is larger than the type needs, don't pack it.