diff --git a/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp b/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp index 12547276..6728a4e2 100644 --- a/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/relaxed-block-layout.asm.comp @@ -7,11 +7,16 @@ struct foo { uint bar; packed_float3 baz; + uchar quux; + packed_uchar4 blah; + packed_half2 wibble; }; -kernel void main0(device foo& _6 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]]) { - _6.bar = gl_LocalInvocationID.x; - _6.baz = float3(gl_GlobalInvocationID); + _8.bar = gl_LocalInvocationID.x; + _8.baz = float3(gl_GlobalInvocationID); + _8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u)); + _8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy)); } diff --git a/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp b/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp index 12547276..6728a4e2 100644 --- a/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp +++ b/reference/shaders-msl/asm/comp/relaxed-block-layout.asm.comp @@ -7,11 +7,16 @@ struct foo { uint bar; packed_float3 baz; + uchar quux; + packed_uchar4 blah; + packed_half2 wibble; }; -kernel void main0(device foo& _6 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]]) { - _6.bar = gl_LocalInvocationID.x; - _6.baz = float3(gl_GlobalInvocationID); + _8.bar = gl_LocalInvocationID.x; + _8.baz = float3(gl_GlobalInvocationID); + _8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u)); + _8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy)); } diff --git a/shaders-msl/asm/comp/relaxed-block-layout.asm.comp b/shaders-msl/asm/comp/relaxed-block-layout.asm.comp index 2545cc7b..dd909426 100644 --- a/shaders-msl/asm/comp/relaxed-block-layout.asm.comp +++ b/shaders-msl/asm/comp/relaxed-block-layout.asm.comp @@ -1,34 +1,54 @@ ; SPIR-V ; Version: 1.3 ; Generator: Khronos Glslang Reference Front End; 7 -; Bound: 29 +; Bound: 63 ; Schema: 0 OpCapability Shader + OpCapability StorageBuffer16BitAccess + OpCapability StorageBuffer8BitAccess + OpCapability UniformAndStorageBuffer8BitAccess + OpExtension "SPV_KHR_8bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 - OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID %gl_WorkGroupID %gl_NumWorkGroups OpExecutionMode %main LocalSize 1 1 1 OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_16bit_storage" + OpSourceExtension "GL_EXT_shader_8bit_storage" OpName %main "main" OpName %foo "foo" OpMemberName %foo 0 "bar" OpMemberName %foo 1 "baz" + OpMemberName %foo 2 "quux" + OpMemberName %foo 3 "blah" + OpMemberName %foo 4 "wibble" OpName %_ "" OpName %gl_LocalInvocationID "gl_LocalInvocationID" OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_WorkGroupID "gl_WorkGroupID" + OpName %gl_NumWorkGroups "gl_NumWorkGroups" OpMemberDecorate %foo 0 Offset 0 OpMemberDecorate %foo 1 Offset 4 + OpMemberDecorate %foo 2 Offset 16 + OpMemberDecorate %foo 3 Offset 17 + OpMemberDecorate %foo 4 Offset 22 OpDecorate %foo BufferBlock OpDecorate %_ DescriptorSet 0 OpDecorate %_ Binding 0 OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId + OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups %void = OpTypeVoid %3 = OpTypeFunction %void %uint = OpTypeInt 32 0 %float = OpTypeFloat 32 %v3float = OpTypeVector %float 3 - %foo = OpTypeStruct %uint %v3float + %uchar = OpTypeInt 8 0 + %v4uchar = OpTypeVector %uchar 4 + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %foo = OpTypeStruct %uint %v3float %uchar %v4uchar %v2half %_ptr_Uniform_foo = OpTypePointer Uniform %foo %_ = OpVariable %_ptr_Uniform_foo Uniform %int = OpTypeInt 32 1 @@ -42,15 +62,47 @@ %int_1 = OpConstant %int 1 %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input %_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_3 = OpConstant %int 3 +%_ptr_Uniform_v4uchar = OpTypePointer Uniform %v4uchar + %v4uint = OpTypeVector %uint 4 +%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input + %int_4 = OpConstant %int 4 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %v2float = OpTypeVector %float 2 +%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input + %v2uint = OpTypeVector %uint 2 %main = OpFunction %void None %3 %5 = OpLabel - %19 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 - %20 = OpLoad %uint %19 - %22 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 - OpStore %22 %20 - %25 = OpLoad %v3uint %gl_GlobalInvocationID - %26 = OpConvertUToF %v3float %25 - %28 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 - OpStore %28 %26 + %23 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %24 = OpLoad %uint %23 + %26 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpStore %26 %24 + %29 = OpLoad %v3uint %gl_GlobalInvocationID + %30 = OpConvertUToF %v3float %29 + %32 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + OpStore %32 %30 + %35 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3 + %36 = OpLoad %v4uchar %35 + %38 = OpUConvert %v4uint %36 + %39 = OpVectorShuffle %v3uint %38 %38 0 1 2 + %41 = OpLoad %v3uint %gl_WorkGroupID + %42 = OpIAdd %v3uint %39 %41 + %43 = OpCompositeExtract %uint %42 0 + %44 = OpCompositeExtract %uint %42 1 + %45 = OpCompositeExtract %uint %42 2 + %46 = OpCompositeConstruct %v4uint %43 %44 %45 %uint_0 + %47 = OpUConvert %v4uchar %46 + %48 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3 + OpStore %48 %47 + %51 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4 + %52 = OpLoad %v2half %51 + %54 = OpFConvert %v2float %52 + %57 = OpLoad %v3uint %gl_NumWorkGroups + %58 = OpVectorShuffle %v2uint %57 %57 0 1 + %59 = OpConvertUToF %v2float %58 + %60 = OpFMul %v2float %54 %59 + %61 = OpFConvert %v2half %60 + %62 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4 + OpStore %62 %61 OpReturn OpFunctionEnd diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4f7e518a..5e0ad23f 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1231,8 +1231,8 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index) auto &mbr_type = get(ib_type.member_types[index]); - // Only 3-element vectors or 3-row matrices need to be packed. - if (mbr_type.vecsize != 3) + // Only vectors or 3-row matrices need to be packed. + if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3)) return false; // Only row-major matrices need to be packed. @@ -1240,7 +1240,11 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index) return false; uint32_t component_size = mbr_type.width / 8; - uint32_t unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns; + uint32_t unpacked_mbr_size; + if (mbr_type.vecsize == 3) + unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns; + else + unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns; if (is_array(mbr_type)) { // If member is an array, and the array stride is larger than the type needs, don't pack it.