Merge pull request #756 from cdavis5e/relaxed-block-layout-2
MSL: Also pack 2- and 4- element vectors when necessary.
This commit is contained in:
commit
510e1475c6
@ -7,11 +7,16 @@ struct foo
|
||||
{
|
||||
uint bar;
|
||||
packed_float3 baz;
|
||||
uchar quux;
|
||||
packed_uchar4 blah;
|
||||
packed_half2 wibble;
|
||||
};
|
||||
|
||||
kernel void main0(device foo& _6 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
|
||||
{
|
||||
_6.bar = gl_LocalInvocationID.x;
|
||||
_6.baz = float3(gl_GlobalInvocationID);
|
||||
_8.bar = gl_LocalInvocationID.x;
|
||||
_8.baz = float3(gl_GlobalInvocationID);
|
||||
_8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
|
||||
_8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
|
||||
}
|
||||
|
||||
|
@ -7,11 +7,16 @@ struct foo
|
||||
{
|
||||
uint bar;
|
||||
packed_float3 baz;
|
||||
uchar quux;
|
||||
packed_uchar4 blah;
|
||||
packed_half2 wibble;
|
||||
};
|
||||
|
||||
kernel void main0(device foo& _6 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
|
||||
{
|
||||
_6.bar = gl_LocalInvocationID.x;
|
||||
_6.baz = float3(gl_GlobalInvocationID);
|
||||
_8.bar = gl_LocalInvocationID.x;
|
||||
_8.baz = float3(gl_GlobalInvocationID);
|
||||
_8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
|
||||
_8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
|
||||
}
|
||||
|
||||
|
@ -1,34 +1,54 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 29
|
||||
; Bound: 63
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability StorageBuffer16BitAccess
|
||||
OpCapability StorageBuffer8BitAccess
|
||||
OpCapability UniformAndStorageBuffer8BitAccess
|
||||
OpExtension "SPV_KHR_8bit_storage"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID
|
||||
OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID %gl_WorkGroupID %gl_NumWorkGroups
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 450
|
||||
OpSourceExtension "GL_EXT_shader_16bit_storage"
|
||||
OpSourceExtension "GL_EXT_shader_8bit_storage"
|
||||
OpName %main "main"
|
||||
OpName %foo "foo"
|
||||
OpMemberName %foo 0 "bar"
|
||||
OpMemberName %foo 1 "baz"
|
||||
OpMemberName %foo 2 "quux"
|
||||
OpMemberName %foo 3 "blah"
|
||||
OpMemberName %foo 4 "wibble"
|
||||
OpName %_ ""
|
||||
OpName %gl_LocalInvocationID "gl_LocalInvocationID"
|
||||
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
|
||||
OpName %gl_WorkGroupID "gl_WorkGroupID"
|
||||
OpName %gl_NumWorkGroups "gl_NumWorkGroups"
|
||||
OpMemberDecorate %foo 0 Offset 0
|
||||
OpMemberDecorate %foo 1 Offset 4
|
||||
OpMemberDecorate %foo 2 Offset 16
|
||||
OpMemberDecorate %foo 3 Offset 17
|
||||
OpMemberDecorate %foo 4 Offset 22
|
||||
OpDecorate %foo BufferBlock
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %_ Binding 0
|
||||
OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId
|
||||
OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%uint = OpTypeInt 32 0
|
||||
%float = OpTypeFloat 32
|
||||
%v3float = OpTypeVector %float 3
|
||||
%foo = OpTypeStruct %uint %v3float
|
||||
%uchar = OpTypeInt 8 0
|
||||
%v4uchar = OpTypeVector %uchar 4
|
||||
%half = OpTypeFloat 16
|
||||
%v2half = OpTypeVector %half 2
|
||||
%foo = OpTypeStruct %uint %v3float %uchar %v4uchar %v2half
|
||||
%_ptr_Uniform_foo = OpTypePointer Uniform %foo
|
||||
%_ = OpVariable %_ptr_Uniform_foo Uniform
|
||||
%int = OpTypeInt 32 1
|
||||
@ -42,15 +62,47 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
|
||||
%int_3 = OpConstant %int 3
|
||||
%_ptr_Uniform_v4uchar = OpTypePointer Uniform %v4uchar
|
||||
%v4uint = OpTypeVector %uint 4
|
||||
%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input
|
||||
%int_4 = OpConstant %int 4
|
||||
%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
|
||||
%v2float = OpTypeVector %float 2
|
||||
%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input
|
||||
%v2uint = OpTypeVector %uint 2
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%19 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0
|
||||
%20 = OpLoad %uint %19
|
||||
%22 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
|
||||
OpStore %22 %20
|
||||
%25 = OpLoad %v3uint %gl_GlobalInvocationID
|
||||
%26 = OpConvertUToF %v3float %25
|
||||
%28 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1
|
||||
OpStore %28 %26
|
||||
%23 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0
|
||||
%24 = OpLoad %uint %23
|
||||
%26 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
|
||||
OpStore %26 %24
|
||||
%29 = OpLoad %v3uint %gl_GlobalInvocationID
|
||||
%30 = OpConvertUToF %v3float %29
|
||||
%32 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1
|
||||
OpStore %32 %30
|
||||
%35 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3
|
||||
%36 = OpLoad %v4uchar %35
|
||||
%38 = OpUConvert %v4uint %36
|
||||
%39 = OpVectorShuffle %v3uint %38 %38 0 1 2
|
||||
%41 = OpLoad %v3uint %gl_WorkGroupID
|
||||
%42 = OpIAdd %v3uint %39 %41
|
||||
%43 = OpCompositeExtract %uint %42 0
|
||||
%44 = OpCompositeExtract %uint %42 1
|
||||
%45 = OpCompositeExtract %uint %42 2
|
||||
%46 = OpCompositeConstruct %v4uint %43 %44 %45 %uint_0
|
||||
%47 = OpUConvert %v4uchar %46
|
||||
%48 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3
|
||||
OpStore %48 %47
|
||||
%51 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4
|
||||
%52 = OpLoad %v2half %51
|
||||
%54 = OpFConvert %v2float %52
|
||||
%57 = OpLoad %v3uint %gl_NumWorkGroups
|
||||
%58 = OpVectorShuffle %v2uint %57 %57 0 1
|
||||
%59 = OpConvertUToF %v2float %58
|
||||
%60 = OpFMul %v2float %54 %59
|
||||
%61 = OpFConvert %v2half %60
|
||||
%62 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4
|
||||
OpStore %62 %61
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -1231,8 +1231,8 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
|
||||
|
||||
auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
|
||||
|
||||
// Only 3-element vectors or 3-row matrices need to be packed.
|
||||
if (mbr_type.vecsize != 3)
|
||||
// Only vectors or 3-row matrices need to be packed.
|
||||
if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
|
||||
return false;
|
||||
|
||||
// Only row-major matrices need to be packed.
|
||||
@ -1240,7 +1240,11 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
|
||||
return false;
|
||||
|
||||
uint32_t component_size = mbr_type.width / 8;
|
||||
uint32_t unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
|
||||
uint32_t unpacked_mbr_size;
|
||||
if (mbr_type.vecsize == 3)
|
||||
unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
|
||||
else
|
||||
unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
|
||||
if (is_array(mbr_type))
|
||||
{
|
||||
// If member is an array, and the array stride is larger than the type needs, don't pack it.
|
||||
|
Loading…
Reference in New Issue
Block a user