diff --git a/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag new file mode 100644 index 00000000..649f9f6f --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -0,0 +1,30 @@ +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +struct Foo +{ + packed_float3 a; + float b; +}; + +struct buf +{ + Foo results[16]; + float4 bar; +}; + +struct main0_out +{ + float4 _entryPointOutput [[color(0)]]; +}; + +fragment main0_out main0(constant buf& _11 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + int _67 = int(gl_FragCoord.x) % 16; + out._entryPointOutput = float4(dot(float3(_11.results[_67].a), _11.bar.xyz), _11.results[_67].b, 0.0, 0.0); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag new file mode 100644 index 00000000..1eabc93a --- /dev/null +++ b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -0,0 +1,48 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +struct Foo +{ + float3 a; + float b; +}; + +struct Foo_1 +{ + packed_float3 a; + float b; +}; + +struct buf +{ + Foo_1 results[16]; + float4 bar; +}; + +struct main0_out +{ + float4 _entryPointOutput [[color(0)]]; +}; + +float4 _main(thread const float4& pos, constant buf& v_11) +{ + int _46 = int(pos.x) % 16; + Foo foo; + foo.a = v_11.results[_46].a; + foo.b = v_11.results[_46].b; + return float4(dot(foo.a, v_11.bar.xyz), foo.b, 0.0, 0.0); +} + +fragment main0_out main0(constant buf& v_11 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 pos = gl_FragCoord; + float4 param = pos; + out._entryPointOutput = _main(param, v_11); + return out; +} + diff --git a/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag 
b/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag new file mode 100644 index 00000000..e205a15c --- /dev/null +++ b/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -0,0 +1,107 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 64 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %pos_1 %_entryPointOutput + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 500 + OpName %main "main" + OpName %_main_vf4_ "@main(vf4;" + OpName %pos "pos" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpName %foo "foo" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "a" + OpMemberName %Foo_0 1 "b" + OpName %buf "buf" + OpMemberName %buf 0 "results" + OpMemberName %buf 1 "bar" + OpName %_ "" + OpName %pos_0 "pos" + OpName %pos_1 "pos" + OpName %_entryPointOutput "@entryPointOutput" + OpName %param "param" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Foo_0 1 Offset 12 + OpDecorate %_arr_Foo_0_uint_16 ArrayStride 16 + OpMemberDecorate %buf 0 Offset 0 + OpMemberDecorate %buf 1 Offset 256 + OpDecorate %buf Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %pos_1 BuiltIn FragCoord + OpDecorate %_entryPointOutput Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %9 = OpTypeFunction %v4float %_ptr_Function_v4float + %v3float = OpTypeVector %float 3 + %Foo = OpTypeStruct %v3float %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v3float %float + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_Foo_0_uint_16 = OpTypeArray %Foo_0 %uint_16 + %buf = OpTypeStruct %_arr_Foo_0_uint_16 %v4float +%_ptr_Uniform_buf = OpTypePointer Uniform %buf + %_ = OpVariable %_ptr_Uniform_buf Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant 
%int 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %int_16 = OpConstant %int 16 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %float_0 = OpConstant %float 0 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %pos_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %pos_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_v4float Function + %58 = OpLoad %v4float %pos_1 + OpStore %pos_0 %58 + %62 = OpLoad %v4float %pos_0 + OpStore %param %62 + %63 = OpFunctionCall %v4float %_main_vf4_ %param + OpStore %_entryPointOutput %63 + OpReturn + OpFunctionEnd + %_main_vf4_ = OpFunction %v4float None %9 + %pos = OpFunctionParameter %_ptr_Function_v4float + %12 = OpLabel + %foo = OpVariable %_ptr_Function_Foo Function + %28 = OpAccessChain %_ptr_Function_float %pos %uint_0 + %29 = OpLoad %float %28 + %30 = OpConvertFToS %int %29 + %32 = OpSMod %int %30 %int_16 + %34 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 %32 + %35 = OpLoad %Foo_0 %34 + %36 = OpCompositeExtract %v3float %35 0 + %38 = OpAccessChain %_ptr_Function_v3float %foo %int_0 + OpStore %38 %36 + %39 = OpCompositeExtract %float %35 1 + %41 = OpAccessChain %_ptr_Function_float %foo %int_1 + OpStore %41 %39 + %42 = OpAccessChain %_ptr_Function_v3float %foo %int_0 + %43 = OpLoad %v3float %42 + %45 = OpAccessChain %_ptr_Uniform_v4float %_ %int_1 + %46 = OpLoad %v4float %45 + %47 = OpVectorShuffle %v3float %46 %46 0 1 2 + %48 = OpDot %float %43 %47 + %49 = OpAccessChain %_ptr_Function_float %foo %int_1 + %50 = OpLoad %float %49 + %52 = OpCompositeConstruct %v4float %48 %50 %float_0 %float_0 + OpReturnValue %52 + OpFunctionEnd diff --git 
a/spirv_glsl.cpp b/spirv_glsl.cpp index 0f6b3e3a..08e6f247 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -6928,6 +6928,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (has_decoration(ops[2], DecorationCPacked)) allow_base_expression = false; + AccessChainMeta meta; + SPIRExpression *e = nullptr; + // Only apply this optimization if result is scalar. if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) { @@ -6943,17 +6946,27 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // // Including the base will prevent this and would trigger multiple reads // from expression causing it to be forced to an actual temporary in GLSL. - auto expr = access_chain_internal(ops[2], &ops[3], length, true, true); - auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); + auto expr = access_chain_internal(ops[2], &ops[3], length, true, true, &meta); + e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); inherit_expression_dependencies(id, ops[2]); - e.base_expression = ops[2]; + e->base_expression = ops[2]; } else { - auto expr = access_chain_internal(ops[2], &ops[3], length, true); - emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); + auto expr = access_chain_internal(ops[2], &ops[3], length, true, false, &meta); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); inherit_expression_dependencies(id, ops[2]); } + + // Pass through some meta information to the loaded expression. + // We can still end up loading a buffer type to a variable, then CompositeExtract from it + // instead of loading everything through an access chain. + e->need_transpose = meta.need_transpose; + if (meta.storage_is_packed) + set_decoration(id, DecorationCPacked); + if (meta.storage_is_invariant) + set_decoration(id, DecorationInvariant); + break; }