Forward meta information in OpCompositeExtract.

Just like OpAccessChain, we need to make use of the meta information
available to us from access_chain_internal, as we can extract a packed
vector or transposed vector from a composite, not just from a memory load.
This commit is contained in:
Hans-Kristian Arntzen 2019-01-07 10:43:55 +01:00
parent 169607cd28
commit 66263d4569
4 changed files with 203 additions and 5 deletions

View File

@ -0,0 +1,30 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Element type of buf::results: a packed 3-component float vector followed by one float.
struct Foo
{
packed_float3 a;
float b;
};
// Layout of the constant buffer that main0 receives at [[buffer(0)]]:
// 16 Foo entries followed by a float4.
struct buf
{
Foo results[16];
float4 bar;
};
// Return type of the fragment entry point; its only member is bound to [[color(0)]].
struct main0_out
{
float4 _entryPointOutput [[color(0)]];
};
// Fragment entry point. Picks one of the 16 buffer entries from the fragment's
// x coordinate and writes (dot(entry.a, bar.xyz), entry.b, 0, 0) to color output 0.
fragment main0_out main0(constant buf& _11 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
    // Entry index: integer-converted x coordinate modulo the array length.
    const int entry = int(gl_FragCoord.x) % 16;

    // The packed member is explicitly converted to float3 before the dot product.
    const float3 unpacked_a = float3(_11.results[entry].a);
    const float projected = dot(unpacked_a, _11.bar.xyz);

    main0_out out = {};
    out._entryPointOutput = float4(projected, _11.results[entry].b, 0.0, 0.0);
    return out;
}

View File

@ -0,0 +1,48 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Function-local version of the struct (used for the `foo` variable in _main):
// member `a` is a regular float3 here, unlike the packed_float3 in Foo_1.
struct Foo
{
float3 a;
float b;
};
// Buffer-side version of the struct (element type of buf::results):
// member `a` is a packed_float3, followed by one float.
struct Foo_1
{
packed_float3 a;
float b;
};
// Layout of the constant buffer that main0 receives at [[buffer(0)]]:
// 16 Foo_1 entries followed by a float4.
struct buf
{
Foo_1 results[16];
float4 bar;
};
// Return type of the fragment entry point; its only member is bound to [[color(0)]].
struct main0_out
{
float4 _entryPointOutput [[color(0)]];
};
// Helper called by main0. Copies one buffer entry, selected by pos.x, member by
// member into a function-local Foo and returns (dot(foo.a, bar.xyz), foo.b, 0, 0).
float4 _main(thread const float4& pos, constant buf& v_11)
{
    // Entry index: integer-converted x coordinate modulo the array length.
    int entry = int(pos.x) % 16;

    // Member-wise copy from the buffer struct (packed `a`) into the local struct.
    Foo foo;
    foo.a = v_11.results[entry].a;
    foo.b = v_11.results[entry].b;

    float projected = dot(foo.a, v_11.bar.xyz);
    return float4(projected, foo.b, 0.0, 0.0);
}
// Fragment entry point: hands a copy of the fragment coordinate and the constant
// buffer to _main and writes the returned colour to output 0.
fragment main0_out main0(constant buf& v_11 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
    // _main takes its position by thread-space reference, so pass a local copy.
    float4 param = gl_FragCoord;

    main0_out result = {};
    result._entryPointOutput = _main(param, v_11);
    return result;
}

View File

@ -0,0 +1,107 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 64
; Schema: 0
; Module preamble: capability, extended instruction set import and memory model.
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
; Fragment entry point %main; its interface lists %pos_1 and %_entryPointOutput.
OpEntryPoint Fragment %main "main" %pos_1 %_entryPointOutput
OpExecutionMode %main OriginUpperLeft
OpSource HLSL 500
; Debug names. Two distinct struct ids (%Foo and %Foo_0) share the name "Foo",
; and three ids (%pos, %pos_0, %pos_1) share the name "pos".
OpName %main "main"
OpName %_main_vf4_ "@main(vf4;"
OpName %pos "pos"
OpName %Foo "Foo"
OpMemberName %Foo 0 "a"
OpMemberName %Foo 1 "b"
OpName %foo "foo"
OpName %Foo_0 "Foo"
OpMemberName %Foo_0 0 "a"
OpMemberName %Foo_0 1 "b"
OpName %buf "buf"
OpMemberName %buf 0 "results"
OpMemberName %buf 1 "bar"
OpName %_ ""
OpName %pos_0 "pos"
OpName %pos_1 "pos"
OpName %_entryPointOutput "@entryPointOutput"
OpName %param "param"
; Layout decorations. Only %Foo_0 carries member offsets (%Foo has none here):
; member 0 at byte 0, member 1 at byte 12, and a 16-byte stride per array element.
OpMemberDecorate %Foo_0 0 Offset 0
OpMemberDecorate %Foo_0 1 Offset 12
OpDecorate %_arr_Foo_0_uint_16 ArrayStride 16
; %buf block: member 0 ("results") at byte 0, member 1 ("bar") at byte 256.
OpMemberDecorate %buf 0 Offset 0
OpMemberDecorate %buf 1 Offset 256
OpDecorate %buf Block
OpDecorate %_ DescriptorSet 0
; Interface variables: %pos_1 is the FragCoord builtin, the output is at location 0.
OpDecorate %pos_1 BuiltIn FragCoord
OpDecorate %_entryPointOutput Location 0
; Scalar/vector types and the two function signatures:
; %3 is void(), %9 takes a Function-storage pointer to v4float and returns v4float.
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Function_v4float = OpTypePointer Function %v4float
%9 = OpTypeFunction %v4float %_ptr_Function_v4float
%v3float = OpTypeVector %float 3
; %Foo and %Foo_0 have identical member types (v3float, float) but are separate ids:
; %Foo is pointed to in Function storage, %Foo_0 is the element type of the array in %buf.
%Foo = OpTypeStruct %v3float %float
%_ptr_Function_Foo = OpTypePointer Function %Foo
%Foo_0 = OpTypeStruct %v3float %float
%uint = OpTypeInt 32 0
%uint_16 = OpConstant %uint 16
%_arr_Foo_0_uint_16 = OpTypeArray %Foo_0 %uint_16
; %buf = { Foo_0[16], v4float }; %_ is the Uniform-storage variable of that type.
%buf = OpTypeStruct %_arr_Foo_0_uint_16 %v4float
%_ptr_Uniform_buf = OpTypePointer Uniform %buf
%_ = OpVariable %_ptr_Uniform_buf Uniform
; Constants and pointer types used by the access chains in the function bodies.
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%uint_0 = OpConstant %uint 0
%_ptr_Function_float = OpTypePointer Function %float
%int_16 = OpConstant %int 16
%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0
%_ptr_Function_v3float = OpTypePointer Function %v3float
%int_1 = OpConstant %int 1
%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
%float_0 = OpConstant %float 0
; Stage interface variables: v4float input %pos_1 and v4float output %_entryPointOutput.
%_ptr_Input_v4float = OpTypePointer Input %v4float
%pos_1 = OpVariable %_ptr_Input_v4float Input
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_entryPointOutput = OpVariable %_ptr_Output_v4float Output
; Entry point wrapper: copies the input %pos_1 into the local %pos_0, copies that
; into %param, calls %_main_vf4_ with %param and stores the returned v4float
; into %_entryPointOutput.
%main = OpFunction %void None %3
%5 = OpLabel
%pos_0 = OpVariable %_ptr_Function_v4float Function
%param = OpVariable %_ptr_Function_v4float Function
; pos_0 = pos_1
%58 = OpLoad %v4float %pos_1
OpStore %pos_0 %58
; param = pos_0
%62 = OpLoad %v4float %pos_0
OpStore %param %62
; _entryPointOutput = @main(param)
%63 = OpFunctionCall %v4float %_main_vf4_ %param
OpStore %_entryPointOutput %63
OpReturn
OpFunctionEnd
; Helper "@main(vf4;": takes a pointer to a v4float position and returns
; (dot(foo.a, bar.xyz), foo.b, 0, 0), where foo is a local copy of one buffer entry.
%_main_vf4_ = OpFunction %v4float None %9
%pos = OpFunctionParameter %_ptr_Function_v4float
%12 = OpLabel
%foo = OpVariable %_ptr_Function_Foo Function
; %32 = int(pos[0]) smod 16, used as the index into the array in member 0 of %_.
%28 = OpAccessChain %_ptr_Function_float %pos %uint_0
%29 = OpLoad %float %28
%30 = OpConvertFToS %int %29
%32 = OpSMod %int %30 %int_16
; The whole %Foo_0 element is loaded as a value (%35) first ...
%34 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 %32
%35 = OpLoad %Foo_0 %34
; ... and its members are then taken with OpCompositeExtract rather than through
; per-member access chains: member 0 (v3float) is stored to foo member 0,
%36 = OpCompositeExtract %v3float %35 0
%38 = OpAccessChain %_ptr_Function_v3float %foo %int_0
OpStore %38 %36
; and member 1 (float) is stored to foo member 1.
%39 = OpCompositeExtract %float %35 1
%41 = OpAccessChain %_ptr_Function_float %foo %int_1
OpStore %41 %39
; %48 = dot(foo member 0, first three components of member 1 of %_).
%42 = OpAccessChain %_ptr_Function_v3float %foo %int_0
%43 = OpLoad %v3float %42
%45 = OpAccessChain %_ptr_Uniform_v4float %_ %int_1
%46 = OpLoad %v4float %45
%47 = OpVectorShuffle %v3float %46 %46 0 1 2
%48 = OpDot %float %43 %47
; Result vector: (%48, foo member 1, 0, 0).
%49 = OpAccessChain %_ptr_Function_float %foo %int_1
%50 = OpLoad %float %49
%52 = OpCompositeConstruct %v4float %48 %50 %float_0 %float_0
OpReturnValue %52
OpFunctionEnd

View File

@ -6928,6 +6928,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (has_decoration(ops[2], DecorationCPacked))
allow_base_expression = false;
AccessChainMeta meta;
SPIRExpression *e = nullptr;
// Only apply this optimization if result is scalar.
if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
{
@ -6943,17 +6946,27 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
//
// Including the base will prevent this and would trigger multiple reads
// from expression causing it to be forced to an actual temporary in GLSL.
auto expr = access_chain_internal(ops[2], &ops[3], length, true, true);
auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
auto expr = access_chain_internal(ops[2], &ops[3], length, true, true, &meta);
e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
inherit_expression_dependencies(id, ops[2]);
e.base_expression = ops[2];
e->base_expression = ops[2];
}
else
{
auto expr = access_chain_internal(ops[2], &ops[3], length, true);
emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
auto expr = access_chain_internal(ops[2], &ops[3], length, true, false, &meta);
e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
inherit_expression_dependencies(id, ops[2]);
}
// Pass through some meta information to the loaded expression.
// We can still end up loading a buffer type to a variable, then CompositeExtract from it
// instead of loading everything through an access chain.
e->need_transpose = meta.need_transpose;
if (meta.storage_is_packed)
set_decoration(id, DecorationCPacked);
if (meta.storage_is_invariant)
set_decoration(id, DecorationInvariant);
break;
}