Merge pull request #804 from KhronosGroup/fix-788
Forward meta information in OpCompositeExtract.
This commit is contained in:
commit
cacfeef89e
@ -0,0 +1,30 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Element type of buf::results: a three-component vector followed by a scalar.
// NOTE(review): `a` is packed_float3 rather than float3, presumably so that `b`
// can sit immediately after the three floats of `a` - confirm against the buffer layout.
struct Foo
|
||||
{
|
||||
packed_float3 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
// Constant buffer read by main0 (bound there as [[buffer(0)]]):
// an array of 16 Foo records followed by a float4.
// Member order defines the buffer layout and must not be rearranged.
struct buf
|
||||
{
|
||||
Foo results[16];
|
||||
float4 bar;
|
||||
};
|
||||
|
||||
// Return type of the fragment entry point main0.
// Carries a single float4 tagged [[color(0)]].
struct main0_out
|
||||
{
|
||||
float4 _entryPointOutput [[color(0)]];
|
||||
};
|
||||
|
||||
// Fragment entry point.
// Selects the record at index (int(gl_FragCoord.x) % 16) in _11.results and
// returns (dot(record.a, bar.xyz), record.b, 0, 0) as the colour output.
fragment main0_out main0(constant buf& _11 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
    // Truncate the x coordinate to an integer and wrap it into the 16-entry array.
    const int slot = int(gl_FragCoord.x) % 16;

    // The packed vector is converted to float3 before being passed to dot().
    const float projection = dot(float3(_11.results[slot].a), _11.bar.xyz);

    main0_out result = {};
    result._entryPointOutput = float4(projection, _11.results[slot].b, 0.0, 0.0);
    return result;
}
|
||||
|
@ -0,0 +1,48 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Function-local form of the record: _main declares a Foo and fills it
// member by member from a Foo_1 element of the buffer.
// Uses a plain float3 for `a`, unlike the buffer-side Foo_1.
struct Foo
|
||||
{
|
||||
float3 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
// Buffer-side form of the record; this is the element type of buf::results.
// NOTE(review): `a` is packed_float3 here, presumably so that `b` can sit
// immediately after the three floats of `a` - confirm against the buffer layout.
struct Foo_1
|
||||
{
|
||||
packed_float3 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
// Constant buffer read by _main and bound in main0 as [[buffer(0)]]:
// an array of 16 Foo_1 records followed by a float4.
// Member order defines the buffer layout and must not be rearranged.
struct buf
|
||||
{
|
||||
Foo_1 results[16];
|
||||
float4 bar;
|
||||
};
|
||||
|
||||
// Return type of the fragment entry point main0.
// Carries a single float4 tagged [[color(0)]].
struct main0_out
|
||||
{
|
||||
float4 _entryPointOutput [[color(0)]];
|
||||
};
|
||||
|
||||
// Shader body, called from main0.
// pos  : fragment position; only pos.x is read.
// v_11 : the constant buffer holding the 16 records and `bar`.
// Returns (dot(record.a, bar.xyz), record.b, 0, 0) for the record at index int(pos.x) % 16.
float4 _main(thread const float4& pos, constant buf& v_11)
{
    // Truncate the x coordinate to an integer and wrap it into the 16-entry array.
    const int slot = int(pos.x) % 16;

    // Copy the buffer record member by member into the function-local Foo.
    Foo unpacked;
    unpacked.a = v_11.results[slot].a;
    unpacked.b = v_11.results[slot].b;

    const float projection = dot(unpacked.a, v_11.bar.xyz);
    return float4(projection, unpacked.b, 0.0, 0.0);
}
|
||||
|
||||
// Fragment entry point: forwards the fragment position and the constant
// buffer to _main and returns its result as the colour output.
fragment main0_out main0(constant buf& v_11 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
    // _main takes the position by reference, so it is given a local copy.
    float4 fragCoordCopy = gl_FragCoord;

    main0_out result = {};
    result._entryPointOutput = _main(fragCoordCopy, v_11);
    return result;
}
|
||||
|
107
shaders-msl/asm/frag/extract-packed-from-composite.asm.frag
Normal file
107
shaders-msl/asm/frag/extract-packed-from-composite.asm.frag
Normal file
@ -0,0 +1,107 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 64
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %pos_1 %_entryPointOutput
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource HLSL 500
|
||||
OpName %main "main"
|
||||
OpName %_main_vf4_ "@main(vf4;"
|
||||
OpName %pos "pos"
|
||||
OpName %Foo "Foo"
|
||||
OpMemberName %Foo 0 "a"
|
||||
OpMemberName %Foo 1 "b"
|
||||
OpName %foo "foo"
|
||||
OpName %Foo_0 "Foo"
|
||||
OpMemberName %Foo_0 0 "a"
|
||||
OpMemberName %Foo_0 1 "b"
|
||||
OpName %buf "buf"
|
||||
OpMemberName %buf 0 "results"
|
||||
OpMemberName %buf 1 "bar"
|
||||
OpName %_ ""
|
||||
OpName %pos_0 "pos"
|
||||
OpName %pos_1 "pos"
|
||||
OpName %_entryPointOutput "@entryPointOutput"
|
||||
OpName %param "param"
|
||||
; Explicit layout and interface decorations.
; Only %Foo_0 (the struct used inside the uniform block) is given member offsets here.
; Member 1 starts at byte 12, directly after the three 32-bit floats of member 0,
; and consecutive array elements are 16 bytes apart.
OpMemberDecorate %Foo_0 0 Offset 0
|
||||
OpMemberDecorate %Foo_0 1 Offset 12
|
||||
OpDecorate %_arr_Foo_0_uint_16 ArrayStride 16
|
||||
; 16 elements x 16-byte stride = 256, which is where member 1 of %buf begins.
OpMemberDecorate %buf 0 Offset 0
|
||||
OpMemberDecorate %buf 1 Offset 256
|
||||
OpDecorate %buf Block
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
; Entry-point interface: FragCoord built-in as input, Location 0 as output.
OpDecorate %pos_1 BuiltIn FragCoord
|
||||
OpDecorate %_entryPointOutput Location 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Function_v4float = OpTypePointer Function %v4float
|
||||
%9 = OpTypeFunction %v4float %_ptr_Function_v4float
|
||||
%v3float = OpTypeVector %float 3
|
||||
%Foo = OpTypeStruct %v3float %float
|
||||
%_ptr_Function_Foo = OpTypePointer Function %Foo
|
||||
%Foo_0 = OpTypeStruct %v3float %float
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_Foo_0_uint_16 = OpTypeArray %Foo_0 %uint_16
|
||||
%buf = OpTypeStruct %_arr_Foo_0_uint_16 %v4float
|
||||
%_ptr_Uniform_buf = OpTypePointer Uniform %buf
|
||||
%_ = OpVariable %_ptr_Uniform_buf Uniform
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Function_float = OpTypePointer Function %float
|
||||
%int_16 = OpConstant %int 16
|
||||
%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
|
||||
%float_0 = OpConstant %float 0
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%pos_1 = OpVariable %_ptr_Input_v4float Input
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%_entryPointOutput = OpVariable %_ptr_Output_v4float Output
|
||||
; Entry-point wrapper.
; Copies the %pos_1 input into function-local storage, calls %_main_vf4_ with a
; pointer to that copy, and stores the returned vec4 to %_entryPointOutput.
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%pos_0 = OpVariable %_ptr_Function_v4float Function
|
||||
%param = OpVariable %_ptr_Function_v4float Function
|
||||
; Input -> %pos_0 -> %param: two successive function-local copies of the position.
%58 = OpLoad %v4float %pos_1
|
||||
OpStore %pos_0 %58
|
||||
%62 = OpLoad %v4float %pos_0
|
||||
OpStore %param %62
|
||||
%63 = OpFunctionCall %v4float %_main_vf4_ %param
|
||||
OpStore %_entryPointOutput %63
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
; Shader body ("@main(vf4;").
; Picks element (int(pos.x) mod 16) of member 0 of the uniform block, copies it
; member by member into the function-local %foo, and returns
; (dot(foo.a, bar.xyz), foo.b, 0, 0).
%_main_vf4_ = OpFunction %v4float None %9
|
||||
%pos = OpFunctionParameter %_ptr_Function_v4float
|
||||
%12 = OpLabel
|
||||
%foo = OpVariable %_ptr_Function_Foo Function
|
||||
; index = signed modulo of int(pos.x) by 16
%28 = OpAccessChain %_ptr_Function_float %pos %uint_0
|
||||
%29 = OpLoad %float %28
|
||||
%30 = OpConvertFToS %int %29
|
||||
%32 = OpSMod %int %30 %int_16
|
||||
; The whole %Foo_0 element is loaded by value; its members are then pulled out
; with OpCompositeExtract rather than through further access chains into the buffer.
%34 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 %32
|
||||
%35 = OpLoad %Foo_0 %34
|
||||
; foo.a = element member 0
%36 = OpCompositeExtract %v3float %35 0
|
||||
%38 = OpAccessChain %_ptr_Function_v3float %foo %int_0
|
||||
OpStore %38 %36
|
||||
; foo.b = element member 1
%39 = OpCompositeExtract %float %35 1
|
||||
%41 = OpAccessChain %_ptr_Function_float %foo %int_1
|
||||
OpStore %41 %39
|
||||
; Read foo.a back and take its dot product with the first three components of member 1 of the block.
%42 = OpAccessChain %_ptr_Function_v3float %foo %int_0
|
||||
%43 = OpLoad %v3float %42
|
||||
%45 = OpAccessChain %_ptr_Uniform_v4float %_ %int_1
|
||||
%46 = OpLoad %v4float %45
|
||||
%47 = OpVectorShuffle %v3float %46 %46 0 1 2
|
||||
%48 = OpDot %float %43 %47
|
||||
; Assemble the result: (dot, foo.b, 0, 0)
%49 = OpAccessChain %_ptr_Function_float %foo %int_1
|
||||
%50 = OpLoad %float %49
|
||||
%52 = OpCompositeConstruct %v4float %48 %50 %float_0 %float_0
|
||||
OpReturnValue %52
|
||||
OpFunctionEnd
|
@ -6936,6 +6936,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
if (has_decoration(ops[2], DecorationCPacked))
|
||||
allow_base_expression = false;
|
||||
|
||||
AccessChainMeta meta;
|
||||
SPIRExpression *e = nullptr;
|
||||
|
||||
// Only apply this optimization if result is scalar.
|
||||
if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
|
||||
{
|
||||
@ -6951,17 +6954,27 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
//
|
||||
// Including the base will prevent this and would trigger multiple reads
|
||||
// from expression causing it to be forced to an actual temporary in GLSL.
|
||||
auto expr = access_chain_internal(ops[2], &ops[3], length, true, true);
|
||||
auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
|
||||
auto expr = access_chain_internal(ops[2], &ops[3], length, true, true, &meta);
|
||||
e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
|
||||
inherit_expression_dependencies(id, ops[2]);
|
||||
e.base_expression = ops[2];
|
||||
e->base_expression = ops[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
auto expr = access_chain_internal(ops[2], &ops[3], length, true);
|
||||
emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
|
||||
auto expr = access_chain_internal(ops[2], &ops[3], length, true, false, &meta);
|
||||
e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
|
||||
inherit_expression_dependencies(id, ops[2]);
|
||||
}
|
||||
|
||||
// Pass through some meta information to the loaded expression.
|
||||
// We can still end up loading a buffer type to a variable, then CompositeExtract from it
|
||||
// instead of loading everything through an access chain.
|
||||
e->need_transpose = meta.need_transpose;
|
||||
if (meta.storage_is_packed)
|
||||
set_decoration(id, DecorationCPacked);
|
||||
if (meta.storage_is_invariant)
|
||||
set_decoration(id, DecorationInvariant);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user