From 192a882df3813f85dd396c8c85a55a25af743dad Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 11 Jun 2018 16:21:38 +0200 Subject: [PATCH 1/2] Also unpack regular unary/binary operations on MSL. Apparently MSL gets confused when you have packed_float3 op float3 ... --- .../asm/frag/vector-shuffle-oom.asm.frag | 26 ++++++++--------- .../opt/shaders-msl/comp/struct-packing.comp | 2 +- .../shaders-msl/flatten/struct.flatten.vert | 2 +- .../frag/binary-unpack-pack-arguments.frag | 28 +++++++++++++++++++ .../opt/shaders-msl/vert/dynamic.flatten.vert | 2 +- .../opt/shaders-msl/vert/packed_matrix.vert | 4 +-- .../opt/shaders-msl/vert/ubo.alignment.vert | 2 +- .../asm/frag/vector-shuffle-oom.asm.frag | 26 ++++++++--------- .../shaders-msl/comp/struct-packing.comp | 2 +- .../shaders-msl/flatten/struct.flatten.vert | 2 +- .../frag/binary-unpack-pack-arguments.frag | 28 +++++++++++++++++++ .../shaders-msl/vert/dynamic.flatten.vert | 2 +- reference/shaders-msl/vert/packed_matrix.vert | 4 +-- reference/shaders-msl/vert/ubo.alignment.vert | 2 +- .../frag/binary-unpack-pack-arguments.frag | 15 ++++++++++ spirv_glsl.cpp | 10 +++---- 16 files changed, 114 insertions(+), 43 deletions(-) create mode 100644 reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag create mode 100644 reference/shaders-msl/frag/binary-unpack-pack-arguments.frag create mode 100644 shaders-msl/frag/binary-unpack-pack-arguments.frag diff --git a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index 5ea661f0..676fd823 100644 --- a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -103,7 +103,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float2 _95 = _88.xy; float2 _96 = _88.zw; float2 _97 = clamp(_82 + (float2(0.0, -2.0) * _7._m0.xy), _95, _96); - float3 _109 = _11._m5 * 
clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _113 = _12.sample(_13, _97, level(0.0)); float _114 = _113.y; float3 _129; @@ -118,7 +118,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _130 = _129 * 0.5; float4 _134 = float4(_130.x, _130.y, _130.z, float4(0.0).w); float2 _144 = clamp(_82 + (float2(-1.0) * _7._m0.xy), _95, _96); - float3 _156 = _11._m5 * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); float _161 = _160.y; float3 _176; @@ -134,7 +134,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _180 = _134.xyz + _177; float4 _181 = float4(_180.x, _180.y, _180.z, _134.w); float2 _191 = clamp(_82 + (float2(0.0, -1.0) * _7._m0.xy), _95, _96); - float3 _203 = _11._m5 * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); float _208 = _207.y; float3 _223; @@ -150,7 +150,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _227 = _181.xyz + _224; float4 _228 = float4(_227.x, _227.y, _227.z, _181.w); float2 _238 = clamp(_82 + (float2(1.0, -1.0) * _7._m0.xy), _95, _96); - float3 _250 = _11._m5 * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _254 = _12.sample(_13, _238, level(0.0)); float _255 = _254.y; float3 _270; @@ -166,7 +166,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _274 = _228.xyz + _271; float4 _275 = float4(_274.x, _274.y, _274.z, _228.w); float2 _285 = 
clamp(_82 + (float2(-2.0, 0.0) * _7._m0.xy), _95, _96); - float3 _297 = _11._m5 * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); float _302 = _301.y; float3 _317; @@ -182,7 +182,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _321 = _275.xyz + _318; float4 _322 = float4(_321.x, _321.y, _321.z, _275.w); float2 _332 = clamp(_82 + (float2(-1.0, 0.0) * _7._m0.xy), _95, _96); - float3 _344 = _11._m5 * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); float _349 = _348.y; float3 _364; @@ -198,7 +198,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _368 = _322.xyz + _365; float4 _369 = float4(_368.x, _368.y, _368.z, _322.w); float2 _379 = clamp(_82, _95, _96); - float3 _391 = _11._m5 * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); float _396 = _395.y; float3 _411; @@ -214,7 +214,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _415 = _369.xyz + _412; float4 _416 = float4(_415.x, _415.y, _415.z, _369.w); float2 _426 = clamp(_82 + (float2(1.0, 0.0) * _7._m0.xy), _95, _96); - float3 _438 = _11._m5 * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); float _443 = _442.y; float3 _458; @@ -230,7 +230,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _462 = _416.xyz + _459; float4 _463 = 
float4(_462.x, _462.y, _462.z, _416.w); float2 _473 = clamp(_82 + (float2(2.0, 0.0) * _7._m0.xy), _95, _96); - float3 _485 = _11._m5 * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); float _490 = _489.y; float3 _505; @@ -246,7 +246,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _509 = _463.xyz + _506; float4 _510 = float4(_509.x, _509.y, _509.z, _463.w); float2 _520 = clamp(_82 + (float2(-1.0, 1.0) * _7._m0.xy), _95, _96); - float3 _532 = _11._m5 * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); float _537 = _536.y; float3 _552; @@ -262,7 +262,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _556 = _510.xyz + _553; float4 _557 = float4(_556.x, _556.y, _556.z, _510.w); float2 _567 = clamp(_82 + (float2(0.0, 1.0) * _7._m0.xy), _95, _96); - float3 _579 = _11._m5 * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _583 = _12.sample(_13, _567, level(0.0)); float _584 = _583.y; float3 _599; @@ -278,7 +278,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float3 _603 = _557.xyz + _600; float4 _604 = float4(_603.x, _603.y, _603.z, _557.w); float2 _614 = clamp(_82 + _7._m0.xy, _95, _96); - float3 _626 = _11._m5 * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); float _631 = _630.y; float3 _646; @@ -294,7 +294,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant 
_18& _19 [[buff float3 _650 = _604.xyz + _647; float4 _651 = float4(_650.x, _650.y, _650.z, _604.w); float2 _661 = clamp(_82 + (float2(0.0, 2.0) * _7._m0.xy), _95, _96); - float3 _673 = _11._m5 * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); float _678 = _677.y; float3 _693; diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp index a042f7aa..712a2844 100644 --- a/reference/opt/shaders-msl/comp/struct-packing.comp +++ b/reference/opt/shaders-msl/comp/struct-packing.comp @@ -121,6 +121,6 @@ kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; + ssbo_430.content.m1.a = ssbo_430.content.m3.a * float2x3(ssbo_430.m6[1][1]); } diff --git a/reference/opt/shaders-msl/flatten/struct.flatten.vert b/reference/opt/shaders-msl/flatten/struct.flatten.vert index 594d29fe..c8ab1df7 100644 --- a/reference/opt/shaders-msl/flatten/struct.flatten.vert +++ b/reference/opt/shaders-msl/flatten/struct.flatten.vert @@ -33,7 +33,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] main0_out out = {}; out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); - float3 _39 = in.aVertex.xyz - _18.light.Position; + float3 _39 = in.aVertex.xyz - float3(_18.light.Position); out.vColor += ((_18.light.Color * clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_39))); return out; } diff --git a/reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag b/reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag new file mode 100644 index 00000000..8ef624ee --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + packed_float3 color; + float v; +}; + +struct main0_in +{ + float3 vIn [[user(locn0)]]; +}; + +struct main0_out +{ + float3 FragColor [[color(0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _15 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = cross(in.vIn, float3(_15.color) - in.vIn); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/dynamic.flatten.vert b/reference/opt/shaders-msl/vert/dynamic.flatten.vert index 1b4134c7..64fe1622 100644 --- a/reference/opt/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/opt/shaders-msl/vert/dynamic.flatten.vert @@ -35,7 +35,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] out.vColor = float4(0.0); for (int _82 = 0; _82 < 4; ) { - float3 _54 = in.aVertex.xyz - _21.lights[_82].Position; + float3 _54 = in.aVertex.xyz - float3(_21.lights[_82].Position); out.vColor += ((_21.lights[_82].Color * clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_54))); _82++; continue; diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert index 79360ba9..f1f83d46 100644 --- a/reference/opt/shaders-msl/vert/packed_matrix.vert +++ b/reference/opt/shaders-msl/vert/packed_matrix.vert @@ -40,8 +40,8 @@ struct main0_out vertex main0_out main0(main0_in in [[stage_in]], constant _42& _44 [[buffer(12)]], constant _15& _17 [[buffer(13)]]) { main0_out out = {}; - float4 _70 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); - out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1); + float4 _70 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); + out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * float4x3(_17._m1)); float4 _95 = _70; _95.y = -_70.y; out.gl_Position = _95; diff 
--git a/reference/opt/shaders-msl/vert/ubo.alignment.vert b/reference/opt/shaders-msl/vert/ubo.alignment.vert index 6e48ae0e..bc076def 100644 --- a/reference/opt/shaders-msl/vert/ubo.alignment.vert +++ b/reference/opt/shaders-msl/vert/ubo.alignment.vert @@ -31,7 +31,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] main0_out out = {}; out.gl_Position = _18.mvp * in.aVertex; out.vNormal = in.aNormal; - out.vColor = _18.color * _18.opacity; + out.vColor = float3(_18.color) * _18.opacity; out.vSize = _18.targSize * _18.opacity; return out; } diff --git a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index 6c6fd968..9f9b827c 100644 --- a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -103,7 +103,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _109 = _11._m5 * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _113 = _12.sample(_13, _97, level(0.0)); float3 _129; if (_113.y > 0.0) @@ -120,7 +120,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _135 = _77; _135._m0 = _134; float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _156 = _11._m5 * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); float3 _176; if (_160.y > 0.0) @@ -137,7 +137,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _182 = _135; 
_182._m0 = _181; float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _203 = _11._m5 * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); float3 _223; if (_207.y > 0.0) @@ -154,7 +154,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _229 = _182; _229._m0 = _228; float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _250 = _11._m5 * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _254 = _12.sample(_13, _238, level(0.0)); float3 _270; if (_254.y > 0.0) @@ -171,7 +171,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _276 = _229; _276._m0 = _275; float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _297 = _11._m5 * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); float3 _317; if (_301.y > 0.0) @@ -188,7 +188,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _323 = _276; _323._m0 = _322; float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _344 = _11._m5 * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); float3 _364; if (_348.y > 0.0) @@ -205,7 +205,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _370 = _323; _370._m0 = _369; float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * 
_7._m0.xy), _88.xy, _88.zw); - float3 _391 = _11._m5 * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); float3 _411; if (_395.y > 0.0) @@ -222,7 +222,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _417 = _370; _417._m0 = _416; float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _438 = _11._m5 * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); float3 _458; if (_442.y > 0.0) @@ -239,7 +239,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _464 = _417; _464._m0 = _463; float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _485 = _11._m5 * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); float3 _505; if (_489.y > 0.0) @@ -256,7 +256,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _511 = _464; _511._m0 = _510; float2 _520 = clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _532 = _11._m5 * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); float3 _552; if (_536.y > 0.0) @@ -273,7 +273,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _558 = _511; _558._m0 = _557; float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _579 = _11._m5 * clamp(_8.sample(_9, 
_567, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _583 = _12.sample(_13, _567, level(0.0)); float3 _599; if (_583.y > 0.0) @@ -290,7 +290,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _605 = _558; _605._m0 = _604; float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _626 = _11._m5 * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); float3 _646; if (_630.y > 0.0) @@ -307,7 +307,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _28 _652 = _605; _652._m0 = _651; float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _673 = _11._m5 * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); + float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); float3 _693; if (_677.y > 0.0) diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp index 2b37844f..1213ec87 100644 --- a/reference/shaders-msl/comp/struct-packing.comp +++ b/reference/shaders-msl/comp/struct-packing.comp @@ -144,6 +144,6 @@ kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; + ssbo_430.content.m1.a = ssbo_430.content.m3.a * float2x3(ssbo_430.m6[1][1]); } diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert index 75f58e1e..49c32bb9 100644 --- 
a/reference/shaders-msl/flatten/struct.flatten.vert +++ b/reference/shaders-msl/flatten/struct.flatten.vert @@ -33,7 +33,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] main0_out out = {}; out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); - float3 L = in.aVertex.xyz - _18.light.Position; + float3 L = in.aVertex.xyz - float3(_18.light.Position); out.vColor += ((_18.light.Color * clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); return out; } diff --git a/reference/shaders-msl/frag/binary-unpack-pack-arguments.frag b/reference/shaders-msl/frag/binary-unpack-pack-arguments.frag new file mode 100644 index 00000000..8ef624ee --- /dev/null +++ b/reference/shaders-msl/frag/binary-unpack-pack-arguments.frag @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + packed_float3 color; + float v; +}; + +struct main0_in +{ + float3 vIn [[user(locn0)]]; +}; + +struct main0_out +{ + float3 FragColor [[color(0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _15 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = cross(in.vIn, float3(_15.color) - in.vIn); + return out; +} + diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert index 696966ca..4f6a5b1c 100644 --- a/reference/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/shaders-msl/vert/dynamic.flatten.vert @@ -35,7 +35,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] out.vColor = float4(0.0); for (int i = 0; i < 4; i++) { - float3 L = in.aVertex.xyz - _21.lights[i].Position; + float3 L = in.aVertex.xyz - float3(_21.lights[i].Position); out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); } return out; diff --git a/reference/shaders-msl/vert/packed_matrix.vert 
b/reference/shaders-msl/vert/packed_matrix.vert index 1f26a68c..15172d19 100644 --- a/reference/shaders-msl/vert/packed_matrix.vert +++ b/reference/shaders-msl/vert/packed_matrix.vert @@ -44,10 +44,10 @@ vertex main0_out main0(main0_in in [[stage_in]], constant _42& _44 [[buffer(12)] float3 _13; do { - _13 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1); + _13 = normalize(float4(in.m_25.xyz, 0.0) * float4x3(_17._m1)); break; } while (false); - float4 _39 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); + float4 _39 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); out.m_72 = _13; float4 _74 = _39; _74.y = -_39.y; diff --git a/reference/shaders-msl/vert/ubo.alignment.vert b/reference/shaders-msl/vert/ubo.alignment.vert index 6e48ae0e..bc076def 100644 --- a/reference/shaders-msl/vert/ubo.alignment.vert +++ b/reference/shaders-msl/vert/ubo.alignment.vert @@ -31,7 +31,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] main0_out out = {}; out.gl_Position = _18.mvp * in.aVertex; out.vNormal = in.aNormal; - out.vColor = _18.color * _18.opacity; + out.vColor = float3(_18.color) * _18.opacity; out.vSize = _18.targSize * _18.opacity; return out; } diff --git a/shaders-msl/frag/binary-unpack-pack-arguments.frag b/shaders-msl/frag/binary-unpack-pack-arguments.frag new file mode 100644 index 00000000..be30f84d --- /dev/null +++ b/shaders-msl/frag/binary-unpack-pack-arguments.frag @@ -0,0 +1,15 @@ +#version 450 +layout(location = 0) out vec3 FragColor; + +layout(binding = 0, std140) uniform UBO +{ + vec3 color; + float v; +}; + +layout(location = 0) in vec3 vIn; + +void main() +{ + FragColor = cross(vIn, color - vIn); +} diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 1e5e7e1e..e640ec2b 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3280,15 +3280,15 @@ SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, void CompilerGLSL::emit_unary_op(uint32_t 
result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); - emit_op(result_type, result_id, join(op, to_enclosed_expression(op0)), forward); + emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, join(to_enclosed_expression(op0), " ", op, " ", to_enclosed_expression(op1)), - forward); + emit_op(result_type, result_id, + join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); @@ -3368,8 +3368,8 @@ SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_o else { // If we don't cast, our actual input type is that of the first (or second) argument. - cast_op0 = to_enclosed_expression(op0); - cast_op1 = to_enclosed_expression(op1); + cast_op0 = to_enclosed_unpacked_expression(op0); + cast_op1 = to_enclosed_unpacked_expression(op1); input_type = type0.basetype; } From 58fab58e5e94bbbc0477b0e6bd36e6170c65276b Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 12 Jun 2018 09:36:13 +0200 Subject: [PATCH 2/2] Do not unpack transposed matrices. 
--- reference/opt/shaders-msl/comp/struct-packing.comp | 2 +- reference/opt/shaders-msl/vert/packed_matrix.vert | 2 +- reference/shaders-msl/comp/struct-packing.comp | 2 +- reference/shaders-msl/vert/packed_matrix.vert | 2 +- spirv_glsl.cpp | 10 ++++++++-- spirv_msl.cpp | 9 +++++++++ 6 files changed, 21 insertions(+), 6 deletions(-) diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp index 712a2844..a042f7aa 100644 --- a/reference/opt/shaders-msl/comp/struct-packing.comp +++ b/reference/opt/shaders-msl/comp/struct-packing.comp @@ -121,6 +121,6 @@ kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * float2x3(ssbo_430.m6[1][1]); + ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; } diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert index f1f83d46..4c984af1 100644 --- a/reference/opt/shaders-msl/vert/packed_matrix.vert +++ b/reference/opt/shaders-msl/vert/packed_matrix.vert @@ -41,7 +41,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant _42& _44 [[buffer(12)] { main0_out out = {}; float4 _70 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); - out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * float4x3(_17._m1)); + out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1); float4 _95 = _70; _95.y = -_70.y; out.gl_Position = _95; diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp index 1213ec87..2b37844f 100644 --- a/reference/shaders-msl/comp/struct-packing.comp +++ b/reference/shaders-msl/comp/struct-packing.comp @@ -144,6 +144,6 @@ kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [ 
ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * float2x3(ssbo_430.m6[1][1]); + ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; } diff --git a/reference/shaders-msl/vert/packed_matrix.vert b/reference/shaders-msl/vert/packed_matrix.vert index 15172d19..c63b04fd 100644 --- a/reference/shaders-msl/vert/packed_matrix.vert +++ b/reference/shaders-msl/vert/packed_matrix.vert @@ -44,7 +44,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant _42& _44 [[buffer(12)] float3 _13; do { - _13 = normalize(float4(in.m_25.xyz, 0.0) * float4x3(_17._m1)); + _13 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1); break; } while (false); float4 _39 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index e640ec2b..0964f6ff 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -2375,7 +2375,10 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id) string CompilerGLSL::to_unpacked_expression(uint32_t id) { - if (has_decoration(id, DecorationCPacked)) + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get<SPIRExpression>(id); + bool need_transpose = e && e->need_transpose; + if (!need_transpose && has_decoration(id, DecorationCPacked)) return unpack_expression_type(to_expression(id), expression_type(id)); else return to_expression(id); @@ -2383,7 +2386,10 @@ string CompilerGLSL::to_unpacked_expression(uint32_t id) string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id) { - if (has_decoration(id, DecorationCPacked)) + // If we need to transpose, it will also take care of unpacking rules. 
+ auto *e = maybe_get<SPIRExpression>(id); + bool need_transpose = e && e->need_transpose; + if (!need_transpose && has_decoration(id, DecorationCPacked)) return unpack_expression_type(to_expression(id), expression_type(id)); else return to_enclosed_expression(id); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index bd5d13dc..e0ecd877 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -2042,7 +2042,16 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) if (e && e->need_transpose && (t.columns == t.vecsize || is_packed)) { e->need_transpose = false; + + // This is important for matrices. Packed matrices + // are generally transposed, so unpacking using a constructor argument + // will result in an error. + // The simplest solution for now is to just avoid unpacking the matrix in this operation. + unset_decoration(mtx_id, DecorationCPacked); + emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); + if (is_packed) + set_decoration(mtx_id, DecorationCPacked); e->need_transpose = true; } else