From 0aedc7d128dcd3e71c9407b3d270ca4ed6098b87 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 27 May 2022 12:43:12 +0200 Subject: [PATCH 1/2] Roll dependencies. --- checkout_glslang_spirv_tools.sh | 6 +- ...d-unpack-uint2.fxconly.nofxc.sm60.asm.frag | 3 +- .../opt/shaders-hlsl/vert/invariant.vert | 3 +- .../frag/descriptor-array-unnamed.asm.frag | 2 +- .../interpolation-qualifiers-struct.asm.frag | 2 +- .../asm/frag/vector-shuffle-oom.asm.frag | 36 ++--- .../asm/tese/unnamed-builtin-array.asm.tese | 2 +- reference/opt/shaders-msl/comp/insert.comp | 9 +- .../frag/interpolation-qualifiers-block.frag | 2 +- .../frag/interpolation-qualifiers.frag | 2 +- .../frag/shader-arithmetic-8bit.frag | 20 +-- .../tesc/water_tess.multi-patch.tesc | 18 +-- .../opt/shaders-msl/tesc/water_tess.tesc | 18 +-- .../opt/shaders-msl/tese/quad.domain.tese | 2 +- reference/opt/shaders-msl/tese/quad.tese | 2 +- .../opt/shaders-msl/tese/water_tess.tese | 9 +- .../opt/shaders-msl/vert/invariant.msl21.vert | 3 +- .../asm/frag/depth-compare.asm.frag | 23 +-- .../asm/frag/global-constant-arrays.asm.frag | 119 ++++++-------- .../padded-float-array-member-defef.asm.frag | 153 ++++++++---------- .../asm/frag/sample-mask-not-array.asm.frag | 8 +- ...bpass-input.ios.framebuffer-fetch.asm.frag | 2 +- ...ass-input.msl23.framebuffer-fetch.asm.frag | 2 +- .../asm/tese/ds-double-gl-in-deref.asm.tese | 14 +- .../asm/tese/ds-patch-input-fixes.asm.tese | 38 ++--- .../asm/tese/ds-patch-inputs.asm.tese | 8 +- .../asm/tese/ds-texcoord-array.asm.tese | 16 +- .../asm/vert/array-missing-copies.asm.vert | 31 ++-- .../asm/vert/texture-buffer.asm.vert | 10 +- .../asm/frag/loop-header-to-continue.asm.frag | 7 +- .../asm/frag/pack-and-unpack-uint2.asm.frag | 3 +- .../asm/frag/vector-shuffle-oom.asm.frag | 36 ++--- .../opt/shaders/comp/generate_height.comp | 4 +- reference/opt/shaders/comp/insert.comp | 9 +- .../avoid-expression-lowering-to-loop.frag | 2 +- .../frag/ubo-load-row-major-workaround.frag | 2 +- reference/opt/shaders/tesc/water_tess.tesc | 18 +-- reference/opt/shaders/tese/water_tess.tese | 9 +- reference/opt/shaders/vert/ground.vert | 7 +- reference/opt/shaders/vert/invariant.vert | 3 +- reference/opt/shaders/vert/ocean.vert | 15 +- ...shader-arithmetic-8bit.nocompat.vk.frag.vk | 20 +-- spirv.h | 49 +++++- spirv.hpp | 49 +++++- 44 files changed, 401 insertions(+), 395 deletions(-) diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh index f318d9ab..7f014555 100755 --- a/checkout_glslang_spirv_tools.sh +++ b/checkout_glslang_spirv_tools.sh @@ -2,9 +2,9 @@ # Copyright 2016-2021 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 -GLSLANG_REV=df609a01b386001e367709086c58529c48028d1e -SPIRV_TOOLS_REV=75e53b9f685830ac42242cf0c46cc9af523bd0df -SPIRV_HEADERS_REV=b8047fbe45f426f5918fadc67e8408f5b108c3c9 +GLSLANG_REV=7dda6a6347b0bd550e202942adee475956ef462a +SPIRV_TOOLS_REV=82d91083cb56c89d2cb8e9d56d4d69f07ac34fed +SPIRV_HEADERS_REV=5a121866927a16ab9d49bed4788b532c7fcea766 PROTOCOL=https if [ -d external/glslang ]; then diff --git a/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag index 358773b2..5926eef7 100644 --- a/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -7,8 +7,7 @@ struct SPIRV_Cross_Output void frag_main() { - uint2 unpacked = uint2(18u, 52u); - FragColor = float4(float(unpacked.x), float(unpacked.y), 1.0f, 1.0f); + FragColor = float4(18.0f, 52.0f, 1.0f, 1.0f); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/vert/invariant.vert b/reference/opt/shaders-hlsl/vert/invariant.vert index 54739626..ae1ae4b7 100644 --- a/reference/opt/shaders-hlsl/vert/invariant.vert +++ b/reference/opt/shaders-hlsl/vert/invariant.vert @@ -19,8 +19,7 @@ struct SPIRV_Cross_Output void vert_main() { - float4 _20 = vInput1 * vInput2; - float4 _21 = vInput0 + _20; + float4 _21 = mad(vInput1, vInput2, vInput0); gl_Position = _21; float4 _27 = vInput0 - vInput1; float4 _29 = _27 * vInput2; diff --git a/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag b/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag index 1870f671..58f02ad0 100644 --- a/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag @@ -42,7 +42,7 @@ fragment main0_out main0(const device _4* _5_0 [[buffer(0)]], const device _4* _ }; main0_out out = {}; - out.m_3 = _5[_20._m0]->_m0 + (_8[_20._m0]->_m0 * float4(0.20000000298023223876953125)); + out.m_3 = fma(_8[_20._m0]->_m0, float4(0.20000000298023223876953125), _5[_20._m0]->_m0); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag b/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag index 6407b32b..fe49e09a 100644 --- a/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag @@ -41,7 +41,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) inp.v4 = in.inp_v4; inp.v5 = in.inp_v5; inp.v6 = in.inp_v6; - out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); + out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, fma(inp.v3.w, inp.v4, inp.v5) - inp.v6); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index 39bd33d5..2031b335 100644 --- a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -88,8 +88,6 @@ struct _18 float4 _m38[2]; }; -constant _28 _74 = {}; - struct main0_out { float4 m_5 [[color(0)]]; @@ -98,11 +96,10 @@ struct main0_out fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d _8 [[texture(0)]], texture2d _12 [[texture(1)]], texture2d _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; float2 _95 = _88.xy; float2 _96 = _88.zw; - float2 _97 = fast::clamp(_82 + (float2(0.0, -2.0) * _7._m0.xy), _95, _96); + float2 _97 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, -2.0) * _7._m0.xy), _95, _96); float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _113 = _12.sample(_13, _97, level(0.0)); float _114 = _113.y; @@ -115,7 +112,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _129 = _109; } - float2 _144 = fast::clamp(_82 + (float2(-1.0) * _7._m0.xy), _95, _96); + float2 _144 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0) * _7._m0.xy), _95, _96); float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); float _161 = _160.y; @@ -128,7 +125,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _176 = _156; } - float2 _191 = fast::clamp(_82 + (float2(0.0, -1.0) * _7._m0.xy), _95, _96); + float2 _191 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, -1.0) * _7._m0.xy), _95, _96); float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); float _208 = _207.y; @@ -141,7 +138,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _223 = _203; } - float2 _238 = fast::clamp(_82 + (float2(1.0, -1.0) * _7._m0.xy), _95, _96); + float2 _238 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(1.0, -1.0) * _7._m0.xy), _95, _96); float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _254 = _12.sample(_13, _238, level(0.0)); float _255 = _254.y; @@ -154,7 +151,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _270 = _250; } - float2 _285 = fast::clamp(_82 + (float2(-2.0, 0.0) * _7._m0.xy), _95, _96); + float2 _285 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-2.0, 0.0) * _7._m0.xy), _95, _96); float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); float _302 = _301.y; @@ -167,7 +164,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _317 = _297; } - float2 _332 = fast::clamp(_82 + (float2(-1.0, 0.0) * _7._m0.xy), _95, _96); + float2 _332 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0, 0.0) * _7._m0.xy), _95, _96); float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); float _349 = _348.y; @@ -180,7 +177,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _364 = _344; } - float2 _379 = fast::clamp(_82, _95, _96); + float2 _379 = fast::clamp(gl_FragCoord.xy * _19._m23.xy, _95, _96); float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); float _396 = _395.y; @@ -193,7 +190,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _411 = _391; } - float2 _426 = fast::clamp(_82 + (float2(1.0, 0.0) * _7._m0.xy), _95, _96); + float2 _426 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(1.0, 0.0) * _7._m0.xy), _95, _96); float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); float _443 = _442.y; @@ -206,7 +203,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _458 = _438; } - float2 _473 = fast::clamp(_82 + (float2(2.0, 0.0) * _7._m0.xy), _95, _96); + float2 _473 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(2.0, 0.0) * _7._m0.xy), _95, _96); float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); float _490 = _489.y; @@ -219,7 +216,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _505 = _485; } - float2 _520 = fast::clamp(_82 + (float2(-1.0, 1.0) * _7._m0.xy), _95, _96); + float2 _520 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0, 1.0) * _7._m0.xy), _95, _96); float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); float _537 = _536.y; @@ -232,7 +229,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _552 = _532; } - float2 _567 = fast::clamp(_82 + (float2(0.0, 1.0) * _7._m0.xy), _95, _96); + float2 _567 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, 1.0) * _7._m0.xy), _95, _96); float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _583 = _12.sample(_13, _567, level(0.0)); float _584 = _583.y; @@ -245,7 +242,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _599 = _579; } - float2 _614 = fast::clamp(_82 + _7._m0.xy, _95, _96); + float2 _614 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, _7._m0.xy), _95, _96); float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); float _631 = _630.y; @@ -258,7 +255,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _646 = _626; } - float2 _661 = fast::clamp(_82 + (float2(0.0, 2.0) * _7._m0.xy), _95, _96); + float2 _661 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, 2.0) * _7._m0.xy), _95, _96); float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); float _678 = _677.y; @@ -272,10 +269,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff _693 = _673; } float3 _702 = (((((((((((((_129 * 0.5).xyz + (_176 * 0.5)).xyz + (_223 * 0.75)).xyz + (_270 * 0.5)).xyz + (_317 * 0.5)).xyz + (_364 * 0.75)).xyz + (_411 * 1.0)).xyz + (_458 * 0.75)).xyz + (_505 * 0.5)).xyz + (_552 * 0.5)).xyz + (_599 * 0.75)).xyz + (_646 * 0.5)).xyz + (_693 * 0.5)).xyz * float3(0.125); - _28 _704; - _704._m0 = float4(_702.x, _702.y, _702.z, float4(0.0).w); - _704._m0.w = 1.0; - out.m_5 = _704._m0; + _28 _750 = _28{ float4(_702.x, _702.y, _702.z, float4(0.0).w) }; + _750._m0.w = 1.0; + out.m_5 = _750._m0; return out; } diff --git a/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese b/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese index b6bdf415..2245cb98 100644 --- a/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese +++ b/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese @@ -67,7 +67,7 @@ struct main0_patchIn gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter.z; gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter.w; float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); - out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[1], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/comp/insert.comp b/reference/opt/shaders-msl/comp/insert.comp index 059549ae..437b7f32 100644 --- a/reference/opt/shaders-msl/comp/insert.comp +++ b/reference/opt/shaders-msl/comp/insert.comp @@ -10,16 +10,9 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); -constant float4 _53 = {}; - kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _46; - _46.x = 10.0; - _46.y = 30.0; - _46.z = 70.0; - _46.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _46; + _27.out_data[gl_GlobalInvocationID.x] = float4(10.0, 30.0, 70.0, 90.0); ((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0; } diff --git a/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag b/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag index 6148b891..b3bab043 100644 --- a/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag +++ b/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag @@ -41,7 +41,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) inp.v4 = in.inp_v4; inp.v5 = in.inp_v5; inp.v6 = in.inp_v6; - out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); + out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, fma(inp.v3.w, inp.v4, inp.v5) - inp.v6); return out; } diff --git a/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag b/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag index aff6e1b0..208e8806 100644 --- a/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag +++ b/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag @@ -22,7 +22,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.FragColor = float4(in.v0.x + in.v1.y, in.v2.xy, ((in.v3.w * in.v4) + in.v5) - in.v6); + out.FragColor = float4(in.v0.x + in.v1.y, in.v2.xy, fma(in.v3.w, in.v4, in.v5) - in.v6); return out; } diff --git a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag index c4d6fde7..fff932c0 100644 --- a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag +++ b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag @@ -35,16 +35,16 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], device SSBO& ssbo [[buffer(0)]], constant Push& registers [[buffer(1)]], constant UBO& ubo [[buffer(2)]]) { main0_out out = {}; - char4 _199 = as_type(20); - ssbo.i8[0] = _199.x; - ssbo.i8[1] = _199.y; - ssbo.i8[2] = _199.z; - ssbo.i8[3] = _199.w; - uchar4 _224 = as_type(20u); - ssbo.u8[0] = _224.x; - ssbo.u8[1] = _224.y; - ssbo.u8[2] = _224.z; - ssbo.u8[3] = _224.w; + char4 _204 = as_type(20); + ssbo.i8[0] = _204.x; + ssbo.i8[1] = _204.y; + ssbo.i8[2] = _204.z; + ssbo.i8[3] = _204.w; + uchar4 _229 = as_type(20u); + ssbo.u8[0] = _229.x; + ssbo.u8[1] = _229.y; + ssbo.u8[2] = _229.z; + ssbo.u8[3] = _229.w; char4 _249 = char4(in.vColor); out.FragColorInt = int4((((((_249 + char4(registers.i8)) + char4(-40)) + char4(-50)) + char4(char(10), char(20), char(30), char(40))) + char4(ssbo.i8[4])) + char4(ubo.i8)); out.FragColorUint = uint4((((((uchar4(_249) + uchar4(registers.u8)) + uchar4(216)) + uchar4(206)) + uchar4(uchar(10), uchar(20), uchar(30), uchar(40))) + uchar4(ssbo.u8[4])) + uchar4(ubo.u8)); diff --git a/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc b/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc index 99e094c9..356a963d 100644 --- a/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc +++ b/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -58,20 +58,20 @@ kernel void main0(constant UBO& _41 [[buffer(0)]], uint3 gl_GlobalInvocationID [ else { patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase.xy; - float2 _681 = (gl_in[0].vPatchPosBase.xy + (float2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _710 = (gl_in[0].vPatchPosBase.xy + (float2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _681 = fma(float2(-0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _710 = fma(float2(0.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; float _729 = fast::clamp(log2((length(_41.uCamPos - float3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _739 = (gl_in[0].vPatchPosBase.xy + (float2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _768 = (gl_in[0].vPatchPosBase.xy + (float2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _739 = fma(float2(1.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _768 = fma(float2(-0.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; float _787 = fast::clamp(log2((length(_41.uCamPos - float3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _797 = (gl_in[0].vPatchPosBase.xy + (float2(0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _797 = fma(float2(0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; float _816 = fast::clamp(log2((length(_41.uCamPos - float3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _826 = (gl_in[0].vPatchPosBase.xy + (float2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _826 = fma(float2(1.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; float _845 = fast::clamp(log2((length(_41.uCamPos - float3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _855 = (gl_in[0].vPatchPosBase.xy + (float2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _884 = (gl_in[0].vPatchPosBase.xy + (float2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _855 = fma(float2(-0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _884 = fma(float2(0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; float _903 = fast::clamp(log2((length(_41.uCamPos - float3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _913 = (gl_in[0].vPatchPosBase.xy + (float2(1.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _913 = fma(float2(1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; float _614 = dot(float4(_787, _816, fast::clamp(log2((length(_41.uCamPos - float3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), float4(0.25)); float _620 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), float4(0.25)); float _626 = dot(float4(_729, fast::clamp(log2((length(_41.uCamPos - float3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), float4(0.25)); diff --git a/reference/opt/shaders-msl/tesc/water_tess.tesc b/reference/opt/shaders-msl/tesc/water_tess.tesc index 18484190..d9a6697c 100644 --- a/reference/opt/shaders-msl/tesc/water_tess.tesc +++ b/reference/opt/shaders-msl/tesc/water_tess.tesc @@ -60,20 +60,20 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uin else { patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase; - float2 _681 = (gl_in[0].vPatchPosBase + (float2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _710 = (gl_in[0].vPatchPosBase + (float2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _681 = fma(float2(-0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _710 = fma(float2(0.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; float _729 = fast::clamp(log2((length(_41.uCamPos - float3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _739 = (gl_in[0].vPatchPosBase + (float2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _768 = (gl_in[0].vPatchPosBase + (float2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _739 = fma(float2(1.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _768 = fma(float2(-0.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; float _787 = fast::clamp(log2((length(_41.uCamPos - float3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _797 = (gl_in[0].vPatchPosBase + (float2(0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _797 = fma(float2(0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; float _816 = fast::clamp(log2((length(_41.uCamPos - float3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _826 = (gl_in[0].vPatchPosBase + (float2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _826 = fma(float2(1.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; float _845 = fast::clamp(log2((length(_41.uCamPos - float3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _855 = (gl_in[0].vPatchPosBase + (float2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _884 = (gl_in[0].vPatchPosBase + (float2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _855 = fma(float2(-0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _884 = fma(float2(0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; float _903 = fast::clamp(log2((length(_41.uCamPos - float3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _913 = (gl_in[0].vPatchPosBase + (float2(1.5) * _41.uPatchSize)) * _41.uScale.xy; + float2 _913 = fma(float2(1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; float _614 = dot(float4(_787, _816, fast::clamp(log2((length(_41.uCamPos - float3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), float4(0.25)); float _620 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), float4(0.25)); float _626 = dot(float4(_729, fast::clamp(log2((length(_41.uCamPos - float3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), float4(0.25)); diff --git a/reference/opt/shaders-msl/tese/quad.domain.tese b/reference/opt/shaders-msl/tese/quad.domain.tese index dc5c7e40..6d844054 100644 --- a/reference/opt/shaders-msl/tese/quad.domain.tese +++ b/reference/opt/shaders-msl/tese/quad.domain.tese @@ -68,7 +68,7 @@ struct main0_patchIn gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter.w; float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); gl_TessCoord.y = 1.0 - gl_TessCoord.y; - out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]), 0.0, 1.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[3], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/quad.tese b/reference/opt/shaders-msl/tese/quad.tese index b6bdf415..2245cb98 100644 --- a/reference/opt/shaders-msl/tese/quad.tese +++ b/reference/opt/shaders-msl/tese/quad.tese @@ -67,7 +67,7 @@ struct main0_patchIn gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter.z; gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter.w; float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); - out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[1], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/water_tess.tese b/reference/opt/shaders-msl/tese/water_tess.tese index f0d495b1..e91063f2 100644 --- a/reference/opt/shaders-msl/tese/water_tess.tese +++ b/reference/opt/shaders-msl/tese/water_tess.tese @@ -30,15 +30,14 @@ struct main0_patchIn { main0_out out = {}; float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); - float2 _202 = patchIn.vOutPatchPosBase + (gl_TessCoord.xy * _31.uPatchSize); + float2 _202 = fma(gl_TessCoord.xy, _31.uPatchSize, patchIn.vOutPatchPosBase); float2 _216 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(gl_TessCoord.x)); float _223 = mix(_216.x, _216.y, gl_TessCoord.y); float _225 = floor(_223); - float2 _125 = _202 * _31.uInvHeightmapSize; float2 _141 = _31.uInvHeightmapSize * exp2(_225); - out.vGradNormalTex = float4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw); - float3 _256 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (_125 + (_141 * 0.5)), level(_225)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (_125 + (_141 * 1.0)), level(_225 + 1.0)).xyz, float3(_223 - _225)); - float2 _171 = (_202 * _31.uScale.xy) + _256.yz; + out.vGradNormalTex = float4(fma(_202, _31.uInvHeightmapSize, _31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * _31.uScale.zw); + float3 _256 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 0.5), level(_225)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 1.0), level(_225 + 1.0)).xyz, float3(_223 - _225)); + float2 _171 = fma(_202, _31.uScale.xy, _256.yz); out.vWorld = float3(_171.x, _256.x, _171.y); out.gl_Position = _31.uMVP * float4(out.vWorld, 1.0); return out; diff --git a/reference/opt/shaders-msl/vert/invariant.msl21.vert b/reference/opt/shaders-msl/vert/invariant.msl21.vert index 73b0ec74..b7f703b2 100644 --- a/reference/opt/shaders-msl/vert/invariant.msl21.vert +++ b/reference/opt/shaders-msl/vert/invariant.msl21.vert @@ -18,8 +18,7 @@ struct main0_in vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 _20 = in.vInput1 * in.vInput2; - float4 _21 = in.vInput0 + _20; + float4 _21 = fma(in.vInput1, in.vInput2, in.vInput0); out.gl_Position = _21; return out; } diff --git a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag index 5a8a350d..cdcfa822 100644 --- a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -202,7 +202,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float2 _114 = gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw; float4 _118 = SceneTexturesStruct_SceneDepthTexture.sample(SceneTexturesStruct_SceneDepthTextureSampler, _114, level(0.0)); float _119 = _118.x; - float _133 = ((_119 * View.View_InvDeviceZToWorldZTransform.x) + View.View_InvDeviceZToWorldZTransform.y) + (1.0 / ((_119 * View.View_InvDeviceZToWorldZTransform.z) - View.View_InvDeviceZToWorldZTransform.w)); + float _133 = fma(_119, View.View_InvDeviceZToWorldZTransform.x, View.View_InvDeviceZToWorldZTransform.y) + (1.0 / ((_119 * View.View_InvDeviceZToWorldZTransform.z) - View.View_InvDeviceZToWorldZTransform.w)); float4 _147 = View.View_ScreenToWorld * float4(((_114 - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_133), _133, 1.0); float3 _148 = _147.xyz; float3 _152 = _Globals.LightPositionAndInvRadius.xyz - _148; @@ -242,7 +242,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G { _207 = 1.0; } - float _213 = fast::clamp(((_207 - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float _213 = fast::clamp(fma(_207 - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); float4 _219; _219.z = _218; @@ -253,7 +253,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float _448; if (_248) { - float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint((select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x * 255.0) + 0.5)), 0).xy), 0); + float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint(fma(select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x, 255.0, 0.5))), 0).xy), 0); float _263 = _260.y * 0.5; float3 _266 = _148 - (_236 * float3(_263)); float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); @@ -291,12 +291,17 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); float _329 = (1.0 / (((_318.z / _318.w) * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w; - float _342 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, (_278 + (_286 * float3(2.5))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; - float _364 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(2.3776409626007080078125))) + (_286 * float3(0.77254199981689453125))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; - float _387 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(1.46946299076080322265625))) + (_286 * float3(-2.0225429534912109375))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; - float _410 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(-1.46946299076080322265625))) + (_286 * float3(-2.02254199981689453125))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; - float _433 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(-2.3776409626007080078125))) + (_286 * float3(0.772543013095855712890625))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; - _445 = (((((fast::clamp(abs((_342 > 0.0) ? (_342 + _263) : fast::max(0.0, (_342 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25) + (fast::clamp(abs((_364 > 0.0) ? (_364 + _263) : fast::max(0.0, (_364 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_387 > 0.0) ? (_387 + _263) : fast::max(0.0, (_387 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_410 > 0.0) ? (_410 + _263) : fast::max(0.0, (_410 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_433 > 0.0) ? (_433 + _263) : fast::max(0.0, (_433 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) * 0.20000000298023223876953125; + float _341 = _329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(2.5), _278), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w); + float _342 = _341 * _323; + float _363 = _329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(0.77254199981689453125), fma(_285, float3(2.3776409626007080078125), _278)), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w); + float _364 = _363 * _323; + float _386 = _329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(-2.0225429534912109375), fma(_285, float3(1.46946299076080322265625), _278)), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w); + float _387 = _386 * _323; + float _409 = _329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(-2.02254199981689453125), fma(_285, float3(-1.46946299076080322265625), _278)), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w); + float _410 = _409 * _323; + float _432 = _329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(0.772543013095855712890625), fma(_285, float3(-2.3776409626007080078125), _278)), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w); + float _433 = _432 * _323; + _445 = (((((fast::clamp(abs((_342 > 0.0) ? fma(_341, _323, _263) : fast::max(0.0, fma(_342, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25) + (fast::clamp(abs((_364 > 0.0) ? fma(_363, _323, _263) : fast::max(0.0, fma(_364, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_387 > 0.0) ? fma(_386, _323, _263) : fast::max(0.0, fma(_387, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_410 > 0.0) ? fma(_409, _323, _263) : fast::max(0.0, fma(_410, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_433 > 0.0) ? fma(_432, _323, _263) : fast::max(0.0, fma(_433, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) * 0.20000000298023223876953125; } else { diff --git a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag index 04b9018b..23f2856b 100644 --- a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -100,8 +100,6 @@ constant spvUnsafeArray _480 = spvUnsafeArray({ 0.80891323 constant spvUnsafeArray _482 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); constant spvUnsafeArray _483 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); -constant float3 _391 = {}; - struct main0_out { float4 out_var_SV_Target0 [[color(0)]]; @@ -172,26 +170,18 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } float _602 = _Globals.WhiteTemp * 1.00055634975433349609375; float _616 = (_602 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _602)) / _602)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _602)) / _602)); - float _633 = ((0.860117733478546142578125 + (0.00015411825734190642833709716796875 * _Globals.WhiteTemp)) + ((1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 + (0.0008424202096648514270782470703125 * _Globals.WhiteTemp)) + ((7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); - float _644 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _633 = fma(1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.00015411825734190642833709716796875, _Globals.WhiteTemp, 0.860117733478546142578125)) / fma(7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.0008424202096648514270782470703125, _Globals.WhiteTemp, 1.0)); + float _644 = fma(4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(4.25, _Globals.WhiteTemp, 0.317398726940155029296875)) / fma(1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, 1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)); float _649 = ((2.0 * _633) - (8.0 * _644)) + 4.0; float2 _653 = float2((3.0 * _633) / _649, (2.0 * _644) / _649); float2 _660 = fast::normalize(float2(_633, _644)); - float _665 = _633 + (((-_660.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); - float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _665 = fma((-_660.y) * _Globals.WhiteTint, 0.0500000007450580596923828125, _633); + float _669 = fma(_660.x * _Globals.WhiteTint, 0.0500000007450580596923828125, _644); float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; - float2 _680 = select(float2(_616, (_616 * (((-3.0) * _616) + 2.86999988555908203125)) - 0.2750000059604644775390625), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); + float2 _680 = select(float2(_616, (_616 * fma(-3.0, _616, 2.86999988555908203125)) - 0.2750000059604644775390625), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); - float3 _685; - _685.x = _680.x / _683; - _685.y = 1.0; - _685.z = ((1.0 - _680.x) - _680.y) / _683; - float3 _693; - _693.x = 0.950455963611602783203125; - _693.y = 1.0; - _693.z = 1.0890576839447021484375; - float3 _697 = _685 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); - float3 _698 = _693 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _697 = float3(_680.x / _683, 1.0, ((1.0 - _680.x) - _680.y) / _683) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _698 = float3(0.950455963611602783203125, 1.0, 1.0890576839447021484375) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); float3 _717 = (_599 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_698.x / _697.x, 0.0, 0.0), float3(0.0, _698.y / _697.y, 0.0), float3(0.0, 0.0, _698.z / _697.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _547; float3 _745; if (_Globals.ColorShadow_Tint2.w != 0.0) @@ -223,7 +213,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float4 _861 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; float4 _864 = _Globals.ColorGainMidtones * _Globals.ColorGain; float4 _867 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; - float3 _905 = ((((pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))) * (_766.xyz * float3(_766.w))) + (_771.xyz + float3(_771.w))) * float3(1.0 - _804)) + (((pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))) * (_864.xyz * float3(_864.w))) + (_867.xyz + float3(_867.w))) * float3(_804 - _852))) + (((pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))) * (_817.xyz * float3(_817.w))) + (_820.xyz + float3(_820.w))) * float3(_852)); + float3 _905 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))), _817.xyz * float3(_817.w), _820.xyz + float3(_820.w)), float3(_852), fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))), _766.xyz * float3(_766.w), _771.xyz + float3(_771.w)), float3(1.0 - _804), fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))), _864.xyz * float3(_864.w), _867.xyz + float3(_867.w)) * float3(_804 - _852))); float3 _906 = _905 * _549; float3 _914 = float3(_Globals.BlueCorrection); float3 _916 = mix(_905, _905 * ((_551 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _550), _914) * _551; @@ -232,11 +222,11 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _920 = _916.z; float _923 = fast::max(fast::max(_917, _918), _920); float _928 = (fast::max(_923, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_917, _918), _920), 1.0000000133514319600180897396058e-10)) / fast::max(_923, 0.00999999977648258209228515625); - float _941 = ((_920 + _918) + _917) + (1.75 * sqrt(((_920 * (_920 - _918)) + (_918 * (_918 - _917))) + (_917 * (_917 - _920)))); + float _941 = fma(1.75, sqrt(fma(_917, _917 - _920, fma(_920, _920 - _918, _918 * (_918 - _917)))), (_920 + _918) + _917); float _942 = _941 * 0.3333333432674407958984375; float _943 = _928 - 0.4000000059604644775390625; float _948 = fast::max(1.0 - abs(_943 * 2.5), 0.0); - float _956 = (1.0 + (float(int(sign(_943 * 5.0))) * (1.0 - (_948 * _948)))) * 0.02500000037252902984619140625; + float _956 = fma(float(int(sign(_943 * 5.0))), 1.0 - (_948 * _948), 1.0) * 0.02500000037252902984619140625; float _969; if (_942 <= 0.053333334624767303466796875) { @@ -288,7 +278,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa _1001 = _996; } float _1005 = smoothstep(0.0, 1.0, 1.0 - abs(_1001 * 0.01481481455266475677490234375)); - _972.x = _973 + ((((_1005 * _1005) * _928) * (0.02999999932944774627685546875 - _973)) * 0.180000007152557373046875); + _972.x = fma(((_1005 * _1005) * _928) * (0.02999999932944774627685546875 - _973), 0.180000007152557373046875, _973); float3 _1014 = fast::max(float3(0.0), _972 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); float _1023 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; float _1026 = 1.0 + _Globals.FilmWhiteClip; @@ -305,8 +295,9 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } float _1061 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1056; float _1063 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1061; - float3 _1067 = log(mix(float3(dot(_1014, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1014, float3(0.959999978542327880859375))) * float3(0.4342944622039794921875); - float3 _1071 = float3(_Globals.FilmSlope) * (_1067 + float3(_1061)); + float3 _1064 = log(mix(float3(dot(_1014, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1014, float3(0.959999978542327880859375))); + float3 _1067 = _1064 * float3(0.4342944622039794921875); + float3 _1071 = float3(_Globals.FilmSlope) * fma(_1064, float3(0.4342944622039794921875), float3(_1061)); float3 _1079 = float3(_1056); float3 _1080 = _1067 - _1079; float3 _1092 = float3(_1063); @@ -317,20 +308,16 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float3 _1189; if (_Globals.ColorShadow_Tint2.w == 0.0) { - float3 _1131; - _1131.x = dot(_906, _Globals.ColorMatrixR_ColorCurveCd1.xyz); - _1131.y = dot(_906, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); - _1131.z = dot(_906, _Globals.ColorMatrixB_ColorCurveCm2.xyz); - float3 _1157 = fast::max(float3(0.0), _1131 * (_Globals.ColorShadow_Tint1.xyz + (_Globals.ColorShadow_Tint2.xyz * float3(1.0 / (dot(_906, _Globals.ColorShadow_Luma.xyz) + 1.0))))); + float3 _1157 = fast::max(float3(0.0), float3(dot(_906, _Globals.ColorMatrixR_ColorCurveCd1.xyz), dot(_906, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz), dot(_906, _Globals.ColorMatrixB_ColorCurveCm2.xyz)) * fma(_Globals.ColorShadow_Tint2.xyz, float3(1.0 / (dot(_906, _Globals.ColorShadow_Luma.xyz) + 1.0)), _Globals.ColorShadow_Tint1.xyz)); float3 _1162 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1157); float3 _1164 = fast::max(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); - _1189 = ((((_1164 * _Globals.ColorCurve_Ch1_Ch2.xxx) + _Globals.ColorCurve_Ch1_Ch2.yyy) * (float3(1.0) / (_1164 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www))) + ((fast::clamp(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz) * _Globals.ColorMatrixB_ColorCurveCm2.www) + (((_1162 * _Globals.ColorMatrixR_ColorCurveCd1.www) * (float3(1.0) / (_1162 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy))) + _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + _1189 = fma(fma(_1164, _Globals.ColorCurve_Ch1_Ch2.xxx, _Globals.ColorCurve_Ch1_Ch2.yyy), float3(1.0) / (_1164 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www), fma(fast::clamp(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz), _Globals.ColorMatrixB_ColorCurveCm2.www, fma(_1162 * _Globals.ColorMatrixR_ColorCurveCd1.www, float3(1.0) / (_1162 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy), _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); } else { _1189 = fast::max(float3(0.0), mix(_1119, _1119 * ((_551 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _550), _914) * _549); } - float3 _1218 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1189 * _1189)) + (float3(_Globals.MappingPolynomial.y) * _1189)) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _1218 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1189 * _1189, float3(_Globals.MappingPolynomial.y) * _1189) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); float3 _3001; if (_Globals.OutputDevice == 0u) { @@ -391,11 +378,11 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _2104 = _2100.z; float _2107 = fast::max(fast::max(_2101, _2102), _2104); float _2112 = (fast::max(_2107, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2101, _2102), _2104), 1.0000000133514319600180897396058e-10)) / fast::max(_2107, 0.00999999977648258209228515625); - float _2125 = ((_2104 + _2102) + _2101) + (1.75 * sqrt(((_2104 * (_2104 - _2102)) + (_2102 * (_2102 - _2101))) + (_2101 * (_2101 - _2104)))); + float _2125 = fma(1.75, sqrt(fma(_2101, _2101 - _2104, fma(_2104, _2104 - _2102, _2102 * (_2102 - _2101)))), (_2104 + _2102) + _2101); float _2126 = _2125 * 0.3333333432674407958984375; float _2127 = _2112 - 0.4000000059604644775390625; float _2132 = fast::max(1.0 - abs(_2127 * 2.5), 0.0); - float _2140 = (1.0 + (float(int(sign(_2127 * 5.0))) * (1.0 - (_2132 * _2132)))) * 0.02500000037252902984619140625; + float _2140 = fma(float(int(sign(_2127 * 5.0))), 1.0 - (_2132 * _2132), 1.0) * 0.02500000037252902984619140625; float _2153; if (_2126 <= 0.053333334624767303466796875) { @@ -457,21 +444,21 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _2234; if (_2193 == 3) { - _2234 = (((_2197 * (-0.16666667163372039794921875)) + (_2196 * 0.5)) + (_2195 * (-0.5))) + 0.16666667163372039794921875; + _2234 = fma(_2195, -0.5, fma(_2197, -0.16666667163372039794921875, _2196 * 0.5)) + 0.16666667163372039794921875; } else { float _2227; if (_2193 == 2) { - _2227 = ((_2197 * 0.5) + (_2196 * (-1.0))) + 0.666666686534881591796875; + _2227 = fma(_2197, 0.5, _2196 * (-1.0)) + 0.666666686534881591796875; } else { float _2222; if (_2193 == 1) { - _2222 = (((_2197 * (-0.5)) + (_2196 * 0.5)) + (_2195 * 0.5)) + 0.16666667163372039794921875; + _2222 = fma(_2195, 0.5, fma(_2197, -0.5, _2196 * 0.5)) + 0.16666667163372039794921875; } else { @@ -496,7 +483,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa { _2235 = 0.0; } - _2156.x = _2157 + ((((_2235 * 1.5) * _2112) * (0.02999999932944774627685546875 - _2157)) * 0.180000007152557373046875); + _2156.x = fma(((_2235 * 1.5) * _2112) * (0.02999999932944774627685546875 - _2157), 0.180000007152557373046875, _2157); float3 _2245 = fast::clamp(fast::clamp(_2156, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); float3 _2248 = mix(float3(dot(_2245, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2245, float3(0.959999978542327880859375)); float _2249 = _2248.x; @@ -534,8 +521,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _2327 = _2324; } - float3 _2329; - _2329.x = pow(10.0, _2327); float _2330 = _2248.y; float _2334 = log((_2330 <= 0.0) ? 6.103515625e-05 : _2330) * 0.4342944622039794921875; float _2401; @@ -571,7 +556,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _2401 = _2398; } - _2329.y = pow(10.0, _2401); float _2404 = _2248.z; float _2408 = log((_2404 <= 0.0) ? 6.103515625e-05 : _2404) * 0.4342944622039794921875; float _2475; @@ -607,8 +591,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _2475 = _2472; } - _2329.z = pow(10.0, _2475); - float3 _2479 = (_2329 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float3 _2479 = (float3(pow(10.0, _2327), pow(10.0, _2401), pow(10.0, _2475)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); float _2612 = pow(10.0, (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(-0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875)).z); float _2684 = pow(10.0, dot(float3(0.4444443881511688232421875, 0.66666662693023681640625, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(2.0810306072235107421875, 3.66812419891357421875, 4.0))); float _2685 = _2479.x; @@ -619,7 +602,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _2768; if (_2689 <= _2691) { - _2768 = (_2688 * 1.3028833866119384765625) + ((-4.0) - (_2690 * 1.3028833866119384765625)); + _2768 = fma(_2688, 1.3028833866119384765625, (-4.0) - (_2690 * 1.3028833866119384765625)); } else { @@ -646,21 +629,19 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2739 = (_2688 * 0.026057668030261993408203125) + (3.0 - (_2705 * 0.026057668030261993408203125)); + _2739 = fma(_2688, 0.026057668030261993408203125, 3.0 - (_2705 * 0.026057668030261993408203125)); } _2760 = _2739; } _2768 = _2760; } - float3 _2770; - _2770.x = pow(10.0, _2768); float _2771 = _2479.y; float _2774 = log((_2771 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2771); float _2775 = _2774 * 0.4342944622039794921875; float _2852; if (_2775 <= _2691) { - _2852 = (_2774 * 1.3028833866119384765625) + ((-4.0) - (_2690 * 1.3028833866119384765625)); + _2852 = fma(_2774, 1.3028833866119384765625, (-4.0) - (_2690 * 1.3028833866119384765625)); } else { @@ -687,20 +668,19 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2823 = (_2774 * 0.026057668030261993408203125) + (3.0 - (_2789 * 0.026057668030261993408203125)); + _2823 = fma(_2774, 0.026057668030261993408203125, 3.0 - (_2789 * 0.026057668030261993408203125)); } _2844 = _2823; } _2852 = _2844; } - _2770.y = pow(10.0, _2852); float _2855 = _2479.z; float _2858 = log((_2855 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2855); float _2859 = _2858 * 0.4342944622039794921875; float _2936; if (_2859 <= _2691) { - _2936 = (_2858 * 1.3028833866119384765625) + ((-4.0) - (_2690 * 1.3028833866119384765625)); + _2936 = fma(_2858, 1.3028833866119384765625, (-4.0) - (_2690 * 1.3028833866119384765625)); } else { @@ -727,15 +707,14 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2907 = (_2858 * 0.026057668030261993408203125) + (3.0 - (_2873 * 0.026057668030261993408203125)); + _2907 = fma(_2858, 0.026057668030261993408203125, 3.0 - (_2873 * 0.026057668030261993408203125)); } _2928 = _2907; } _2936 = _2928; } - _2770.z = pow(10.0, _2936); - float3 _2942 = pow(((_2770 - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); - _2950 = pow((float3(0.8359375) + (float3(18.8515625) * _2942)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2942))), float3(78.84375)); + float3 _2942 = pow(((float3(pow(10.0, _2768), pow(10.0, _2852), pow(10.0, _2936)) - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2950 = pow(fma(float3(18.8515625), _2942, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2942, float3(1.0))), float3(78.84375)); } else { @@ -748,11 +727,11 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _1267 = _1263.z; float _1270 = fast::max(fast::max(_1264, _1265), _1267); float _1275 = (fast::max(_1270, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1264, _1265), _1267), 1.0000000133514319600180897396058e-10)) / fast::max(_1270, 0.00999999977648258209228515625); - float _1288 = ((_1267 + _1265) + _1264) + (1.75 * sqrt(((_1267 * (_1267 - _1265)) + (_1265 * (_1265 - _1264))) + (_1264 * (_1264 - _1267)))); + float _1288 = fma(1.75, sqrt(fma(_1264, _1264 - _1267, fma(_1267, _1267 - _1265, _1265 * (_1265 - _1264)))), (_1267 + _1265) + _1264); float _1289 = _1288 * 0.3333333432674407958984375; float _1290 = _1275 - 0.4000000059604644775390625; float _1295 = fast::max(1.0 - abs(_1290 * 2.5), 0.0); - float _1303 = (1.0 + (float(int(sign(_1290 * 5.0))) * (1.0 - (_1295 * _1295)))) * 0.02500000037252902984619140625; + float _1303 = fma(float(int(sign(_1290 * 5.0))), 1.0 - (_1295 * _1295), 1.0) * 0.02500000037252902984619140625; float _1316; if (_1289 <= 0.053333334624767303466796875) { @@ -814,21 +793,21 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _1397; if (_1356 == 3) { - _1397 = (((_1360 * (-0.16666667163372039794921875)) + (_1359 * 0.5)) + (_1358 * (-0.5))) + 0.16666667163372039794921875; + _1397 = fma(_1358, -0.5, fma(_1360, -0.16666667163372039794921875, _1359 * 0.5)) + 0.16666667163372039794921875; } else { float _1390; if (_1356 == 2) { - _1390 = ((_1360 * 0.5) + (_1359 * (-1.0))) + 0.666666686534881591796875; + _1390 = fma(_1360, 0.5, _1359 * (-1.0)) + 0.666666686534881591796875; } else { float _1385; if (_1356 == 1) { - _1385 = (((_1360 * (-0.5)) + (_1359 * 0.5)) + (_1358 * 0.5)) + 0.16666667163372039794921875; + _1385 = fma(_1358, 0.5, fma(_1360, -0.5, _1359 * 0.5)) + 0.16666667163372039794921875; } else { @@ -853,7 +832,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa { _1398 = 0.0; } - _1319.x = _1320 + ((((_1398 * 1.5) * _1275) * (0.02999999932944774627685546875 - _1320)) * 0.180000007152557373046875); + _1319.x = fma(((_1398 * 1.5) * _1275) * (0.02999999932944774627685546875 - _1320), 0.180000007152557373046875, _1320); float3 _1408 = fast::clamp(fast::clamp(_1319, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); float3 _1411 = mix(float3(dot(_1408, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1408, float3(0.959999978542327880859375)); float _1412 = _1411.x; @@ -891,8 +870,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _1490 = _1487; } - float3 _1492; - _1492.x = pow(10.0, _1490); float _1493 = _1411.y; float _1497 = log((_1493 <= 0.0) ? 6.103515625e-05 : _1493) * 0.4342944622039794921875; float _1564; @@ -928,7 +905,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _1564 = _1561; } - _1492.y = pow(10.0, _1564); float _1567 = _1411.z; float _1571 = log((_1567 <= 0.0) ? 6.103515625e-05 : _1567) * 0.4342944622039794921875; float _1638; @@ -964,8 +940,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _1638 = _1635; } - _1492.z = pow(10.0, _1638); - float3 _1642 = (_1492 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float3 _1642 = (float3(pow(10.0, _1490), pow(10.0, _1564), pow(10.0, _1638)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); float _1775 = pow(10.0, (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(-0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875)).z); float _1847 = pow(10.0, dot(float3(0.69444429874420166015625, 0.8333332538604736328125, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(2.0810306072235107421875, 3.66812419891357421875, 4.0))); float _1848 = _1642.x; @@ -1002,14 +977,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1902 = (_1851 * 0.05211533606052398681640625) + (3.3010299205780029296875 - (_1868 * 0.05211533606052398681640625)); + _1902 = fma(_1851, 0.05211533606052398681640625, 3.3010299205780029296875 - (_1868 * 0.05211533606052398681640625)); } _1923 = _1902; } _1926 = _1923; } - float3 _1928; - _1928.x = pow(10.0, _1926); float _1929 = _1642.y; float _1932 = log((_1929 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _1929); float _1933 = _1932 * 0.4342944622039794921875; @@ -1043,13 +1016,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1981 = (_1932 * 0.05211533606052398681640625) + (3.3010299205780029296875 - (_1947 * 0.05211533606052398681640625)); + _1981 = fma(_1932, 0.05211533606052398681640625, 3.3010299205780029296875 - (_1947 * 0.05211533606052398681640625)); } _2002 = _1981; } _2005 = _2002; } - _1928.y = pow(10.0, _2005); float _2008 = _1642.z; float _2011 = log((_2008 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2008); float _2012 = _2011 * 0.4342944622039794921875; @@ -1083,15 +1055,14 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2060 = (_2011 * 0.05211533606052398681640625) + (3.3010299205780029296875 - (_2026 * 0.05211533606052398681640625)); + _2060 = fma(_2011, 0.05211533606052398681640625, 3.3010299205780029296875 - (_2026 * 0.05211533606052398681640625)); } _2081 = _2060; } _2084 = _2081; } - _1928.z = pow(10.0, _2084); - float3 _2089 = pow((_1928 * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); - _2097 = pow((float3(0.8359375) + (float3(18.8515625) * _2089)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2089))), float3(78.84375)); + float3 _2089 = pow((float3(pow(10.0, _1926), pow(10.0, _2005), pow(10.0, _2084)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2097 = pow(fma(float3(18.8515625), _2089, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2089, float3(1.0))), float3(78.84375)); } else { @@ -1099,7 +1070,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa if (_Globals.OutputDevice == 7u) { float3 _1252 = pow(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); - _1260 = pow((float3(0.8359375) + (float3(18.8515625) * _1252)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _1252))), float3(78.84375)); + _1260 = pow(fma(float3(18.8515625), _1252, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1252, float3(1.0))), float3(78.84375)); } else { diff --git a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag index 4f7d1023..d74ada05 100644 --- a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -101,9 +101,6 @@ constant spvUnsafeArray _504 = spvUnsafeArray({ 0.80891323 constant spvUnsafeArray _506 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); constant spvUnsafeArray _507 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); -constant float3 _523 = {}; -constant float3 _3265 = {}; - struct main0_out { float4 out_var_SV_Target0 [[color(0)]]; @@ -174,26 +171,18 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } float _628 = _Globals.WhiteTemp * 1.00055634975433349609375; float _642 = (_628 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _628)) / _628)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _628)) / _628)); - float _659 = ((0.860117733478546142578125 + (0.00015411825734190642833709716796875 * _Globals.WhiteTemp)) + ((1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 + (0.0008424202096648514270782470703125 * _Globals.WhiteTemp)) + ((7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); - float _670 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _659 = fma(1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.00015411825734190642833709716796875, _Globals.WhiteTemp, 0.860117733478546142578125)) / fma(7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.0008424202096648514270782470703125, _Globals.WhiteTemp, 1.0)); + float _670 = fma(4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(4.25, _Globals.WhiteTemp, 0.317398726940155029296875)) / fma(1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, 1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)); float _675 = ((2.0 * _659) - (8.0 * _670)) + 4.0; float2 _679 = float2((3.0 * _659) / _675, (2.0 * _670) / _675); float2 _686 = fast::normalize(float2(_659, _670)); - float _691 = _659 + (((-_686.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); - float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _691 = fma((-_686.y) * _Globals.WhiteTint, 0.0500000007450580596923828125, _659); + float _695 = fma(_686.x * _Globals.WhiteTint, 0.0500000007450580596923828125, _670); float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; - float2 _706 = select(float2(_642, (_642 * (((-3.0) * _642) + 2.86999988555908203125)) - 0.2750000059604644775390625), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); + float2 _706 = select(float2(_642, (_642 * fma(-3.0, _642, 2.86999988555908203125)) - 0.2750000059604644775390625), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); - float3 _711; - _711.x = _706.x / _709; - _711.y = 1.0; - _711.z = ((1.0 - _706.x) - _706.y) / _709; - float3 _719; - _719.x = 0.950455963611602783203125; - _719.y = 1.0; - _719.z = 1.0890576839447021484375; - float3 _723 = _711 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); - float3 _724 = _719 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _723 = float3(_706.x / _709, 1.0, ((1.0 - _706.x) - _706.y) / _709) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _724 = float3(0.950455963611602783203125, 1.0, 1.0890576839447021484375) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); float3 _743 = (_625 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_724.x / _723.x, 0.0, 0.0), float3(0.0, _724.y / _723.y, 0.0), float3(0.0, 0.0, _724.z / _723.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _573; float3 _771; if (_Globals.ColorShadow_Tint2.w != 0.0) @@ -225,7 +214,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float4 _887 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; float4 _890 = _Globals.ColorGainMidtones * _Globals.ColorGain; float4 _893 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; - float3 _931 = ((((pow(pow(fast::max(float3(0.0), mix(_798, _771, _777.xyz * float3(_777.w))) * float3(5.5555553436279296875), _782.xyz * float3(_782.w)) * float3(0.180000007152557373046875), float3(1.0) / (_787.xyz * float3(_787.w))) * (_792.xyz * float3(_792.w))) + (_797.xyz + float3(_797.w))) * float3(1.0 - _830)) + (((pow(pow(fast::max(float3(0.0), mix(_798, _771, _881.xyz * float3(_881.w))) * float3(5.5555553436279296875), _884.xyz * float3(_884.w)) * float3(0.180000007152557373046875), float3(1.0) / (_887.xyz * float3(_887.w))) * (_890.xyz * float3(_890.w))) + (_893.xyz + float3(_893.w))) * float3(_830 - _878))) + (((pow(pow(fast::max(float3(0.0), mix(_798, _771, _834.xyz * float3(_834.w))) * float3(5.5555553436279296875), _837.xyz * float3(_837.w)) * float3(0.180000007152557373046875), float3(1.0) / (_840.xyz * float3(_840.w))) * (_843.xyz * float3(_843.w))) + (_846.xyz + float3(_846.w))) * float3(_878)); + float3 _931 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _834.xyz * float3(_834.w))) * float3(5.5555553436279296875), _837.xyz * float3(_837.w)) * float3(0.180000007152557373046875), float3(1.0) / (_840.xyz * float3(_840.w))), _843.xyz * float3(_843.w), _846.xyz + float3(_846.w)), float3(_878), fma(fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _777.xyz * float3(_777.w))) * float3(5.5555553436279296875), _782.xyz * float3(_782.w)) * float3(0.180000007152557373046875), float3(1.0) / (_787.xyz * float3(_787.w))), _792.xyz * float3(_792.w), _797.xyz + float3(_797.w)), float3(1.0 - _830), fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _881.xyz * float3(_881.w))) * float3(5.5555553436279296875), _884.xyz * float3(_884.w)) * float3(0.180000007152557373046875), float3(1.0) / (_887.xyz * float3(_887.w))), _890.xyz * float3(_890.w), _893.xyz + float3(_893.w)) * float3(_830 - _878))); float3 _932 = _931 * _575; float3 _940 = float3(_Globals.BlueCorrection); float3 _942 = mix(_931, _931 * ((_577 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _576), _940) * _577; @@ -234,11 +223,11 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _946 = _942.z; float _949 = fast::max(fast::max(_943, _944), _946); float _954 = (fast::max(_949, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_943, _944), _946), 1.0000000133514319600180897396058e-10)) / fast::max(_949, 0.00999999977648258209228515625); - float _967 = ((_946 + _944) + _943) + (1.75 * sqrt(((_946 * (_946 - _944)) + (_944 * (_944 - _943))) + (_943 * (_943 - _946)))); + float _967 = fma(1.75, sqrt(fma(_943, _943 - _946, fma(_946, _946 - _944, _944 * (_944 - _943)))), (_946 + _944) + _943); float _968 = _967 * 0.3333333432674407958984375; float _969 = _954 - 0.4000000059604644775390625; float _974 = fast::max(1.0 - abs(_969 * 2.5), 0.0); - float _982 = (1.0 + (float(int(sign(_969 * 5.0))) * (1.0 - (_974 * _974)))) * 0.02500000037252902984619140625; + float _982 = fma(float(int(sign(_969 * 5.0))), 1.0 - (_974 * _974), 1.0) * 0.02500000037252902984619140625; float _995; if (_968 <= 0.053333334624767303466796875) { @@ -290,7 +279,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa _1027 = _1022; } float _1031 = smoothstep(0.0, 1.0, 1.0 - abs(_1027 * 0.01481481455266475677490234375)); - _998.x = _999 + ((((_1031 * _1031) * _954) * (0.02999999932944774627685546875 - _999)) * 0.180000007152557373046875); + _998.x = fma(((_1031 * _1031) * _954) * (0.02999999932944774627685546875 - _999), 0.180000007152557373046875, _999); float3 _1040 = fast::max(float3(0.0), _998 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); float _1049 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; float _1052 = 1.0 + _Globals.FilmWhiteClip; @@ -307,8 +296,9 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } float _1087 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1082; float _1089 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1087; - float3 _1093 = log(mix(float3(dot(_1040, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1040, float3(0.959999978542327880859375))) * float3(0.4342944622039794921875); - float3 _1097 = float3(_Globals.FilmSlope) * (_1093 + float3(_1087)); + float3 _1090 = log(mix(float3(dot(_1040, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1040, float3(0.959999978542327880859375))); + float3 _1093 = _1090 * float3(0.4342944622039794921875); + float3 _1097 = float3(_Globals.FilmSlope) * fma(_1090, float3(0.4342944622039794921875), float3(_1087)); float3 _1105 = float3(_1082); float3 _1106 = _1093 - _1105; float3 _1118 = float3(_1089); @@ -319,14 +309,10 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float3 _1215; if (_Globals.ColorShadow_Tint2.w == 0.0) { - float3 _1157; - _1157.x = dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz); - _1157.y = dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); - _1157.z = dot(_932, _Globals.ColorMatrixB_ColorCurveCm2.xyz); - float3 _1183 = fast::max(float3(0.0), _1157 * (_Globals.ColorShadow_Tint1.xyz + (_Globals.ColorShadow_Tint2.xyz * float3(1.0 / (dot(_932, _Globals.ColorShadow_Luma.xyz) + 1.0))))); + float3 _1183 = fast::max(float3(0.0), float3(dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz), dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz), dot(_932, _Globals.ColorMatrixB_ColorCurveCm2.xyz)) * fma(_Globals.ColorShadow_Tint2.xyz, float3(1.0 / (dot(_932, _Globals.ColorShadow_Luma.xyz) + 1.0)), _Globals.ColorShadow_Tint1.xyz)); float3 _1188 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1183); float3 _1190 = fast::max(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); - _1215 = ((((_1190 * _Globals.ColorCurve_Ch1_Ch2.xxx) + _Globals.ColorCurve_Ch1_Ch2.yyy) * (float3(1.0) / (_1190 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www))) + ((fast::clamp(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz) * _Globals.ColorMatrixB_ColorCurveCm2.www) + (((_1188 * _Globals.ColorMatrixR_ColorCurveCd1.www) * (float3(1.0) / (_1188 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy))) + _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + _1215 = fma(fma(_1190, _Globals.ColorCurve_Ch1_Ch2.xxx, _Globals.ColorCurve_Ch1_Ch2.yyy), float3(1.0) / (_1190 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www), fma(fast::clamp(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz), _Globals.ColorMatrixB_ColorCurveCm2.www, fma(_1188 * _Globals.ColorMatrixR_ColorCurveCd1.www, float3(1.0) / (_1188 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy), _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); } else { @@ -370,50 +356,53 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa break; } float3 _1256 = float3(_1229, _1242, _1255); - float3 _1258 = (_1256 * float3(0.9375)) + float3(0.03125); + float3 _1258 = fma(_1256, float3(0.9375), float3(0.03125)); float _1270 = (_1258.z * 16.0) - 0.5; float _1271 = floor(_1270); - float _1275 = (_1258.x + _1271) * 0.0625; + float _1274 = _1258.x + _1271; float _1276 = _1258.y; - float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276)); - float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276)); - float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz)); - float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375)); - float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1274 * 0.0625, _1276)); + float4 _1283 = Texture1.sample(Texture1Sampler, float2(fma(_1274, 0.0625, 0.0625), _1276)); + float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), fma(float3(_Globals.LUTWeights[0].x), _1256, float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz)); + float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow(fma(_1289, float3(0.94786727428436279296875), float3(0.0521326996386051177978515625)), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375)); + float3 _1324 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1295 * _1295, float3(_Globals.MappingPolynomial.y) * _1295) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); float3 _3103; if (_Globals.OutputDevice == 0u) { + float _3063 = _1324.x; float _3075; for (;;) { - if (_1324.x < 0.00313066993840038776397705078125) + if (_3063 < 0.00313066993840038776397705078125) { - _3075 = _1324.x * 12.9200000762939453125; + _3075 = _3063 * 12.9200000762939453125; break; } - _3075 = (pow(_1324.x, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3075 = (pow(_3063, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } + float _3076 = _1324.y; float _3088; for (;;) { - if (_1324.y < 0.00313066993840038776397705078125) + if (_3076 < 0.00313066993840038776397705078125) { - _3088 = _1324.y * 12.9200000762939453125; + _3088 = _3076 * 12.9200000762939453125; break; } - _3088 = (pow(_1324.y, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3088 = (pow(_3076, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } + float _3089 = _1324.z; float _3101; for (;;) { - if (_1324.z < 0.00313066993840038776397705078125) + if (_3089 < 0.00313066993840038776397705078125) { - _3101 = _1324.z * 12.9200000762939453125; + _3101 = _3089 * 12.9200000762939453125; break; } - _3101 = (pow(_1324.z, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3101 = (pow(_3089, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } _3103 = float3(_3075, _3088, _3101); @@ -437,11 +426,11 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _2208 = _2204.z; float _2211 = fast::max(fast::max(_2205, _2206), _2208); float _2216 = (fast::max(_2211, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2205, _2206), _2208), 1.0000000133514319600180897396058e-10)) / fast::max(_2211, 0.00999999977648258209228515625); - float _2229 = ((_2208 + _2206) + _2205) + (1.75 * sqrt(((_2208 * (_2208 - _2206)) + (_2206 * (_2206 - _2205))) + (_2205 * (_2205 - _2208)))); + float _2229 = fma(1.75, sqrt(fma(_2205, _2205 - _2208, fma(_2208, _2208 - _2206, _2206 * (_2206 - _2205)))), (_2208 + _2206) + _2205); float _2230 = _2229 * 0.3333333432674407958984375; float _2231 = _2216 - 0.4000000059604644775390625; float _2236 = fast::max(1.0 - abs(_2231 * 2.5), 0.0); - float _2244 = (1.0 + (float(int(sign(_2231 * 5.0))) * (1.0 - (_2236 * _2236)))) * 0.02500000037252902984619140625; + float _2244 = fma(float(int(sign(_2231 * 5.0))), 1.0 - (_2236 * _2236), 1.0) * 0.02500000037252902984619140625; float _2257; if (_2230 <= 0.053333334624767303466796875) { @@ -503,21 +492,21 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _2338; if (_2297 == 3) { - _2338 = (((_2301 * (-0.16666667163372039794921875)) + (_2300 * 0.5)) + (_2299 * (-0.5))) + 0.16666667163372039794921875; + _2338 = fma(_2299, -0.5, fma(_2301, -0.16666667163372039794921875, _2300 * 0.5)) + 0.16666667163372039794921875; } else { float _2331; if (_2297 == 2) { - _2331 = ((_2301 * 0.5) + (_2300 * (-1.0))) + 0.666666686534881591796875; + _2331 = fma(_2301, 0.5, _2300 * (-1.0)) + 0.666666686534881591796875; } else { float _2326; if (_2297 == 1) { - _2326 = (((_2301 * (-0.5)) + (_2300 * 0.5)) + (_2299 * 0.5)) + 0.16666667163372039794921875; + _2326 = fma(_2299, 0.5, fma(_2301, -0.5, _2300 * 0.5)) + 0.16666667163372039794921875; } else { @@ -542,7 +531,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa { _2339 = 0.0; } - _2260.x = _2261 + ((((_2339 * 1.5) * _2216) * (0.02999999932944774627685546875 - _2261)) * 0.180000007152557373046875); + _2260.x = fma(((_2339 * 1.5) * _2216) * (0.02999999932944774627685546875 - _2261), 0.180000007152557373046875, _2261); float3 _2349 = fast::clamp(fast::clamp(_2260, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); float3 _2352 = mix(float3(dot(_2349, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2349, float3(0.959999978542327880859375)); float _2353 = _2352.x; @@ -580,8 +569,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _2431 = _2428; } - float3 _2433; - _2433.x = pow(10.0, _2431); float _2434 = _2352.y; float _2438 = log((_2434 <= 0.0) ? 6.103515625e-05 : _2434) * 0.4342944622039794921875; float _2505; @@ -617,7 +604,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _2505 = _2502; } - _2433.y = pow(10.0, _2505); float _2508 = _2352.z; float _2512 = log((_2508 <= 0.0) ? 6.103515625e-05 : _2508) * 0.4342944622039794921875; float _2579; @@ -653,8 +639,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _2579 = _2576; } - _2433.z = pow(10.0, _2579); - float3 _2583 = (_2433 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float3 _2583 = (float3(pow(10.0, _2431), pow(10.0, _2505), pow(10.0, _2579)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); float _2714 = pow(10.0, (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(-0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875)).z); float _2786 = pow(10.0, dot(float3(0.4444443881511688232421875, 0.66666662693023681640625, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(2.0810306072235107421875, 3.66812419891357421875, 4.0))); float _2787 = _2583.x; @@ -665,7 +650,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _2870; if (_2791 <= _2793) { - _2870 = (_2790 * 1.3028833866119384765625) + ((-4.0) - (_2792 * 1.3028833866119384765625)); + _2870 = fma(_2790, 1.3028833866119384765625, (-4.0) - (_2792 * 1.3028833866119384765625)); } else { @@ -692,21 +677,19 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2841 = (_2790 * 0.026057668030261993408203125) + (3.0 - (_2807 * 0.026057668030261993408203125)); + _2841 = fma(_2790, 0.026057668030261993408203125, 3.0 - (_2807 * 0.026057668030261993408203125)); } _2862 = _2841; } _2870 = _2862; } - float3 _2872; - _2872.x = pow(10.0, _2870); float _2873 = _2583.y; float _2876 = log((_2873 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2873); float _2877 = _2876 * 0.4342944622039794921875; float _2954; if (_2877 <= _2793) { - _2954 = (_2876 * 1.3028833866119384765625) + ((-4.0) - (_2792 * 1.3028833866119384765625)); + _2954 = fma(_2876, 1.3028833866119384765625, (-4.0) - (_2792 * 1.3028833866119384765625)); } else { @@ -733,20 +716,19 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2925 = (_2876 * 0.026057668030261993408203125) + (3.0 - (_2891 * 0.026057668030261993408203125)); + _2925 = fma(_2876, 0.026057668030261993408203125, 3.0 - (_2891 * 0.026057668030261993408203125)); } _2946 = _2925; } _2954 = _2946; } - _2872.y = pow(10.0, _2954); float _2957 = _2583.z; float _2960 = log((_2957 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2957); float _2961 = _2960 * 0.4342944622039794921875; float _3038; if (_2961 <= _2793) { - _3038 = (_2960 * 1.3028833866119384765625) + ((-4.0) - (_2792 * 1.3028833866119384765625)); + _3038 = fma(_2960, 1.3028833866119384765625, (-4.0) - (_2792 * 1.3028833866119384765625)); } else { @@ -773,15 +755,14 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _3009 = (_2960 * 0.026057668030261993408203125) + (3.0 - (_2975 * 0.026057668030261993408203125)); + _3009 = fma(_2960, 0.026057668030261993408203125, 3.0 - (_2975 * 0.026057668030261993408203125)); } _3030 = _3009; } _3038 = _3030; } - _2872.z = pow(10.0, _3038); - float3 _3044 = pow(((_2872 - float3(3.5073844628641381859779357910156e-05)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); - _3052 = pow((float3(0.8359375) + (float3(18.8515625) * _3044)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _3044))), float3(78.84375)); + float3 _3044 = pow(((float3(pow(10.0, _2870), pow(10.0, _2954), pow(10.0, _3038)) - float3(3.5073844628641381859779357910156e-05)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _3052 = pow(fma(float3(18.8515625), _3044, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _3044, float3(1.0))), float3(78.84375)); } else { @@ -794,11 +775,11 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _1373 = _1369.z; float _1376 = fast::max(fast::max(_1370, _1371), _1373); float _1381 = (fast::max(_1376, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1370, _1371), _1373), 1.0000000133514319600180897396058e-10)) / fast::max(_1376, 0.00999999977648258209228515625); - float _1394 = ((_1373 + _1371) + _1370) + (1.75 * sqrt(((_1373 * (_1373 - _1371)) + (_1371 * (_1371 - _1370))) + (_1370 * (_1370 - _1373)))); + float _1394 = fma(1.75, sqrt(fma(_1370, _1370 - _1373, fma(_1373, _1373 - _1371, _1371 * (_1371 - _1370)))), (_1373 + _1371) + _1370); float _1395 = _1394 * 0.3333333432674407958984375; float _1396 = _1381 - 0.4000000059604644775390625; float _1401 = fast::max(1.0 - abs(_1396 * 2.5), 0.0); - float _1409 = (1.0 + (float(int(sign(_1396 * 5.0))) * (1.0 - (_1401 * _1401)))) * 0.02500000037252902984619140625; + float _1409 = fma(float(int(sign(_1396 * 5.0))), 1.0 - (_1401 * _1401), 1.0) * 0.02500000037252902984619140625; float _1422; if (_1395 <= 0.053333334624767303466796875) { @@ -860,21 +841,21 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _1503; if (_1462 == 3) { - _1503 = (((_1466 * (-0.16666667163372039794921875)) + (_1465 * 0.5)) + (_1464 * (-0.5))) + 0.16666667163372039794921875; + _1503 = fma(_1464, -0.5, fma(_1466, -0.16666667163372039794921875, _1465 * 0.5)) + 0.16666667163372039794921875; } else { float _1496; if (_1462 == 2) { - _1496 = ((_1466 * 0.5) + (_1465 * (-1.0))) + 0.666666686534881591796875; + _1496 = fma(_1466, 0.5, _1465 * (-1.0)) + 0.666666686534881591796875; } else { float _1491; if (_1462 == 1) { - _1491 = (((_1466 * (-0.5)) + (_1465 * 0.5)) + (_1464 * 0.5)) + 0.16666667163372039794921875; + _1491 = fma(_1464, 0.5, fma(_1466, -0.5, _1465 * 0.5)) + 0.16666667163372039794921875; } else { @@ -899,7 +880,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa { _1504 = 0.0; } - _1425.x = _1426 + ((((_1504 * 1.5) * _1381) * (0.02999999932944774627685546875 - _1426)) * 0.180000007152557373046875); + _1425.x = fma(((_1504 * 1.5) * _1381) * (0.02999999932944774627685546875 - _1426), 0.180000007152557373046875, _1426); float3 _1514 = fast::clamp(fast::clamp(_1425, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); float3 _1517 = mix(float3(dot(_1514, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1514, float3(0.959999978542327880859375)); float _1518 = _1517.x; @@ -937,8 +918,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _1596 = _1593; } - float3 _1598; - _1598.x = pow(10.0, _1596); float _1599 = _1517.y; float _1603 = log((_1599 <= 0.0) ? 6.103515625e-05 : _1599) * 0.4342944622039794921875; float _1670; @@ -974,7 +953,6 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _1670 = _1667; } - _1598.y = pow(10.0, _1670); float _1673 = _1517.z; float _1677 = log((_1673 <= 0.0) ? 6.103515625e-05 : _1673) * 0.4342944622039794921875; float _1744; @@ -1010,8 +988,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } _1744 = _1741; } - _1598.z = pow(10.0, _1744); - float3 _1748 = (_1598 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float3 _1748 = (float3(pow(10.0, _1596), pow(10.0, _1670), pow(10.0, _1744)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); float _1879 = pow(10.0, (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(-0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875)).z); float _1951 = pow(10.0, dot(float3(0.69444429874420166015625, 0.8333332538604736328125, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(2.0810306072235107421875, 3.66812419891357421875, 4.0))); float _1952 = _1748.x; @@ -1048,14 +1025,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2006 = (_1955 * 0.05211533606052398681640625) + (3.3010299205780029296875 - (_1972 * 0.05211533606052398681640625)); + _2006 = fma(_1955, 0.05211533606052398681640625, 3.3010299205780029296875 - (_1972 * 0.05211533606052398681640625)); } _2027 = _2006; } _2030 = _2027; } - float3 _2032; - _2032.x = pow(10.0, _2030); float _2033 = _1748.y; float _2036 = log((_2033 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2033); float _2037 = _2036 * 0.4342944622039794921875; @@ -1089,13 +1064,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2085 = (_2036 * 0.05211533606052398681640625) + (3.3010299205780029296875 - (_2051 * 0.05211533606052398681640625)); + _2085 = fma(_2036, 0.05211533606052398681640625, 3.3010299205780029296875 - (_2051 * 0.05211533606052398681640625)); } _2106 = _2085; } _2109 = _2106; } - _2032.y = pow(10.0, _2109); float _2112 = _1748.z; float _2115 = log((_2112 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2112); float _2116 = _2115 * 0.4342944622039794921875; @@ -1129,15 +1103,14 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2164 = (_2115 * 0.05211533606052398681640625) + (3.3010299205780029296875 - (_2130 * 0.05211533606052398681640625)); + _2164 = fma(_2115, 0.05211533606052398681640625, 3.3010299205780029296875 - (_2130 * 0.05211533606052398681640625)); } _2185 = _2164; } _2188 = _2185; } - _2032.z = pow(10.0, _2188); - float3 _2193 = pow((_2032 * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); - _2201 = pow((float3(0.8359375) + (float3(18.8515625) * _2193)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2193))), float3(78.84375)); + float3 _2193 = pow((float3(pow(10.0, _2030), pow(10.0, _2109), pow(10.0, _2188)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2201 = pow(fma(float3(18.8515625), _2193, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2193, float3(1.0))), float3(78.84375)); } else { @@ -1145,7 +1118,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa if (_Globals.OutputDevice == 7u) { float3 _1358 = pow(((_932 * _573) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); - _1366 = pow((float3(0.8359375) + (float3(18.8515625) * _1358)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _1358))), float3(78.84375)); + _1366 = pow(fma(float3(18.8515625), _1358, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1358, float3(1.0))), float3(78.84375)); } else { diff --git a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag index 210aee9b..790ad27a 100644 --- a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -450,19 +450,19 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float4 _202; if (_165) { - _202 = TranslucentBasePass_Shared_Fog_IntegratedLightScattering.sample(View_SharedBilinearClampedSampler, float3(((_172.xy / float2(_173)).xy * float2(0.5, -0.5)) + float2(0.5), (log2((_173 * View.View_VolumetricFogGridZParams[0]) + View.View_VolumetricFogGridZParams[1]) * View.View_VolumetricFogGridZParams[2]) * View.View_VolumetricFogInvGridSize[2]), level(0.0)); + _202 = TranslucentBasePass_Shared_Fog_IntegratedLightScattering.sample(View_SharedBilinearClampedSampler, float3(fma((_172.xy / float2(_173)).xy, float2(0.5, -0.5), float2(0.5)), (log2(fma(_173, View.View_VolumetricFogGridZParams[0], View.View_VolumetricFogGridZParams[1])) * View.View_VolumetricFogGridZParams[2]) * View.View_VolumetricFogInvGridSize[2]), level(0.0)); } else { _202 = float4(0.0, 0.0, 0.0, 1.0); } - _215 = float4(_202.xyz + (in.in_var_TEXCOORD7.xyz * float3(_202.w)), _202.w * in.in_var_TEXCOORD7.w); + _215 = float4(fma(in.in_var_TEXCOORD7.xyz, float3(_202.w), _202.xyz), _202.w * in.in_var_TEXCOORD7.w); } else { _215 = in.in_var_TEXCOORD7; } - float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-0.2857142984867095947265625, -0.4285714328289031982421875, 0.857142865657806396484375), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(fma(1.0 + dot(float3(-0.2857142984867095947265625, -0.4285714328289031982421875, 0.857142865657806396484375), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize(fma(float3(0.0, 0.0, 1.0), float3(View.View_NormalOverrideParameter.w), View.View_NormalOverrideParameter.xyz)))), 0.5, 0.20000000298023223876953125)), float3(0.0)); float3 _246; if (View.View_OutOfBoundsMask > 0.0) { @@ -482,7 +482,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { _246 = _216; } - float4 _255 = float4((_246 * float3(_215.w)) + _215.xyz, _108); + float4 _255 = float4(fma(_246, float3(_215.w), _215.xyz), _108); _255.w = 1.0; float4 _268; uint _269; diff --git a/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag index 5017be6c..5e60f06d 100644 --- a/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -204,7 +204,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; float _68 = _67.w; float4 _82 = _Globals.ScreenToShadowMatrix * float4((((gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw) - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); - float _118 = fast::clamp(((fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float _118 = fast::clamp(fma(fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); _128.w = 0.0; diff --git a/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag index 5017be6c..5e60f06d 100644 --- a/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -204,7 +204,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; float _68 = _67.w; float4 _82 = _Globals.ScreenToShadowMatrix * float4((((gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw) - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); - float _118 = fast::clamp(((fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float _118 = fast::clamp(fma(fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); _128.w = 0.0; diff --git a/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese index 7d4ec2c5..6201fefa 100644 --- a/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese +++ b/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -381,14 +381,14 @@ struct main0_patchIn float4 _177 = float4(_157 * 3.0); float4 _181 = float4(_158 * 3.0); float4 _188 = float4(_159 * 3.0); - float4 _202 = ((((((((((_135[0][0] * float4(_157)) * _165) + ((_135[1][0] * float4(_158)) * _169)) + ((_135[2][0] * float4(_159)) * _174)) + ((_135[0][1] * _177) * _169)) + ((_135[0][2] * _181) * _165)) + ((_135[1][1] * _181) * _174)) + ((_135[1][2] * _188) * _169)) + ((_135[2][1] * _188) * _165)) + ((_135[2][2] * _177) * _174)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _174) * _165) * _169); - float3 _226 = ((_117[0].xyz * float3(gl_TessCoord.x)) + (_117[1].xyz * float3(gl_TessCoord.y))).xyz + (_117[2].xyz * float3(gl_TessCoord.z)); - float4 _229 = ((_118[0] * _165) + (_118[1] * _169)) + (_118[2] * _174); - float4 _231 = ((_119[0] * _165) + (_119[1] * _169)) + (_119[2] * _174); - float4 _233 = ((_120[0][0] * _165) + (_120[1][0] * _169)) + (_120[2][0] * _174); + float4 _202 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _174) * _165, _169, fma(_135[2][2] * _177, _174, fma(_135[2][1] * _188, _165, fma(_135[1][2] * _188, _169, fma(_135[1][1] * _181, _174, fma(_135[0][2] * _181, _165, fma(_135[0][1] * _177, _169, fma(_135[2][0] * float4(_159), _174, fma(_135[0][0] * float4(_157), _165, (_135[1][0] * float4(_158)) * _169))))))))); + float3 _226 = fma(_117[2].xyz, float3(gl_TessCoord.z), fma(_117[0].xyz, float3(gl_TessCoord.x), _117[1].xyz * float3(gl_TessCoord.y)).xyz); + float4 _229 = fma(_118[2], _174, fma(_118[0], _165, _118[1] * _169)); + float4 _231 = fma(_119[2], _174, fma(_119[0], _165, _119[1] * _169)); + float4 _233 = fma(_120[2][0], _174, fma(_120[0][0], _165, _120[1][0] * _169)); spvUnsafeArray _234 = spvUnsafeArray({ _233 }); float3 _236 = _229.xyz; - float3 _264 = _202.xyz + (((float3((Material_Texture2D_3.sample(Material_Texture2D_3Sampler, (float2(View.View_GameTime * 0.20000000298023223876953125, View.View_GameTime * (-0.699999988079071044921875)) + (_233.zw * float2(1.0, 2.0))), level(-1.0)).x * 10.0) * (1.0 - _231.x)) * _236) * float3(0.5)) * float3(((_136[0] * gl_TessCoord.x) + (_136[1] * gl_TessCoord.y)) + (_136[2] * gl_TessCoord.z))); + float3 _264 = fma((float3((Material_Texture2D_3.sample(Material_Texture2D_3Sampler, fma(_233.zw, float2(1.0, 2.0), float2(View.View_GameTime * 0.20000000298023223876953125, View.View_GameTime * (-0.699999988079071044921875))), level(-1.0)).x * 10.0) * (1.0 - _231.x)) * _236) * float3(0.5), float3(fma(_136[2], gl_TessCoord.z, fma(_136[0], gl_TessCoord.x, _136[1] * gl_TessCoord.y))), _202.xyz); float4 _270 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_264.x, _264.y, _264.z, _202.w); float4 _281; if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_270.z < 0.0)) @@ -409,7 +409,7 @@ struct main0_patchIn out_var_TEXCOORD0 = _234; out.out_var_PRIMITIVE_ID = patchIn.gl_in[0u].in_var_PRIMITIVE_ID; out.out_var_TEXCOORD6 = _281.z; - out.out_var_TEXCOORD8 = (ShadowDepthPass.ShadowDepthPass_ShadowParams.y * fast::clamp((abs(_290) > 0.0) ? (sqrt(fast::clamp(1.0 - (_290 * _290), 0.0, 1.0)) / _290) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z)) + ShadowDepthPass.ShadowDepthPass_ShadowParams.x; + out.out_var_TEXCOORD8 = fma(ShadowDepthPass.ShadowDepthPass_ShadowParams.y, fast::clamp((abs(_290) > 0.0) ? (sqrt(fast::clamp(1.0 - (_290 * _290), 0.0, 1.0)) / _290) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z), ShadowDepthPass.ShadowDepthPass_ShadowParams.x); out.out_var_TEXCOORD7 = _264.xyz; out.gl_Position = _281; out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; diff --git a/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese index f1b74aac..987ba54e 100644 --- a/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese +++ b/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -299,12 +299,12 @@ struct main0_patchIn float4 _210 = float4(_190 * 3.0); float4 _214 = float4(_191 * 3.0); float4 _221 = float4(_192 * 3.0); - float4 _235 = ((((((((((_136[0][0] * float4(_190)) * _198) + ((_136[1][0] * float4(_191)) * _202)) + ((_136[2][0] * float4(_192)) * _207)) + ((_136[0][1] * _210) * _202)) + ((_136[0][2] * _214) * _198)) + ((_136[1][1] * _214) * _207)) + ((_136[1][2] * _221) * _202)) + ((_136[2][1] * _221) * _198)) + ((_136[2][2] * _210) * _207)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _207) * _198) * _202); + float4 _235 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _207) * _198, _202, fma(_136[2][2] * _210, _207, fma(_136[2][1] * _221, _198, fma(_136[1][2] * _221, _202, fma(_136[1][1] * _214, _207, fma(_136[0][2] * _214, _198, fma(_136[0][1] * _210, _202, fma(_136[2][0] * float4(_192), _207, fma(_136[0][0] * float4(_190), _198, (_136[1][0] * float4(_191)) * _202))))))))); float3 _237 = float3(gl_TessCoord.x); float3 _240 = float3(gl_TessCoord.y); float3 _254 = float3(gl_TessCoord.z); - float3 _256 = ((_128[0].xyz * _237) + (_128[1].xyz * _240)).xyz + (_128[2].xyz * _254); - float4 _259 = ((_129[0] * _198) + (_129[1] * _202)) + (_129[2] * _207); + float3 _256 = fma(_128[2].xyz, _254, fma(_128[0].xyz, _237, _128[1].xyz * _240).xyz); + float4 _259 = fma(_129[2], _207, fma(_129[0], _198, _129[1] * _202)); float3 _264 = _235.xyz; float3 _265 = _256.xyz; float3 _266 = _259.xyz; @@ -321,8 +321,8 @@ struct main0_patchIn float _363 = float(int((_282 + _285) == 2.0)); float _367 = float(int((_285 + _279) == 2.0)); float _370 = float(int(_286 == 2.0)); - _387 = ((float4(_363) * _138[0]) + (float4(_367) * _138[1])) + (float4(_370) * _138[2]); - _388 = ((float3(_363) * _139[0]) + (float3(_367) * _139[1])) + (float3(_370) * _139[2]); + _387 = fma(float4(_370), _138[2], fma(float4(_363), _138[0], float4(_367) * _138[1])); + _388 = fma(float3(_370), _139[2], fma(float3(_363), _139[0], float3(_367) * _139[1])); } else { @@ -333,15 +333,15 @@ struct main0_patchIn float4 _304 = float4(_279); float4 _306 = float4(_282); float4 _309 = float4(_285); - float4 _311 = ((_304 * _146[0]) + (_306 * _146[1])) + (_309 * _146[2]); - float4 _316 = ((_304 * _147[0]) + (_306 * _147[1])) + (_309 * _147[2]); + float4 _311 = fma(_309, _146[2], fma(_304, _146[0], _306 * _146[1])); + float4 _316 = fma(_309, _147[2], fma(_304, _147[0], _306 * _147[1])); float3 _331 = float3(_279); float3 _333 = float3(_282); float3 _336 = float3(_285); - float3 _338 = ((_331 * _148[0]) + (_333 * _148[1])) + (_336 * _148[2]); - float3 _343 = ((_331 * _149[0]) + (_333 * _149[1])) + (_336 * _149[2]); - _358 = ((_304 * ((_202 * _311) + (_207 * _316))) + (_306 * ((_207 * _311) + (_198 * _316)))) + (_309 * ((_198 * _311) + (_202 * _316))); - _359 = ((_331 * ((_240 * _338) + (_254 * _343))) + (_333 * ((_254 * _338) + (_237 * _343)))) + (_336 * ((_237 * _338) + (_240 * _343))); + float3 _338 = fma(_336, _148[2], fma(_331, _148[0], _333 * _148[1])); + float3 _343 = fma(_336, _149[2], fma(_331, _149[0], _333 * _149[1])); + _358 = fma(_309, fma(_198, _311, _202 * _316), fma(_304, fma(_202, _311, _207 * _316), _306 * fma(_207, _311, _198 * _316))); + _359 = fma(_336, fma(_237, _338, _240 * _343), fma(_331, fma(_240, _338, _254 * _343), _333 * fma(_254, _338, _237 * _343))); } else { @@ -364,7 +364,7 @@ struct main0_patchIn float _547; if (fast::min(_411.x, fast::min(_411.y, _411.z)) > (View.View_GlobalVolumeCenterAndExtent[0].w * View.View_GlobalVolumeTexelSize)) { - _547 = View_GlobalDistanceFieldTexture0.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[0u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[0u].xyz), level(0.0)).x; + _547 = View_GlobalDistanceFieldTexture0.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[0u].www, View.View_GlobalVolumeWorldToUVAddAndMul[0u].xyz), level(0.0)).x; } else { @@ -372,7 +372,7 @@ struct main0_patchIn float _535; if (fast::min(_436.x, fast::min(_436.y, _436.z)) > (View.View_GlobalVolumeCenterAndExtent[1].w * View.View_GlobalVolumeTexelSize)) { - _535 = View_GlobalDistanceFieldTexture1.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[1u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[1u].xyz), level(0.0)).x; + _535 = View_GlobalDistanceFieldTexture1.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[1u].www, View.View_GlobalVolumeWorldToUVAddAndMul[1u].xyz), level(0.0)).x; } else { @@ -382,14 +382,14 @@ struct main0_patchIn float _523; if (fast::min(_459.x, fast::min(_459.y, _459.z)) > (View.View_GlobalVolumeCenterAndExtent[2].w * View.View_GlobalVolumeTexelSize)) { - _523 = View_GlobalDistanceFieldTexture2.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[2u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[2u].xyz), level(0.0)).x; + _523 = View_GlobalDistanceFieldTexture2.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[2u].www, View.View_GlobalVolumeWorldToUVAddAndMul[2u].xyz), level(0.0)).x; } else { float _511; if (_480 > (View.View_GlobalVolumeCenterAndExtent[3].w * View.View_GlobalVolumeTexelSize)) { - _511 = mix(View.View_MaxGlobalDistance, View_GlobalDistanceFieldTexture3.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[3u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[3u].xyz), level(0.0)).x, fast::clamp((_480 * 10.0) * View.View_GlobalVolumeWorldToUVAddAndMul[3].w, 0.0, 1.0)); + _511 = mix(View.View_MaxGlobalDistance, View_GlobalDistanceFieldTexture3.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[3u].www, View.View_GlobalVolumeWorldToUVAddAndMul[3u].xyz), level(0.0)).x, fast::clamp((_480 * 10.0) * View.View_GlobalVolumeWorldToUVAddAndMul[3].w, 0.0, 1.0)); } else { @@ -401,12 +401,12 @@ struct main0_patchIn } _547 = _535; } - float3 _565 = _264 + ((_398[2] * float3(fast::min(_547 + Material.Material_ScalarExpressions[0].z, 0.0) * Material.Material_ScalarExpressions[0].w)) * float3(((_137[0] * gl_TessCoord.x) + (_137[1] * gl_TessCoord.y)) + (_137[2] * gl_TessCoord.z))); + float3 _565 = fma(_398[2] * float3(fast::min(_547 + Material.Material_ScalarExpressions[0].z, 0.0) * Material.Material_ScalarExpressions[0].w), float3(fma(_137[2], gl_TessCoord.z, fma(_137[0], gl_TessCoord.x, _137[1] * gl_TessCoord.y))), _264); float4 _574 = View.View_TranslatedWorldToClip * float4(_565.x, _565.y, _565.z, _235.w); - _574.z = _574.z + (0.001000000047497451305389404296875 * _574.w); + _574.z = fma(0.001000000047497451305389404296875, _574.w, _574.z); out.gl_Position = _574; - out.out_var_TEXCOORD6 = ((_120[0] * _198) + (_120[1] * _202)) + (_120[2] * _207); - out.out_var_TEXCOORD7 = ((_121[0] * _198) + (_121[1] * _202)) + (_121[2] * _207); + out.out_var_TEXCOORD6 = fma(_120[2], _207, fma(_120[0], _198, _120[1] * _202)); + out.out_var_TEXCOORD7 = fma(_121[2], _207, fma(_121[0], _198, _121[1] * _202)); out.out_var_TEXCOORD10_centroid = float4(_256.x, _256.y, _256.z, _118.w); out.out_var_TEXCOORD11_centroid = _259; out.gl_ClipDistance[0u] = dot(View.View_GlobalClippingPlane, float4(_565.xyz - float3(View.View_PreViewTranslation), 1.0)); diff --git a/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese index b4dbe705..b0e1504b 100644 --- a/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese +++ b/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -186,9 +186,9 @@ struct main0_patchIn float4 _139 = float4(_119 * 3.0); float4 _143 = float4(_120 * 3.0); float4 _150 = float4(_121 * 3.0); - float4 _164 = ((((((((((_101[0][0] * float4(_119)) * _127) + ((_101[1][0] * float4(_120)) * _131)) + ((_101[2][0] * float4(_121)) * _136)) + ((_101[0][1] * _139) * _131)) + ((_101[0][2] * _143) * _127)) + ((_101[1][1] * _143) * _136)) + ((_101[1][2] * _150) * _131)) + ((_101[2][1] * _150) * _127)) + ((_101[2][2] * _139) * _136)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _136) * _127) * _131); - float3 _179 = ((_93[0].xyz * float3(gl_TessCoord.x)) + (_93[1].xyz * float3(gl_TessCoord.y))).xyz + (_93[2].xyz * float3(gl_TessCoord.z)); - float4 _182 = ((_94[0] * _127) + (_94[1] * _131)) + (_94[2] * _136); + float4 _164 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _136) * _127, _131, fma(_101[2][2] * _139, _136, fma(_101[2][1] * _150, _127, fma(_101[1][2] * _150, _131, fma(_101[1][1] * _143, _136, fma(_101[0][2] * _143, _127, fma(_101[0][1] * _139, _131, fma(_101[2][0] * float4(_121), _136, fma(_101[0][0] * float4(_119), _127, (_101[1][0] * float4(_120)) * _131))))))))); + float3 _179 = fma(_93[2].xyz, float3(gl_TessCoord.z), fma(_93[0].xyz, float3(gl_TessCoord.x), _93[1].xyz * float3(gl_TessCoord.y)).xyz); + float4 _182 = fma(_94[2], _136, fma(_94[0], _127, _94[1] * _131)); float4 _189 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_164.x, _164.y, _164.z, _164.w); float4 _200; if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_189.z < 0.0)) @@ -204,7 +204,7 @@ struct main0_patchIn } float _209 = abs(dot(float3(ShadowDepthPass.ShadowDepthPass_ViewMatrix[0].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[1].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[2].z), _182.xyz)); float4 _234 = _200; - _234.z = ((_200.z * ShadowDepthPass.ShadowDepthPass_ShadowParams.w) + ((ShadowDepthPass.ShadowDepthPass_ShadowParams.y * fast::clamp((abs(_209) > 0.0) ? (sqrt(fast::clamp(1.0 - (_209 * _209), 0.0, 1.0)) / _209) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z)) + ShadowDepthPass.ShadowDepthPass_ShadowParams.x)) * _200.w; + _234.z = fma(_200.z, ShadowDepthPass.ShadowDepthPass_ShadowParams.w, fma(ShadowDepthPass.ShadowDepthPass_ShadowParams.y, fast::clamp((abs(_209) > 0.0) ? (sqrt(fast::clamp(1.0 - (_209 * _209), 0.0, 1.0)) / _209) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z), ShadowDepthPass.ShadowDepthPass_ShadowParams.x)) * _200.w; out.out_var_TEXCOORD10_centroid = float4(_179.x, _179.y, _179.z, _90.w); out.out_var_TEXCOORD11_centroid = _182; out.out_var_TEXCOORD6 = 0.0; diff --git a/reference/opt/shaders-ue4/asm/tese/ds-texcoord-array.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-texcoord-array.asm.tese index 346d7e3f..aa2fff0a 100644 --- a/reference/opt/shaders-ue4/asm/tese/ds-texcoord-array.asm.tese +++ b/reference/opt/shaders-ue4/asm/tese/ds-texcoord-array.asm.tese @@ -276,7 +276,7 @@ struct main0_patchIn float4 _111 = float4(gl_TessCoord.x); float4 _113 = float4(gl_TessCoord.y); float4 _116 = float4(gl_TessCoord.z); - float4 _118 = ((_97[0] * _111) + (_97[1] * _113)) + (_97[2] * _116); + float4 _118 = fma(_97[2], _116, fma(_97[0], _111, _97[1] * _113)); spvUnsafeArray _72; _72 = _79[0]; spvUnsafeArray _71; @@ -286,7 +286,7 @@ struct main0_patchIn spvUnsafeArray _73; for (int _133 = 0; _133 < 1; ) { - _73[_133] = (_72[_133] * _111) + (_71[_133] * _113); + _73[_133] = fma(_72[_133], _111, _71[_133] * _113); _133++; continue; } @@ -295,22 +295,22 @@ struct main0_patchIn spvUnsafeArray _74; _74 = _79[2]; float3 _155 = float3(gl_TessCoord.z); - float3 _157 = ((_77[0].xyz * _120) + (_77[1].xyz * _123)).xyz + (_77[2].xyz * _155); + float3 _157 = fma(_77[2].xyz, _155, fma(_77[0].xyz, _120, _77[1].xyz * _123).xyz); spvUnsafeArray _76; for (int _164 = 0; _164 < 1; ) { - _76[_164] = _75[_164] + (_74[_164] * _116); + _76[_164] = fma(_74[_164], _116, _75[_164]); _164++; continue; } float4 _181 = float4(_118.x, _118.y, _118.z, _118.w); out.out_var_TEXCOORD10_centroid = float4(_157.x, _157.y, _157.z, _68.w); - out.out_var_TEXCOORD11_centroid = ((_78[0] * _111) + (_78[1] * _113)) + (_78[2] * _116); + out.out_var_TEXCOORD11_centroid = fma(_78[2], _116, fma(_78[0], _111, _78[1] * _113)); out_var_TEXCOORD0 = _76; - out.out_var_COLOR1 = ((_80[0] * _111) + (_80[1] * _113)) + (_80[2] * _116); - out.out_var_COLOR2 = ((_81[0] * _111) + (_81[1] * _113)) + (_81[2] * _116); + out.out_var_COLOR1 = fma(_80[2], _116, fma(_80[0], _111, _80[1] * _113)); + out.out_var_COLOR2 = fma(_81[2], _116, fma(_81[0], _111, _81[1] * _113)); out.out_var_TEXCOORD6 = _181; - out.out_var_TEXCOORD7 = ((_98[0] * _120) + (_98[1] * _123)) + (_98[2] * _155); + out.out_var_TEXCOORD7 = fma(_98[2], _155, fma(_98[0], _120, _98[1] * _123)); out.gl_Position = View.View_TranslatedWorldToClip * _181; out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; return out; diff --git a/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert index e40239db..23ed3570 100644 --- a/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert +++ b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -338,11 +338,11 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float4 _161; if (_119.x > 0.5) { - _161 = (_132 * float4(_Globals.SectionLods.w)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[3]); + _161 = fma(_132, float4(_Globals.SectionLods.w), (float4(1.0) - _132) * _Globals.NeighborSectionLod[3]); } else { - _161 = (_132 * float4(_Globals.SectionLods.z)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[2]); + _161 = fma(_132, float4(_Globals.SectionLods.z), (float4(1.0) - _132) * _Globals.NeighborSectionLod[2]); } _186 = _161; } @@ -351,11 +351,11 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float4 _185; if (_119.x > 0.5) { - _185 = (_132 * float4(_Globals.SectionLods.y)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[1]); + _185 = fma(_132, float4(_Globals.SectionLods.y), (float4(1.0) - _132) * _Globals.NeighborSectionLod[1]); } else { - _185 = (_132 * float4(_Globals.SectionLods.x)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[0]); + _185 = fma(_132, float4(_Globals.SectionLods.x), (float4(1.0) - _132) * _Globals.NeighborSectionLod[0]); } _186 = _185; } @@ -390,15 +390,14 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float _220 = _121.x; float3 _235 = select(select(select(select(select(float3(0.03125, _121.yy), float3(0.0625, _220, _121.y), bool3(_207 < 5.0)), float3(0.125, in_var_ATTRIBUTE1[1].w, _220), bool3(_207 < 4.0)), float3(0.25, in_var_ATTRIBUTE1[1].zw), bool3(_207 < 3.0)), float3(0.5, in_var_ATTRIBUTE1[1].yz), bool3(_207 < 2.0)), float3(1.0, in_var_ATTRIBUTE1[1].xy), bool3(_207 < 1.0)); float _236 = _235.x; - float _245 = (((in_var_ATTRIBUTE1[0].x * 65280.0) + (in_var_ATTRIBUTE1[0].y * 255.0)) - 32768.0) * 0.0078125; - float _252 = (((in_var_ATTRIBUTE1[0].z * 65280.0) + (in_var_ATTRIBUTE1[0].w * 255.0)) - 32768.0) * 0.0078125; + float _245 = (fma(in_var_ATTRIBUTE1[0].x, 65280.0, in_var_ATTRIBUTE1[0].y * 255.0) - 32768.0) * 0.0078125; + float _252 = (fma(in_var_ATTRIBUTE1[0].z, 65280.0, in_var_ATTRIBUTE1[0].w * 255.0) - 32768.0) * 0.0078125; float2 _257 = floor(_122 * float2(_236)); float2 _271 = float2((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * _236) - 1.0, fast::max((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * 0.5) * _236, 2.0) - 1.0) * float2(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.y); float3 _287 = mix(float3(_257 / float2(_271.x), mix(_245, _252, _235.y)), float3(floor(_257 * float2(0.5)) / float2(_271.y), mix(_245, _252, _235.z)), float3(_206 - _207)); float2 _288 = _119.xy; - float2 _292 = _288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww; - float3 _296 = _287 + float3(_292, 0.0); - float4 _322 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _296.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _296.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _296.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); + float3 _296 = _287 + float3(_288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww, 0.0); + float4 _322 = float4(fma(Primitive.Primitive_LocalToWorld[2u].xyz, _296.zzz, fma(Primitive.Primitive_LocalToWorld[0u].xyz, _296.xxx, Primitive.Primitive_LocalToWorld[1u].xyz * _296.yyy)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); float2 _323 = _287.xy; float4 _338 = float4(_322.x, _322.y, _322.z, _322.w); float4 _339 = View.View_TranslatedWorldToClip * _338; @@ -415,12 +414,11 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff if (_357 > 0.0) { float _361 = _357 * _346; - float _362 = _361 * _354; - float _365 = View.View_WorldCameraOrigin[2] + _362; + float _365 = fma(_361, _354, View.View_WorldCameraOrigin[2]); _393 = (1.0 - _361) * _347; _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.z * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.w))); _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.x * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.y))); - _396 = _354 - _362; + _396 = _354 - (_361 * _354); } else { @@ -431,7 +429,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff } float _400 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * _396); float _417 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * _396); - float _428 = (_395 * ((abs(_400) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_400)) / _400) : (0.693147182464599609375 - (0.2402265071868896484375 * _400)))) + (_394 * ((abs(_417) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_417)) / _417) : (0.693147182464599609375 - (0.2402265071868896484375 * _417)))); + float _428 = fma(_395, (abs(_400) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_400)) / _400) : (0.693147182464599609375 - (0.2402265071868896484375 * _400)), _394 * ((abs(_417) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_417)) / _417) : (0.693147182464599609375 - (0.2402265071868896484375 * _417)))); float3 _459; if (MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w >= 0.0) { @@ -443,12 +441,11 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff } bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); float _471 = _468 ? 1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); - float3 _475 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)); - float4 _479 = float4(_475, _471); + float4 _479 = float4(fma(MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz, float3(1.0 - _471), select(_459, float3(0.0), bool3(_468))), _471); float4 _482 = _338; _482.w = _339.w; - out.out_var_TEXCOORD0 = ((_323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw) + _292).xy; - out.out_var_TEXCOORD1 = ((_323 * LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.xy) + LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.zw) + (_288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.zz); + out.out_var_TEXCOORD0 = fma(_288, LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww, _323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw).xy; + out.out_var_TEXCOORD1 = fma(_288, LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.zz, fma(_323, LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.xy, LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.zw)); out.out_var_TEXCOORD2 = float4(float4(0.0).x, float4(0.0).y, _479.x, _479.y); out.out_var_TEXCOORD3 = float4(float4(0.0).x, float4(0.0).y, _479.z, _479.w); out.out_var_TEXCOORD8 = _482; diff --git a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert index 5398fec3..81533a4a 100644 --- a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert +++ b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -297,8 +297,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float _146 = _137.w; float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); - float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 0.0 : (-0.5))) * float2(2.0)) * (((CurveTexture.sample(CurveTextureSampler, (EmitterUniforms.EmitterUniforms_MiscCurve.xy + (EmitterUniforms.EmitterUniforms_MiscCurve.zw * float2(_146))), level(0.0)) * EmitterUniforms.EmitterUniforms_MiscScale) + EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); - float3 _239 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _137.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _137.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; + float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 0.0 : (-0.5))) * float2(2.0)) * (fma(CurveTexture.sample(CurveTextureSampler, fma(EmitterUniforms.EmitterUniforms_MiscCurve.zw, float2(_146), EmitterUniforms.EmitterUniforms_MiscCurve.xy), level(0.0)), EmitterUniforms.EmitterUniforms_MiscScale, EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); + float3 _239 = float4(fma(Primitive.Primitive_LocalToWorld[2u].xyz, _137.zzz, fma(Primitive.Primitive_LocalToWorld[0u].xyz, _137.xxx, Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); float3 _251 = mix(mix(float3(View.View_ViewRight), float3(View.View_HMDViewNoRollRight), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.w)); float3 _259 = mix(-mix(float3(View.View_ViewUp), float3(View.View_HMDViewNoRollUp), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.w)); @@ -360,10 +360,10 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff _335 = _333; _336 = _334; } - float _339 = ((_145.z + ((_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale) * _146)) * 6.283185482025146484375) + EmitterUniforms.EmitterUniforms_RotationBias; + float _339 = fma(fma(_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale, _146, _145.z), 6.283185482025146484375, EmitterUniforms.EmitterUniforms_RotationBias); float3 _342 = float3(sin(_339)); float3 _344 = float3(cos(_339)); - float3 _367 = _239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))); + float3 _367 = _239 + fma(float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)), fma(_342, _336, _344 * _335), float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335))); float4 _371 = float4(_367, 1.0); float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); float4 _386; @@ -379,7 +379,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff _386 = _375; } float4 _396 = _386; - _396.z = ((_386.z * MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.y) + MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.x) * _386.w; + _396.z = fma(_386.z, MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.y, MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.x) * _386.w; out.out_var_TEXCOORD6 = 0.0; out.gl_Position = _396; return out; diff --git a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag index 6497ad77..97d3b74f 100644 --- a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -33,10 +33,11 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = float(_60); + float _68 = exp(((-_64) * _64) * 0.2222220003604888916015625); vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); - float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53); - _55 += (_72.x * _78); - _58 += _78; + float _77 = float(abs(_72.y - _50) < _53); + _55 = fma(_72.x, _68 * _77, _55); + _58 = fma(_68, _77, _58); _60++; continue; } diff --git a/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag b/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag index f2a2a40f..9aa9a471 100644 --- a/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag +++ b/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -4,7 +4,6 @@ layout(location = 0) out vec4 FragColor; void main() { - uvec2 unpacked = uvec2(18u, 52u); - FragColor = vec4(float(unpacked.x), float(unpacked.y), 1.0, 1.0); + FragColor = vec4(18.0, 52.0, 1.0, 1.0); } diff --git a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag index 769afddd..fcad3fbf 100644 --- a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag @@ -102,15 +102,12 @@ uniform sampler2D SPIRV_Cross_Combined_2; layout(location = 0) out vec4 _5; -_28 _74; - void main() { - vec2 _82 = gl_FragCoord.xy * _19._m23.xy; vec4 _88 = _7._m2 * _7._m0.xyxy; vec2 _95 = _88.xy; vec2 _96 = _88.zw; - vec2 _97 = clamp(_82 + (vec2(0.0, -2.0) * _7._m0.xy), _95, _96); + vec2 _97 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, -2.0) * _7._m0.xy), _95, _96); vec3 _109 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _97, 0.0).w * _7._m1, 0.0, 1.0); vec4 _113 = textureLod(SPIRV_Cross_Combined_1, _97, 0.0); float _114 = _113.y; @@ -124,7 +121,7 @@ void main() { _129 = _109; } - vec2 _144 = clamp(_82 + (vec2(-1.0) * _7._m0.xy), _95, _96); + vec2 _144 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0) * _7._m0.xy), _95, _96); vec3 _156 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _144, 0.0).w * _7._m1, 0.0, 1.0); vec4 _160 = textureLod(SPIRV_Cross_Combined_1, _144, 0.0); float _161 = _160.y; @@ -138,7 +135,7 @@ void main() { _176 = _156; } - vec2 _191 = clamp(_82 + (vec2(0.0, -1.0) * _7._m0.xy), _95, _96); + vec2 _191 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, -1.0) * _7._m0.xy), _95, _96); vec3 _203 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _191, 0.0).w * _7._m1, 0.0, 1.0); vec4 _207 = textureLod(SPIRV_Cross_Combined_1, _191, 0.0); float _208 = _207.y; @@ -152,7 +149,7 @@ void main() { _223 = _203; } - vec2 _238 = clamp(_82 + (vec2(1.0, -1.0) * _7._m0.xy), _95, _96); + vec2 _238 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(1.0, -1.0) * _7._m0.xy), _95, _96); vec3 _250 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _238, 0.0).w * _7._m1, 0.0, 1.0); vec4 _254 = textureLod(SPIRV_Cross_Combined_1, _238, 0.0); float _255 = _254.y; @@ -166,7 +163,7 @@ void main() { _270 = _250; } - vec2 _285 = clamp(_82 + (vec2(-2.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _285 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-2.0, 0.0) * _7._m0.xy), _95, _96); vec3 _297 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _285, 0.0).w * _7._m1, 0.0, 1.0); vec4 _301 = textureLod(SPIRV_Cross_Combined_1, _285, 0.0); float _302 = _301.y; @@ -180,7 +177,7 @@ void main() { _317 = _297; } - vec2 _332 = clamp(_82 + (vec2(-1.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _332 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0, 0.0) * _7._m0.xy), _95, _96); vec3 _344 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _332, 0.0).w * _7._m1, 0.0, 1.0); vec4 _348 = textureLod(SPIRV_Cross_Combined_1, _332, 0.0); float _349 = _348.y; @@ -194,7 +191,7 @@ void main() { _364 = _344; } - vec2 _379 = clamp(_82, _95, _96); + vec2 _379 = clamp(gl_FragCoord.xy * _19._m23.xy, _95, _96); vec3 _391 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _379, 0.0).w * _7._m1, 0.0, 1.0); vec4 _395 = textureLod(SPIRV_Cross_Combined_1, _379, 0.0); float _396 = _395.y; @@ -208,7 +205,7 @@ void main() { _411 = _391; } - vec2 _426 = clamp(_82 + (vec2(1.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _426 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(1.0, 0.0) * _7._m0.xy), _95, _96); vec3 _438 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _426, 0.0).w * _7._m1, 0.0, 1.0); vec4 _442 = textureLod(SPIRV_Cross_Combined_1, _426, 0.0); float _443 = _442.y; @@ -222,7 +219,7 @@ void main() { _458 = _438; } - vec2 _473 = clamp(_82 + (vec2(2.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _473 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(2.0, 0.0) * _7._m0.xy), _95, _96); vec3 _485 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _473, 0.0).w * _7._m1, 0.0, 1.0); vec4 _489 = textureLod(SPIRV_Cross_Combined_1, _473, 0.0); float _490 = _489.y; @@ -236,7 +233,7 @@ void main() { _505 = _485; } - vec2 _520 = clamp(_82 + (vec2(-1.0, 1.0) * _7._m0.xy), _95, _96); + vec2 _520 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0, 1.0) * _7._m0.xy), _95, _96); vec3 _532 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _520, 0.0).w * _7._m1, 0.0, 1.0); vec4 _536 = textureLod(SPIRV_Cross_Combined_1, _520, 0.0); float _537 = _536.y; @@ -250,7 +247,7 @@ void main() { _552 = _532; } - vec2 _567 = clamp(_82 + (vec2(0.0, 1.0) * _7._m0.xy), _95, _96); + vec2 _567 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, 1.0) * _7._m0.xy), _95, _96); vec3 _579 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _567, 0.0).w * _7._m1, 0.0, 1.0); vec4 _583 = textureLod(SPIRV_Cross_Combined_1, _567, 0.0); float _584 = _583.y; @@ -264,7 +261,7 @@ void main() { _599 = _579; } - vec2 _614 = clamp(_82 + _7._m0.xy, _95, _96); + vec2 _614 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, _7._m0.xy), _95, _96); vec3 _626 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _614, 0.0).w * _7._m1, 0.0, 1.0); vec4 _630 = textureLod(SPIRV_Cross_Combined_1, _614, 0.0); float _631 = _630.y; @@ -278,7 +275,7 @@ void main() { _646 = _626; } - vec2 _661 = clamp(_82 + (vec2(0.0, 2.0) * _7._m0.xy), _95, _96); + vec2 _661 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, 2.0) * _7._m0.xy), _95, _96); vec3 _673 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _661, 0.0).w * _7._m1, 0.0, 1.0); vec4 _677 = textureLod(SPIRV_Cross_Combined_1, _661, 0.0); float _678 = _677.y; @@ -293,9 +290,8 @@ void main() _693 = _673; } vec3 _702 = (((((((((((((_129 * 0.5).xyz + (_176 * 0.5)).xyz + (_223 * 0.75)).xyz + (_270 * 0.5)).xyz + (_317 * 0.5)).xyz + (_364 * 0.75)).xyz + (_411 * 1.0)).xyz + (_458 * 0.75)).xyz + (_505 * 0.5)).xyz + (_552 * 0.5)).xyz + (_599 * 0.75)).xyz + (_646 * 0.5)).xyz + (_693 * 0.5)).xyz * vec3(0.125); - _28 _704; - _704._m0 = vec4(_702.x, _702.y, _702.z, vec4(0.0).w); - _704._m0.w = 1.0; - _5 = _704._m0; + _28 _750 = _28(vec4(_702.x, _702.y, _702.z, vec4(0.0).w)); + _750._m0.w = 1.0; + _5 = _750._m0; } diff --git a/reference/opt/shaders/comp/generate_height.comp b/reference/opt/shaders/comp/generate_height.comp index bfd78300..75ad1f91 100644 --- a/reference/opt/shaders/comp/generate_height.comp +++ b/reference/opt/shaders/comp/generate_height.comp @@ -50,7 +50,7 @@ void main() vec2 _396 = _317.yy; vec2 _399 = _396 * _137.distribution[_281].yx; vec2 _426 = _396 * _137.distribution[_291].yx; - vec2 _434 = (_137.distribution[_291] * _391) + vec2(-_426.x, _426.y); - _225.heights[_281] = packHalf2x16(((_137.distribution[_281] * _391) + vec2(-_399.x, _399.y)) + vec2(_434.x, -_434.y)); + vec2 _434 = _137.distribution[_291] * _391 + vec2(-_426.x, _426.y); + _225.heights[_281] = packHalf2x16((_137.distribution[_281] * _391 + vec2(-_399.x, _399.y)) + vec2(_434.x, -_434.y)); } diff --git a/reference/opt/shaders/comp/insert.comp b/reference/opt/shaders/comp/insert.comp index 8b85e3bc..97c55dd5 100644 --- a/reference/opt/shaders/comp/insert.comp +++ b/reference/opt/shaders/comp/insert.comp @@ -6,16 +6,9 @@ layout(binding = 0, std430) writeonly buffer SSBO vec4 out_data[]; } _27; -vec4 _53; - void main() { - vec4 _46; - _46.x = 10.0; - _46.y = 30.0; - _46.z = 70.0; - _46.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _46; + _27.out_data[gl_GlobalInvocationID.x] = vec4(10.0, 30.0, 70.0, 90.0); _27.out_data[gl_GlobalInvocationID.x].y = 20.0; } diff --git a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag index 9019ac0d..8eaea64e 100644 --- a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag +++ b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -20,7 +20,7 @@ void main() _62 = 0.0; for (float _61 = 0.0; _61 < _44.count; ) { - _62 += (_24 * _34); + _62 = _24 * _34 + _62; _61 += 1.0; continue; } diff --git a/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag index dbb008af..90b000f9 100644 --- a/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag +++ b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag @@ -41,6 +41,6 @@ void main() { FragColor = (((spvWorkaroundRowMajor(_17.rm2).rm.B * spvWorkaroundRowMajor(_35.rm.B)) * spvWorkaroundRowMajor(_42.A)) * spvWorkaroundRowMajor(_42.C)) * Clip; FragColor += (_56.D * Clip); - FragColor += (_42.A[1] * Clip); + FragColor = fma(_42.A[1], Clip, FragColor); } diff --git a/reference/opt/shaders/tesc/water_tess.tesc b/reference/opt/shaders/tesc/water_tess.tesc index 79da68be..d3d9c8b3 100644 --- a/reference/opt/shaders/tesc/water_tess.tesc +++ b/reference/opt/shaders/tesc/water_tess.tesc @@ -46,20 +46,20 @@ void main() else { vOutPatchPosBase = vPatchPosBase[0]; - vec2 _681 = (vPatchPosBase[0] + (vec2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _710 = (vPatchPosBase[0] + (vec2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; + vec2 _681 = (vec2(-0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _710 = (vec2(0.5, -0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; float _729 = clamp(log2((length(_41.uCamPos - vec3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _739 = (vPatchPosBase[0] + (vec2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _768 = (vPatchPosBase[0] + (vec2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + vec2 _739 = (vec2(1.5, -0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _768 = (vec2(-0.5, 0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; float _787 = clamp(log2((length(_41.uCamPos - vec3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _797 = (vPatchPosBase[0] + (vec2(0.5) * _41.uPatchSize)) * _41.uScale.xy; + vec2 _797 = (vec2(0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; float _816 = clamp(log2((length(_41.uCamPos - vec3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _826 = (vPatchPosBase[0] + (vec2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; + vec2 _826 = (vec2(1.5, 0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; float _845 = clamp(log2((length(_41.uCamPos - vec3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _855 = (vPatchPosBase[0] + (vec2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _884 = (vPatchPosBase[0] + (vec2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; + vec2 _855 = (vec2(-0.5, 1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _884 = (vec2(0.5, 1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; float _903 = clamp(log2((length(_41.uCamPos - vec3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _913 = (vPatchPosBase[0] + (vec2(1.5) * _41.uPatchSize)) * _41.uScale.xy; + vec2 _913 = (vec2(1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; float _614 = dot(vec4(_787, _816, clamp(log2((length(_41.uCamPos - vec3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), vec4(0.25)); float _620 = dot(vec4(clamp(log2((length(_41.uCamPos - vec3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), vec4(0.25)); float _626 = dot(vec4(_729, clamp(log2((length(_41.uCamPos - vec3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), vec4(0.25)); diff --git a/reference/opt/shaders/tese/water_tess.tese b/reference/opt/shaders/tese/water_tess.tese index 2878c75b..c862cfbd 100644 --- a/reference/opt/shaders/tese/water_tess.tese +++ b/reference/opt/shaders/tese/water_tess.tese @@ -21,16 +21,15 @@ layout(location = 0) out vec3 vWorld; void main() { - vec2 _202 = vOutPatchPosBase + (gl_TessCoord.xy * _31.uPatchSize); + vec2 _202 = gl_TessCoord.xy * _31.uPatchSize + vOutPatchPosBase; vec2 _216 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.x)); float _223 = mix(_216.x, _216.y, gl_TessCoord.y); mediump float mp_copy_223 = _223; mediump float _225 = floor(mp_copy_223); - vec2 _125 = _202 * _31.uInvHeightmapSize; vec2 _141 = _31.uInvHeightmapSize * exp2(_225); - vGradNormalTex = vec4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw); - mediump vec3 _256 = mix(textureLod(uHeightmapDisplacement, _125 + (_141 * 0.5), _225).xyz, textureLod(uHeightmapDisplacement, _125 + (_141 * 1.0), _225 + 1.0).xyz, vec3(mp_copy_223 - _225)); - vec2 _171 = (_202 * _31.uScale.xy) + _256.yz; + vGradNormalTex = vec4(_202 * _31.uInvHeightmapSize + (_31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * _31.uScale.zw); + mediump vec3 _256 = mix(textureLod(uHeightmapDisplacement, _202 * _31.uInvHeightmapSize + (_141 * 0.5), _225).xyz, textureLod(uHeightmapDisplacement, _202 * _31.uInvHeightmapSize + (_141 * 1.0), _225 + 1.0).xyz, vec3(mp_copy_223 - _225)); + vec2 _171 = _202 * _31.uScale.xy + _256.yz; vWorld = vec3(_171.x, _256.x, _171.y); gl_Position = _31.uMVP * vec4(vWorld, 1.0); } diff --git a/reference/opt/shaders/vert/ground.vert b/reference/opt/shaders/vert/ground.vert index 23d8b27d..c0d637c6 100644 --- a/reference/opt/shaders/vert/ground.vert +++ b/reference/opt/shaders/vert/ground.vert @@ -82,14 +82,15 @@ void main() _385 = 0u; } vec4 _345 = vec4((_310 + uvec2(_384, _385)).xyxy & (~_317).xxyy); - vec2 _173 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _156.InvGroundSize_PatchScale.zw) + mix(_345.xy, _345.zw, vec2(_301 - _303))) * _156.InvGroundSize_PatchScale.xy; + vec2 _167 = _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _156.InvGroundSize_PatchScale.zw + mix(_345.xy, _345.zw, vec2(_301 - _303)); + vec2 _173 = _167 * _156.InvGroundSize_PatchScale.xy; mediump float _362 = textureLod(TexLOD, _173, 0.0).x * 7.96875; float hp_copy_362 = _362; float _364 = floor(hp_copy_362); vec2 _185 = _156.InvGroundSize_PatchScale.xy * exp2(_364); - vec3 _230 = (vec3(_173.x, mix(textureLod(TexHeightmap, _173 + (_185 * 0.5), _364).x, textureLod(TexHeightmap, _173 + (_185 * 1.0), _364 + 1.0).x, _362 - _364), _173.y) * _156.GroundScale.xyz) + _156.GroundPosition.xyz; + vec3 _230 = vec3(_173.x, mix(textureLod(TexHeightmap, _167 * _156.InvGroundSize_PatchScale.xy + (_185 * 0.5), _364).x, textureLod(TexHeightmap, _167 * _156.InvGroundSize_PatchScale.xy + (_185 * 1.0), _364 + 1.0).x, _362 - _364), _173.y) * _156.GroundScale.xyz + _156.GroundPosition.xyz; EyeVec = _230 - _236.g_CamPos.xyz; - TexCoord = _173 + (_156.InvGroundSize_PatchScale.xy * 0.5); + TexCoord = _167 * _156.InvGroundSize_PatchScale.xy + (_156.InvGroundSize_PatchScale.xy * 0.5); gl_Position = (((_236.g_ViewProj_Row0 * _230.x) + (_236.g_ViewProj_Row1 * _230.y)) + (_236.g_ViewProj_Row2 * _230.z)) + _236.g_ViewProj_Row3; } diff --git a/reference/opt/shaders/vert/invariant.vert b/reference/opt/shaders/vert/invariant.vert index 648ea294..31e0c2d4 100644 --- a/reference/opt/shaders/vert/invariant.vert +++ b/reference/opt/shaders/vert/invariant.vert @@ -9,8 +9,7 @@ layout(location = 0) invariant out vec4 vColor; void main() { - vec4 _20 = vInput1 * vInput2; - vec4 _21 = vInput0 + _20; + vec4 _21 = vInput1 * vInput2 + vInput0; gl_Position = _21; vec4 _27 = vInput0 - vInput1; vec4 _29 = _27 * vInput2; diff --git a/reference/opt/shaders/vert/ocean.vert b/reference/opt/shaders/vert/ocean.vert index a2790870..eed3407a 100644 --- a/reference/opt/shaders/vert/ocean.vert +++ b/reference/opt/shaders/vert/ocean.vert @@ -57,8 +57,6 @@ layout(location = 0) in vec4 Position; layout(location = 0) out vec3 EyeVec; layout(location = 1) out vec4 TexCoord; -uvec4 _476; - void main() { float _351 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); @@ -76,8 +74,6 @@ void main() { _467 = 0u; } - uvec4 _445; - _445.x = _467; bool _380 = _360.y < 32u; uint _470; if (_380) @@ -88,7 +84,6 @@ void main() { _470 = 0u; } - _445.y = _470; uint _472; if (_370) { @@ -98,7 +93,6 @@ void main() { _472 = 0u; } - _445.z = _472; uint _474; if (_380) { @@ -108,17 +102,16 @@ void main() { _474 = 0u; } - _445.w = _474; - vec4 _416 = vec4((_360.xyxy + _445) & (~_367).xxyy); - vec2 _197 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw) + mix(_416.xy, _416.zw, vec2(_351 - _353))) * _180.InvOceanSize_PatchScale.xy; + vec4 _416 = vec4((_360.xyxy + uvec4(_467, _470, _472, _474)) & (~_367).xxyy); + vec2 _197 = (_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw + mix(_416.xy, _416.zw, vec2(_351 - _353))) * _180.InvOceanSize_PatchScale.xy; vec2 _204 = _197 * _180.NormalTexCoordScale.zw; mediump float _433 = textureLod(TexLOD, _197, 0.0).x * 7.96875; float hp_copy_433 = _433; float _435 = floor(hp_copy_433); vec2 _220 = (_180.InvOceanSize_PatchScale.xy * exp2(_435)) * _180.NormalTexCoordScale.zw; - vec3 _267 = ((vec3(_197.x, 0.0, _197.y) + mix(textureLod(TexDisplacement, _204 + (_220 * 0.5), _435).yxz, textureLod(TexDisplacement, _204 + (_220 * 1.0), _435 + 1.0).yxz, vec3(_433 - _435))) * _180.OceanScale.xyz) + _180.OceanPosition.xyz; + vec3 _267 = (vec3(_197.x, 0.0, _197.y) + mix(textureLod(TexDisplacement, _197 * _180.NormalTexCoordScale.zw + (_220 * 0.5), _435).yxz, textureLod(TexDisplacement, _197 * _180.NormalTexCoordScale.zw + (_220 * 1.0), _435 + 1.0).yxz, vec3(_433 - _435))) * _180.OceanScale.xyz + _180.OceanPosition.xyz; EyeVec = _267 - _273.g_CamPos.xyz; - TexCoord = vec4(_204, _204 * _180.NormalTexCoordScale.xy) + ((_180.InvOceanSize_PatchScale.xyxy * 0.5) * _180.NormalTexCoordScale.zwzw); + TexCoord = (_180.InvOceanSize_PatchScale.xyxy * 0.5) * _180.NormalTexCoordScale.zwzw + vec4(_204, _204 * _180.NormalTexCoordScale.xy); gl_Position = (((_273.g_ViewProj_Row0 * _267.x) + (_273.g_ViewProj_Row1 * _267.y)) + (_273.g_ViewProj_Row2 * _267.z)) + _273.g_ViewProj_Row3; } diff --git a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk index dbc8073e..512bc915 100644 --- a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk @@ -26,16 +26,16 @@ layout(location = 1) out uvec4 FragColorUint; void main() { - i8vec4 _199 = unpack8(20); - ssbo.i8[0] = _199.x; - ssbo.i8[1] = _199.y; - ssbo.i8[2] = _199.z; - ssbo.i8[3] = _199.w; - u8vec4 _224 = unpack8(20u); - ssbo.u8[0] = _224.x; - ssbo.u8[1] = _224.y; - ssbo.u8[2] = _224.z; - ssbo.u8[3] = _224.w; + i8vec4 _204 = unpack8(20); + ssbo.i8[0] = _204.x; + ssbo.i8[1] = _204.y; + ssbo.i8[2] = _204.z; + ssbo.i8[3] = _204.w; + u8vec4 _229 = unpack8(20u); + ssbo.u8[0] = _229.x; + ssbo.u8[1] = _229.y; + ssbo.u8[2] = _229.z; + ssbo.u8[3] = _229.w; i8vec4 _249 = i8vec4(vColor); FragColorInt = ivec4((((((_249 + i8vec4(registers.i8)) + i8vec4(-40)) + i8vec4(-50)) + i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40))) + i8vec4(ssbo.i8[4])) + i8vec4(ubo.i8)); FragColorUint = uvec4((((((u8vec4(_249) + u8vec4(registers.u8)) + u8vec4(216)) + u8vec4(206)) + u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40))) + u8vec4(ssbo.u8[4])) + u8vec4(ubo.u8)); diff --git a/spirv.h b/spirv.h index c15736e2..38f55874 100644 --- a/spirv.h +++ b/spirv.h @@ -31,7 +31,7 @@ /* ** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python, C#, D +** C, C++, C++11, JSON, Lua, Python, C#, D, Beef ** ** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -41,6 +41,8 @@ ** - C# will use enum classes in the Specification class located in the "Spv" namespace, ** e.g.: Spv.Specification.SourceLanguage.GLSL ** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** - Beef will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL ** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. The mask-like ones have @@ -70,6 +72,7 @@ typedef enum SpvSourceLanguage_ { SpvSourceLanguageOpenCL_CPP = 4, SpvSourceLanguageHLSL = 5, SpvSourceLanguageCPP_for_OpenCL = 6, + SpvSourceLanguageSYCL = 7, SpvSourceLanguageMax = 0x7fffffff, } SpvSourceLanguage; @@ -184,6 +187,7 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeNoGlobalOffsetINTEL = 5895, SpvExecutionModeNumSIMDWorkitemsINTEL = 5896, SpvExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + SpvExecutionModeNamedBarrierCountINTEL = 6417, SpvExecutionModeMax = 0x7fffffff, } SpvExecutionMode; @@ -546,6 +550,8 @@ typedef enum SpvDecoration_ { SpvDecorationPrefetchINTEL = 5902, SpvDecorationStallEnableINTEL = 5905, SpvDecorationFuseLoopsInFunctionINTEL = 5907, + SpvDecorationAliasScopeINTEL = 5914, + SpvDecorationNoAliasINTEL = 5915, SpvDecorationBufferLocationINTEL = 5921, SpvDecorationIOPipeStorageINTEL = 5944, SpvDecorationFunctionFloatingPointModeINTEL = 6080, @@ -677,6 +683,7 @@ typedef enum SpvBuiltIn_ { SpvBuiltInSMCountNV = 5375, SpvBuiltInWarpIDNV = 5376, SpvBuiltInSMIDNV = 5377, + SpvBuiltInCullMaskKHR = 6021, SpvBuiltInMax = 0x7fffffff, } SpvBuiltIn; @@ -804,6 +811,8 @@ typedef enum SpvMemoryAccessShift_ { SpvMemoryAccessMakePointerVisibleKHRShift = 4, SpvMemoryAccessNonPrivatePointerShift = 5, SpvMemoryAccessNonPrivatePointerKHRShift = 5, + SpvMemoryAccessAliasScopeINTELMaskShift = 16, + SpvMemoryAccessNoAliasINTELMaskShift = 17, SpvMemoryAccessMax = 0x7fffffff, } SpvMemoryAccessShift; @@ -818,6 +827,8 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, SpvMemoryAccessNonPrivatePointerMask = 0x00000020, SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, + SpvMemoryAccessAliasScopeINTELMaskMask = 0x00010000, + SpvMemoryAccessNoAliasINTELMaskMask = 0x00020000, } SpvMemoryAccessMask; typedef enum SpvScope_ { @@ -1059,6 +1070,7 @@ typedef enum SpvCapability_ { SpvCapabilityFPGAMemoryAccessesINTEL = 5898, SpvCapabilityFPGAClusterAttributesINTEL = 5904, SpvCapabilityLoopFuseINTEL = 5906, + SpvCapabilityMemoryAccessAliasingINTEL = 5910, SpvCapabilityFPGABufferLocationINTEL = 5920, SpvCapabilityArbitraryPrecisionFixedPointINTEL = 5922, SpvCapabilityUSMStorageClassesINTEL = 5935, @@ -1073,13 +1085,17 @@ typedef enum SpvCapability_ { SpvCapabilityDotProductInput4x8BitPackedKHR = 6018, SpvCapabilityDotProduct = 6019, SpvCapabilityDotProductKHR = 6019, + SpvCapabilityRayCullMaskKHR = 6020, SpvCapabilityBitInstructions = 6025, + SpvCapabilityGroupNonUniformRotateKHR = 6026, SpvCapabilityAtomicFloat32AddEXT = 6033, SpvCapabilityAtomicFloat64AddEXT = 6034, SpvCapabilityLongConstantCompositeINTEL = 6089, SpvCapabilityOptNoneINTEL = 6094, SpvCapabilityAtomicFloat16AddEXT = 6095, SpvCapabilityDebugInfoModuleINTEL = 6114, + SpvCapabilitySplitBarrierINTEL = 6141, + SpvCapabilityGroupUniformArithmeticKHR = 6400, SpvCapabilityMax = 0x7fffffff, } SpvCapability; @@ -1535,6 +1551,7 @@ typedef enum SpvOp_ { SpvOpSubgroupAllKHR = 4428, SpvOpSubgroupAnyKHR = 4429, SpvOpSubgroupAllEqualKHR = 4430, + SpvOpGroupNonUniformRotateKHR = 4431, SpvOpSubgroupReadInvocationKHR = 4432, SpvOpTraceRayKHR = 4445, SpvOpExecuteCallableKHR = 4446, @@ -1801,6 +1818,9 @@ typedef enum SpvOp_ { SpvOpArbitraryFloatPowRINTEL = 5881, SpvOpArbitraryFloatPowNINTEL = 5882, SpvOpLoopControlINTEL = 5887, + SpvOpAliasDomainDeclINTEL = 5911, + SpvOpAliasScopeDeclINTEL = 5912, + SpvOpAliasScopeListDeclINTEL = 5913, SpvOpFixedSqrtINTEL = 5923, SpvOpFixedRecipINTEL = 5924, SpvOpFixedRsqrtINTEL = 5925, @@ -1839,10 +1859,23 @@ typedef enum SpvOp_ { SpvOpTypeStructContinuedINTEL = 6090, SpvOpConstantCompositeContinuedINTEL = 6091, SpvOpSpecConstantCompositeContinuedINTEL = 6092, + SpvOpControlBarrierArriveINTEL = 6142, + SpvOpControlBarrierWaitINTEL = 6143, + SpvOpGroupIMulKHR = 6401, + SpvOpGroupFMulKHR = 6402, + SpvOpGroupBitwiseAndKHR = 6403, + SpvOpGroupBitwiseOrKHR = 6404, + SpvOpGroupBitwiseXorKHR = 6405, + SpvOpGroupLogicalAndKHR = 6406, + SpvOpGroupLogicalOrKHR = 6407, + SpvOpGroupLogicalXorKHR = 6408, SpvOpMax = 0x7fffffff, } SpvOp; #ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { *hasResult = *hasResultType = false; switch (opcode) { @@ -2197,6 +2230,7 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; case SpvOpTraceRayKHR: *hasResult = false; *hasResultType = false; break; case SpvOpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; @@ -2452,6 +2486,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; case SpvOpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; case SpvOpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; @@ -2490,6 +2527,16 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; case SpvOpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; case SpvOpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; } } #endif /* SPV_ENABLE_UTILITY_CODE */ diff --git a/spirv.hpp b/spirv.hpp index 3d500ebb..48d93d64 100644 --- a/spirv.hpp +++ b/spirv.hpp @@ -26,7 +26,7 @@ // the Binary Section of the SPIR-V specification. // Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python, C#, D +// C, C++, C++11, JSON, Lua, Python, C#, D, Beef // // - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL // - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -36,6 +36,8 @@ // - C# will use enum classes in the Specification class located in the "Spv" namespace, // e.g.: Spv.Specification.SourceLanguage.GLSL // - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// - Beef will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL // // Some tokens act like mask values, which can be OR'd together, // while others are mutually exclusive. The mask-like ones have @@ -66,6 +68,7 @@ enum SourceLanguage { SourceLanguageOpenCL_CPP = 4, SourceLanguageHLSL = 5, SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageSYCL = 7, SourceLanguageMax = 0x7fffffff, }; @@ -180,6 +183,7 @@ enum ExecutionMode { ExecutionModeNoGlobalOffsetINTEL = 5895, ExecutionModeNumSIMDWorkitemsINTEL = 5896, ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeNamedBarrierCountINTEL = 6417, ExecutionModeMax = 0x7fffffff, }; @@ -542,6 +546,8 @@ enum Decoration { DecorationPrefetchINTEL = 5902, DecorationStallEnableINTEL = 5905, DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationAliasScopeINTEL = 5914, + DecorationNoAliasINTEL = 5915, DecorationBufferLocationINTEL = 5921, DecorationIOPipeStorageINTEL = 5944, DecorationFunctionFloatingPointModeINTEL = 6080, @@ -673,6 +679,7 @@ enum BuiltIn { BuiltInSMCountNV = 5375, BuiltInWarpIDNV = 5376, BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, BuiltInMax = 0x7fffffff, }; @@ -800,6 +807,8 @@ enum MemoryAccessShift { MemoryAccessMakePointerVisibleKHRShift = 4, MemoryAccessNonPrivatePointerShift = 5, MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessAliasScopeINTELMaskShift = 16, + MemoryAccessNoAliasINTELMaskShift = 17, MemoryAccessMax = 0x7fffffff, }; @@ -814,6 +823,8 @@ enum MemoryAccessMask { MemoryAccessMakePointerVisibleKHRMask = 0x00000010, MemoryAccessNonPrivatePointerMask = 0x00000020, MemoryAccessNonPrivatePointerKHRMask = 0x00000020, + MemoryAccessAliasScopeINTELMaskMask = 0x00010000, + MemoryAccessNoAliasINTELMaskMask = 0x00020000, }; enum Scope { @@ -1055,6 +1066,7 @@ enum Capability { CapabilityFPGAMemoryAccessesINTEL = 5898, CapabilityFPGAClusterAttributesINTEL = 5904, CapabilityLoopFuseINTEL = 5906, + CapabilityMemoryAccessAliasingINTEL = 5910, CapabilityFPGABufferLocationINTEL = 5920, CapabilityArbitraryPrecisionFixedPointINTEL = 5922, CapabilityUSMStorageClassesINTEL = 5935, @@ -1069,13 +1081,17 @@ enum Capability { CapabilityDotProductInput4x8BitPackedKHR = 6018, CapabilityDotProduct = 6019, CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, CapabilityBitInstructions = 6025, + CapabilityGroupNonUniformRotateKHR = 6026, CapabilityAtomicFloat32AddEXT = 6033, CapabilityAtomicFloat64AddEXT = 6034, CapabilityLongConstantCompositeINTEL = 6089, CapabilityOptNoneINTEL = 6094, CapabilityAtomicFloat16AddEXT = 6095, CapabilityDebugInfoModuleINTEL = 6114, + CapabilitySplitBarrierINTEL = 6141, + CapabilityGroupUniformArithmeticKHR = 6400, CapabilityMax = 0x7fffffff, }; @@ -1531,6 +1547,7 @@ enum Op { OpSubgroupAllKHR = 4428, OpSubgroupAnyKHR = 4429, OpSubgroupAllEqualKHR = 4430, + OpGroupNonUniformRotateKHR = 4431, OpSubgroupReadInvocationKHR = 4432, OpTraceRayKHR = 4445, OpExecuteCallableKHR = 4446, @@ -1797,6 +1814,9 @@ enum Op { OpArbitraryFloatPowRINTEL = 5881, OpArbitraryFloatPowNINTEL = 5882, OpLoopControlINTEL = 5887, + OpAliasDomainDeclINTEL = 5911, + OpAliasScopeDeclINTEL = 5912, + OpAliasScopeListDeclINTEL = 5913, OpFixedSqrtINTEL = 5923, OpFixedRecipINTEL = 5924, OpFixedRsqrtINTEL = 5925, @@ -1835,10 +1855,23 @@ enum Op { OpTypeStructContinuedINTEL = 6090, OpConstantCompositeContinuedINTEL = 6091, OpSpecConstantCompositeContinuedINTEL = 6092, + OpControlBarrierArriveINTEL = 6142, + OpControlBarrierWaitINTEL = 6143, + OpGroupIMulKHR = 6401, + OpGroupFMulKHR = 6402, + OpGroupBitwiseAndKHR = 6403, + OpGroupBitwiseOrKHR = 6404, + OpGroupBitwiseXorKHR = 6405, + OpGroupLogicalAndKHR = 6406, + OpGroupLogicalOrKHR = 6407, + OpGroupLogicalXorKHR = 6408, OpMax = 0x7fffffff, }; #ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { *hasResult = *hasResultType = false; switch (opcode) { @@ -2193,6 +2226,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; @@ -2448,6 +2482,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; @@ -2486,6 +2523,16 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; } } #endif /* SPV_ENABLE_UTILITY_CODE */ From e45d01c41f799c5f2abf74ce1a0811efdb9484d5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 27 May 2022 13:27:48 +0200 Subject: [PATCH 2/2] Emit KHR barycentrics if source enables the KHR extension. For roundtrip purposes, need to match KHR or NV extension. --- .../frag/barycentric-nv-nopersp.msl22.frag | 4 +- .../frag/barycentric-nv.msl22.frag | 4 +- .../frag/barycentric-nv-nopersp.msl22.frag | 4 +- .../frag/barycentric-nv.msl22.frag | 4 +- spirv_glsl.cpp | 60 ++++++++++++++----- spirv_glsl.hpp | 1 + spirv_msl.cpp | 31 +++++----- 7 files changed, 71 insertions(+), 37 deletions(-) diff --git a/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag b/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag index 53b8a743..012d99b5 100644 --- a/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag +++ b/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag @@ -15,14 +15,14 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNoPerspNV [[barycentric_coord, center_no_perspective]]; + float3 gl_BaryCoordNoPerspEXT [[barycentric_coord, center_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) { main0_out out = {}; int _23 = 3 * int(gl_PrimitiveID); - out.value = ((_19.uvs[_23] * in.gl_BaryCoordNoPerspNV.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNoPerspNV.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNoPerspNV.z); + out.value = ((_19.uvs[_23] * in.gl_BaryCoordNoPerspEXT.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNoPerspEXT.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNoPerspEXT.z); return out; } diff --git a/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag b/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag index ae2c704d..d6e9dcdb 100644 --- a/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag +++ b/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag @@ -15,14 +15,14 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNV [[barycentric_coord, center_perspective]]; + float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) { main0_out out = {}; int _23 = 3 * int(gl_PrimitiveID); - out.value = ((_19.uvs[_23] * in.gl_BaryCoordNV.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNV.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNV.z); + out.value = ((_19.uvs[_23] * in.gl_BaryCoordEXT.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordEXT.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordEXT.z); return out; } diff --git a/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag b/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag index ef19fbf8..1259283c 100644 --- a/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag +++ b/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag @@ -15,7 +15,7 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNoPerspNV [[barycentric_coord, center_no_perspective]]; + float3 gl_BaryCoordNoPerspEXT [[barycentric_coord, center_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) @@ -25,7 +25,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[ float2 uv0 = _19.uvs[(3 * prim) + 0]; float2 uv1 = _19.uvs[(3 * prim) + 1]; float2 uv2 = _19.uvs[(3 * prim) + 2]; - out.value = ((uv0 * in.gl_BaryCoordNoPerspNV.x) + (uv1 * in.gl_BaryCoordNoPerspNV.y)) + (uv2 * in.gl_BaryCoordNoPerspNV.z); + out.value = ((uv0 * in.gl_BaryCoordNoPerspEXT.x) + (uv1 * in.gl_BaryCoordNoPerspEXT.y)) + (uv2 * in.gl_BaryCoordNoPerspEXT.z); return out; } diff --git a/reference/shaders-msl/frag/barycentric-nv.msl22.frag b/reference/shaders-msl/frag/barycentric-nv.msl22.frag index 1d2e4c2f..386d2d26 100644 --- a/reference/shaders-msl/frag/barycentric-nv.msl22.frag +++ b/reference/shaders-msl/frag/barycentric-nv.msl22.frag @@ -15,7 +15,7 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNV [[barycentric_coord, center_perspective]]; + float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) @@ -25,7 +25,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[ float2 uv0 = _19.uvs[(3 * prim) + 0]; float2 uv1 = _19.uvs[(3 * prim) + 1]; float2 uv2 = _19.uvs[(3 * prim) + 2]; - out.value = ((uv0 * in.gl_BaryCoordNV.x) + (uv1 * in.gl_BaryCoordNV.y)) + (uv2 * in.gl_BaryCoordNV.z); + out.value = ((uv0 * in.gl_BaryCoordEXT.x) + (uv1 * in.gl_BaryCoordEXT.y)) + (uv2 * in.gl_BaryCoordEXT.z); return out; } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index a1c3ebf5..d755f223 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -619,6 +619,11 @@ void CompilerGLSL::find_static_extensions() SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); require_extension_internal("GL_OVR_multiview2"); } + + // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. + for (auto &ext : ir.declared_extensions) + if (ext == "SPV_NV_fragment_shader_barycentric") + barycentric_is_nv = true; } void CompilerGLSL::ray_tracing_khr_fixup_locations() @@ -1206,14 +1211,23 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) res += "__explicitInterpAMD "; } - if (flags.get(DecorationPerVertexNV)) + if (flags.get(DecorationPerVertexKHR)) { if (options.es && options.version < 320) - SPIRV_CROSS_THROW("pervertexNV requires ESSL 320."); + SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320."); else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("pervertexNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - res += "pervertexNV "; + SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + res += "pervertexNV "; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + res += "pervertexEXT "; + } } return res; @@ -8758,24 +8772,42 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInIncomingRayFlagsKHR: return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV"; - case BuiltInBaryCoordNV: + case BuiltInBaryCoordKHR: { if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320."); else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNV"; + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordEXT"; + } } case BuiltInBaryCoordNoPerspNV: { if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320."); + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320."); else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNoPerspNV"; + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspEXT"; + } } case BuiltInFragStencilRefEXT: diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index c4396991..edb09fa6 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -877,6 +877,7 @@ protected: bool requires_transpose_3x3 = false; bool requires_transpose_4x4 = false; bool ray_tracing_is_khr = false; + bool barycentric_is_nv = false; void ray_tracing_khr_fixup_locations(); bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 22ab1900..4687742b 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3175,6 +3175,9 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st return; } + if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR)) + SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL."); + // If variable names alias, they will end up with wrong names in the interface struct, because // there might be aliases in the member name cache and there would be a mismatch in fixup_in code. // Make sure to register the variables as unique resource names ahead of time. @@ -3458,7 +3461,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) bool builtin_is_stage_in_out = builtin_is_gl_in_out || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || - bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || + bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR || bi_type == BuiltInFragDepth || bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask; @@ -3515,7 +3518,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) } // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments. - if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV)) + if (is_active && (bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR)) { if (has_seen_barycentric) SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL."); @@ -11072,8 +11075,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInSampleId: case BuiltInSampleMask: case BuiltInLayer: - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: quals = builtin_qualifier(builtin); break; @@ -11089,7 +11092,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in else quals = member_location_attribute_qualifier(type, index); - if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV) + if (builtin == BuiltInBaryCoordKHR || builtin == BuiltInBaryCoordNoPerspKHR) { if (has_member_decoration(type.self, index, DecorationFlat) || has_member_decoration(type.self, index, DecorationCentroid) || @@ -11555,8 +11558,8 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) // Fragment function in case BuiltInSamplePosition: case BuiltInHelperInvocation: - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: return false; case BuiltInViewIndex: return get_execution_model() == ExecutionModelFragment && msl_options.multiview && @@ -14494,8 +14497,8 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); break; - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); break; @@ -14732,16 +14735,14 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Shouldn't be reached. SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); - case BuiltInBaryCoordNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + case BuiltInBaryCoordKHR: if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_perspective"; - case BuiltInBaryCoordNoPerspNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + case BuiltInBaryCoordNoPerspKHR: if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) @@ -14831,8 +14832,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInHelperInvocation: return "bool"; - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: // Use the type as declared, can be 1, 2 or 3 components. return type_to_glsl(get_variable_data_type(get(id)));