From ef6bde658021dae7732d9b628d58b2e0905c938c Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 17 Jan 2022 12:49:02 +0100 Subject: [PATCH] Do not forward expressions which carry a huge amount of dependencies. Need to force temporaries eventually, or compilers have a tendency to explode. --- .../asm/frag/depth-compare.asm.frag | 3 +- .../asm/frag/global-constant-arrays.asm.frag | 8 +- .../padded-float-array-member-defef.asm.frag | 29 +- .../asm/vert/array-missing-copies.asm.vert | 3 +- .../asm/vert/texture-buffer.asm.vert | 3 +- .../comp/expression-nesting-limits.asm.comp | 34 +++ .../inliner-dominator-inside-loop.asm.frag | 5 +- .../asm/frag/depth-compare.asm.frag | 3 +- .../asm/frag/global-constant-arrays.asm.frag | 8 +- .../padded-float-array-member-defef.asm.frag | 29 +- .../asm/vert/array-missing-copies.asm.vert | 3 +- .../asm/vert/texture-buffer.asm.vert | 3 +- .../comp/expression-nesting-limits.asm.comp | 288 ++++++++++++++++++ spirv_glsl.cpp | 8 + 14 files changed, 374 insertions(+), 53 deletions(-) create mode 100644 reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp create mode 100644 shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp diff --git a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag index b54a05fe..d18b674f 100644 --- a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -254,6 +254,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G { float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint((select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x * 255.0) + 0.5)), 0).xy), 0); float _263 = _260.y * 0.5; + float3 _266 = _148 - (_236 * float3(_263)); float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); float _445; if (_160) @@ -286,7 +287,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G } _311 = _307; } - float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_148 - (_236 * float3(_263)), 1.0); + float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); float _329 = (1.0 / (((_318.z / _318.w) * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w; float _342 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, (_278 + (_286 * float3(2.5))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; diff --git a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag index 03fa7c53..70100279 100644 --- a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -181,15 +181,13 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; float2 _680 = select(float2(_616, (_616 * (((-3.0) * _616) + 2.86999988555908203125)) - 0.2750000059604644775390625), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); - float _681 = _680.x; - float _682 = _680.y; - float _683 = fast::max(_682, 1.0000000133514319600180897396058e-10); + float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); float3 _685 = _391; - _685.x = _681 / _683; + _685.x = _680.x / _683; float3 _686 = _685; _686.y = 1.0; float3 _690 = _686; - _690.z = ((1.0 - _681) - _682) / _683; + _690.z = ((1.0 - _680.x) - _680.y) / _683; float3 _693 = _391; _693.x = 0.950455963611602783203125; float3 _694 = _693; diff --git a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag index f1c43630..f7e65ae7 100644 --- a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -183,15 +183,13 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; float2 _706 = select(float2(_642, (_642 * (((-3.0) * _642) + 2.86999988555908203125)) - 0.2750000059604644775390625), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); - float _707 = _706.x; - float _708 = _706.y; - float _709 = fast::max(_708, 1.0000000133514319600180897396058e-10); + float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); float3 _711 = _523; - _711.x = _707 / _709; + _711.x = _706.x / _709; float3 _712 = _711; _712.y = 1.0; float3 _716 = _712; - _716.z = ((1.0 - _707) - _708) / _709; + _716.z = ((1.0 - _706.x) - _706.y) / _709; float3 _719 = _523; _719.x = 0.950455963611602783203125; float3 _720 = _719; @@ -392,40 +390,37 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float3 _3103; if (_Globals.OutputDevice == 0u) { - float _3063 = _1324.x; float _3075; for (;;) { - if (_3063 < 0.00313066993840038776397705078125) + if (_1324.x < 0.00313066993840038776397705078125) { - _3075 = _3063 * 12.9200000762939453125; + _3075 = _1324.x * 12.9200000762939453125; break; } - _3075 = (pow(_3063, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3075 = (pow(_1324.x, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } - float _3076 = _1324.y; float _3088; for (;;) { - if (_3076 < 0.00313066993840038776397705078125) + if (_1324.y < 0.00313066993840038776397705078125) { - _3088 = _3076 * 12.9200000762939453125; + _3088 = _1324.y * 12.9200000762939453125; break; } - _3088 = (pow(_3076, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3088 = (pow(_1324.y, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } - float _3089 = _1324.z; float _3101; for (;;) { - if (_3089 < 0.00313066993840038776397705078125) + if (_1324.z < 0.00313066993840038776397705078125) { - _3101 = _3089 * 12.9200000762939453125; + _3101 = _1324.z * 12.9200000762939453125; break; } - _3101 = (pow(_3089, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3101 = (pow(_1324.z, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } _3103 = float3(_3075, _3088, _3101); diff --git a/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert index 520b58e7..e40239db 100644 --- a/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert +++ b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -443,7 +443,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff } bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); float _471 = _468 ? 1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); - float4 _479 = float4((MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)), _471); + float3 _475 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)); + float4 _479 = float4(_475, _471); float4 _482 = _338; _482.w = _339.w; out.out_var_TEXCOORD0 = ((_323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw) + _292).xy; diff --git a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert index 7938ecfc..ea7db420 100644 --- a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert +++ b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -363,7 +363,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float _339 = ((_145.z + ((_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale) * _146)) * 6.283185482025146484375) + EmitterUniforms.EmitterUniforms_RotationBias; float3 _342 = float3(sin(_339)); float3 _344 = float3(cos(_339)); - float4 _371 = float4(_239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))), 1.0); + float3 _367 = _239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))); + float4 _371 = float4(_367, 1.0); float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); float4 _386; if ((MobileShadowDepthPass.MobileShadowDepthPass_bClampToNearPlane > 0.0) && (_375.z < 0.0)) diff --git a/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp b/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp new file mode 100644 index 00000000..83a9b83f --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp @@ -0,0 +1,34 @@ +#version 430 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _4_5 +{ + uint _m0[16]; +} _5; + +layout(binding = 1, std430) buffer _4_6 +{ + uint _m0[16]; +} _6; + +layout(binding = 2, std430) buffer _4_7 +{ + uint _m0[16]; +} _7; + +vec4 _88(vec4 _89) +{ + for (int _91 = 0; _91 < 16; _91++) + { + uint _163 = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _5._m0[_91])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))); + uint _225 = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _163))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))); + _7._m0[_91] = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _225)))))))))))))); + } + return _89; +} + +void main() +{ + vec4 _87 = _88(vec4(uvec4(gl_GlobalInvocationID, 0u))); +} + diff --git a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag index 4049c482..ff9c122f 100644 --- a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag @@ -216,10 +216,9 @@ void main() vec2 _376 = texture(SPIRV_Cross_CombinedShadowMapTextureShadowMapSampler, IN_PosLightSpace_Reflectance.xyz.xy).xy; float _392 = (1.0 - (((step(_376.x, IN_PosLightSpace_Reflectance.xyz.z) * clamp(9.0 - (20.0 * abs(IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w; vec3 _403 = mix(_318.xyz, texture(SPIRV_Cross_CombinedEnvironmentMapTextureEnvironmentMapSampler, reflect(-IN_View_Depth.xyz, _329)).xyz, vec3(_312)); - vec4 _404 = vec4(_403.x, _403.y, _403.z, _318.w); - vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308))); + vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * vec4(_403.x, _403.y, _403.z, _318.w).xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308))); vec4 _425 = vec4(_422.x, _422.y, _422.z, _124.w); - _425.w = _404.w; + _425.w = vec4(_403.x, _403.y, _403.z, _318.w).w; vec2 _435 = min(IN_Uv_EdgeDistance1.wz, IN_UvStuds_EdgeDistance2.wz); float _439 = min(_435.x, _435.y) / _163; vec3 _445 = _425.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); diff --git a/reference/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag index 9ee2d89b..5f336c3d 100644 --- a/reference/shaders-ue4/asm/frag/depth-compare.asm.frag +++ b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -254,6 +254,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G { float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint((select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x * 255.0) + 0.5)), 0).xy), 0); float _263 = _260.y * 0.5; + float3 _266 = _148 - (_236 * float3(_263)); float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); float _445; if (_160) @@ -286,7 +287,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G } _311 = _307; } - float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_148 - (_236 * float3(_263)), 1.0); + float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); float _329 = (1.0 / (((_318.z / _318.w) * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w; float _342 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, (_278 + (_286 * float3(2.5))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; diff --git a/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag index c3d26e65..f189d1d2 100644 --- a/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag +++ b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -181,15 +181,13 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; float2 _680 = select(float2(_616, ((((-3.0) * _616) * _616) + (2.86999988555908203125 * _616)) - 0.2750000059604644775390625), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); - float _681 = _680.x; - float _682 = _680.y; - float _683 = fast::max(_682, 1.0000000133514319600180897396058e-10); + float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); float3 _685 = _391; - _685.x = _681 / _683; + _685.x = _680.x / _683; float3 _686 = _685; _686.y = 1.0; float3 _690 = _686; - _690.z = ((1.0 - _681) - _682) / _683; + _690.z = ((1.0 - _680.x) - _680.y) / _683; float _691 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); float3 _693 = _391; _693.x = 0.3127000033855438232421875 / _691; diff --git a/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag index 83857f22..e3d3f546 100644 --- a/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag +++ b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -183,15 +183,13 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; float2 _706 = select(float2(_642, ((((-3.0) * _642) * _642) + (2.86999988555908203125 * _642)) - 0.2750000059604644775390625), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); - float _707 = _706.x; - float _708 = _706.y; - float _709 = fast::max(_708, 1.0000000133514319600180897396058e-10); + float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); float3 _711 = _523; - _711.x = _707 / _709; + _711.x = _706.x / _709; float3 _712 = _711; _712.y = 1.0; float3 _716 = _712; - _716.z = ((1.0 - _707) - _708) / _709; + _716.z = ((1.0 - _706.x) - _706.y) / _709; float _717 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); float3 _719 = _523; _719.x = 0.3127000033855438232421875 / _717; @@ -394,40 +392,37 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float3 _3103; if (_Globals.OutputDevice == 0u) { - float _3063 = _1324.x; float _3075; for (;;) { - if (_3063 < 0.00313066993840038776397705078125) + if (_1324.x < 0.00313066993840038776397705078125) { - _3075 = _3063 * 12.9200000762939453125; + _3075 = _1324.x * 12.9200000762939453125; break; } - _3075 = (pow(_3063, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3075 = (pow(_1324.x, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } - float _3076 = _1324.y; float _3088; for (;;) { - if (_3076 < 0.00313066993840038776397705078125) + if (_1324.y < 0.00313066993840038776397705078125) { - _3088 = _3076 * 12.9200000762939453125; + _3088 = _1324.y * 12.9200000762939453125; break; } - _3088 = (pow(_3076, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3088 = (pow(_1324.y, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } - float _3089 = _1324.z; float _3101; for (;;) { - if (_3089 < 0.00313066993840038776397705078125) + if (_1324.z < 0.00313066993840038776397705078125) { - _3101 = _3089 * 12.9200000762939453125; + _3101 = _1324.z * 12.9200000762939453125; break; } - _3101 = (pow(_3089, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + _3101 = (pow(_1324.z, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; break; } _3103 = float3(_3075, _3088, _3101); diff --git a/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert index 3d3e0e51..67097c57 100644 --- a/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert +++ b/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -452,7 +452,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff } bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); float _471 = _468 ? 1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); - _97[0] = float4((MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)), _471); + float3 _475 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)); + _97[0] = float4(_475, _471); float4 _482 = _338; _482.w = _339.w; out.out_var_TEXCOORD0 = ((_323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw) + _292).xy; diff --git a/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert index 7938ecfc..ea7db420 100644 --- a/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert +++ b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -363,7 +363,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float _339 = ((_145.z + ((_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale) * _146)) * 6.283185482025146484375) + EmitterUniforms.EmitterUniforms_RotationBias; float3 _342 = float3(sin(_339)); float3 _344 = float3(cos(_339)); - float4 _371 = float4(_239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))), 1.0); + float3 _367 = _239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))); + float4 _371 = float4(_367, 1.0); float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); float4 _386; if ((MobileShadowDepthPass.MobileShadowDepthPass_bClampToNearPlane > 0.0) && (_375.z < 0.0)) diff --git a/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp b/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp new file mode 100644 index 00000000..e5ca1cbb --- /dev/null +++ b/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp @@ -0,0 +1,288 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 10117 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %gl_GlobalInvocationID + OpExecutionMode %1 LocalSize 1 1 1 + OpSource GLSL 430 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_uint_int_16 ArrayStride 4 + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_struct_4 BufferBlock + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 0 + OpDecorate %6 DescriptorSet 0 + OpDecorate %6 Binding 1 + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 2 + %void = OpTypeVoid + %bool = OpTypeBool + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v2int = OpTypeVector %int 2 + %v2uint = OpTypeVector %uint 2 + %v2float = OpTypeVector %float 2 + %v3int = OpTypeVector %int 3 + %v3uint = OpTypeVector %uint 3 + %v3float = OpTypeVector %float 3 + %v4int = OpTypeVector %int 4 + %v4uint = OpTypeVector %uint 4 + %v4float = OpTypeVector %float 4 + %v4bool = OpTypeVector %bool 4 + %23 = OpTypeFunction %v4float %v4float + %24 = OpTypeFunction %bool + %25 = OpTypeFunction %void +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v2int = OpTypePointer Input %v2int +%_ptr_Input_v2uint = OpTypePointer Input %v2uint +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v4int = OpTypePointer Input %v4int +%_ptr_Input_v4uint = OpTypePointer Input %v4uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v2int = OpTypePointer Output %v2int +%_ptr_Output_v2uint = OpTypePointer Output %v2uint +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v4int = OpTypePointer Output %v4int +%_ptr_Output_v4uint = OpTypePointer Output %v4uint +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_32 = OpConstant %uint 32 + %uint_4 = OpConstant %uint 4 +%uint_2147483647 = OpConstant %uint 2147483647 + %66 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %67 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %68 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_ptr_Input__arr_v4float_uint_32 = OpTypePointer Input %_arr_v4float_uint_32 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %int_16 = OpConstant %int 16 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_arr_uint_int_16 = OpTypeArray %uint %int_16 + %_struct_4 = OpTypeStruct %_arr_uint_int_16 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %5 = OpVariable %_ptr_Uniform__struct_4 Uniform + %6 = OpVariable %_ptr_Uniform__struct_4 Uniform + %7 = OpVariable %_ptr_Uniform__struct_4 Uniform + %1 = OpFunction %void None %25 + %83 = OpLabel + %84 = OpLoad %v3uint %gl_GlobalInvocationID + %85 = OpCompositeConstruct %v4uint %84 %uint_0 + %86 = OpConvertUToF %v4float %85 + %87 = OpFunctionCall %v4float %88 %86 + OpReturn + OpFunctionEnd + %88 = OpFunction %v4float None %23 + %89 = OpFunctionParameter %v4float + %92 = OpLabel + %93 = OpVariable %_ptr_Function_int Function + OpStore %93 %int_0 + OpBranch %94 + %94 = OpLabel + %95 = OpLoad %int %93 + %96 = OpSLessThan %bool %95 %int_16 + OpLoopMerge %97 %10100 None + OpBranchConditional %96 %10101 %97 + %10101 = OpLabel + %10102 = OpLoad %int %93 + %90 = OpAccessChain %_ptr_Uniform_uint %6 %int_0 %10102 + %91 = OpLoad %uint %90 + %98 = OpAccessChain %_ptr_Uniform_uint %5 %int_0 %10102 + %99 = OpLoad %uint %98 + %100 = OpIAdd %uint %91 %99 + %101 = OpIAdd %uint %91 %100 + %102 = OpIAdd %uint %91 %101 + %103 = OpIAdd %uint %91 %102 + %104 = OpIAdd %uint %91 %103 + %105 = OpIAdd %uint %91 %104 + %106 = OpIAdd %uint %91 %105 + %107 = OpIAdd %uint %91 %106 + %108 = OpIAdd %uint %91 %107 + %109 = OpIAdd %uint %91 %108 + %110 = OpIAdd %uint %91 %109 + %111 = OpIAdd %uint %91 %110 + %112 = OpIAdd %uint %91 %111 + %113 = OpIAdd %uint %91 %112 + %114 = OpIAdd %uint %91 %113 + %115 = OpIAdd %uint %91 %114 + %116 = OpIAdd %uint %91 %115 + %117 = OpIAdd %uint %91 %116 + %118 = OpIAdd %uint %91 %117 + %119 = OpIAdd %uint %91 %118 + %120 = OpIAdd %uint %91 %119 + %121 = OpIAdd %uint %91 %120 + %122 = OpIAdd %uint %91 %121 + %123 = OpIAdd %uint %91 %122 + %124 = OpIAdd %uint %91 %123 + %125 = OpIAdd %uint %91 %124 + %126 = OpIAdd %uint %91 %125 + %127 = OpIAdd %uint %91 %126 + %128 = OpIAdd %uint %91 %127 + %129 = OpIAdd %uint %91 %128 + %130 = OpIAdd %uint %91 %129 + %131 = OpIAdd %uint %91 %130 + %132 = OpIAdd %uint %91 %131 + %133 = OpIAdd %uint %91 %132 + %134 = OpIAdd %uint %91 %133 + %135 = OpIAdd %uint %91 %134 + %136 = OpIAdd %uint %91 %135 + %137 = OpIAdd %uint %91 %136 + %138 = OpIAdd %uint %91 %137 + %139 = OpIAdd %uint %91 %138 + %140 = OpIAdd %uint %91 %139 + %141 = OpIAdd %uint %91 %140 + %142 = OpIAdd %uint %91 %141 + %143 = OpIAdd %uint %91 %142 + %144 = OpIAdd %uint %91 %143 + %145 = OpIAdd %uint %91 %144 + %146 = OpIAdd %uint %91 %145 + %147 = OpIAdd %uint %91 %146 + %148 = OpIAdd %uint %91 %147 + %149 = OpIAdd %uint %91 %148 + %150 = OpIAdd %uint %91 %149 + %151 = OpIAdd %uint %91 %150 + %152 = OpIAdd %uint %91 %151 + %153 = OpIAdd %uint %91 %152 + %154 = OpIAdd %uint %91 %153 + %155 = OpIAdd %uint %91 %154 + %156 = OpIAdd %uint %91 %155 + %157 = OpIAdd %uint %91 %156 + %158 = OpIAdd %uint %91 %157 + %159 = OpIAdd %uint %91 %158 + %160 = OpIAdd %uint %91 %159 + %161 = OpIAdd %uint %91 %160 + %162 = OpIAdd %uint %91 %161 + %163 = OpIAdd %uint %91 %162 + %164 = OpIAdd %uint %91 %163 + %165 = OpIAdd %uint %91 %164 + %166 = OpIAdd %uint %91 %165 + %167 = OpIAdd %uint %91 %166 + %168 = OpIAdd %uint %91 %167 + %169 = OpIAdd %uint %91 %168 + %170 = OpIAdd %uint %91 %169 + %171 = OpIAdd %uint %91 %170 + %172 = OpIAdd %uint %91 %171 + %173 = OpIAdd %uint %91 %172 + %174 = OpIAdd %uint %91 %173 + %175 = OpIAdd %uint %91 %174 + %176 = OpIAdd %uint %91 %175 + %177 = OpIAdd %uint %91 %176 + %178 = OpIAdd %uint %91 %177 + %179 = OpIAdd %uint %91 %178 + %180 = OpIAdd %uint %91 %179 + %181 = OpIAdd %uint %91 %180 + %182 = OpIAdd %uint %91 %181 + %183 = OpIAdd %uint %91 %182 + %184 = OpIAdd %uint %91 %183 + %185 = OpIAdd %uint %91 %184 + %186 = OpIAdd %uint %91 %185 + %187 = OpIAdd %uint %91 %186 + %188 = OpIAdd %uint %91 %187 + %189 = OpIAdd %uint %91 %188 + %190 = OpIAdd %uint %91 %189 + %191 = OpIAdd %uint %91 %190 + %192 = OpIAdd %uint %91 %191 + %193 = OpIAdd %uint %91 %192 + %194 = OpIAdd %uint %91 %193 + %195 = OpIAdd %uint %91 %194 + %196 = OpIAdd %uint %91 %195 + %197 = OpIAdd %uint %91 %196 + %198 = OpIAdd %uint %91 %197 + %199 = OpIAdd %uint %91 %198 + %200 = OpIAdd %uint %91 %199 + %201 = OpIAdd %uint %91 %200 + %202 = OpIAdd %uint %91 %201 + %203 = OpIAdd %uint %91 %202 + %204 = OpIAdd %uint %91 %203 + %205 = OpIAdd %uint %91 %204 + %206 = OpIAdd %uint %91 %205 + %207 = OpIAdd %uint %91 %206 + %208 = OpIAdd %uint %91 %207 + %209 = OpIAdd %uint %91 %208 + %210 = OpIAdd %uint %91 %209 + %211 = OpIAdd %uint %91 %210 + %212 = OpIAdd %uint %91 %211 + %213 = OpIAdd %uint %91 %212 + %214 = OpIAdd %uint %91 %213 + %215 = OpIAdd %uint %91 %214 + %216 = OpIAdd %uint %91 %215 + %217 = OpIAdd %uint %91 %216 + %218 = OpIAdd %uint %91 %217 + %219 = OpIAdd %uint %91 %218 + %220 = OpIAdd %uint %91 %219 + %221 = OpIAdd %uint %91 %220 + %222 = OpIAdd %uint %91 %221 + %223 = OpIAdd %uint %91 %222 + %224 = OpIAdd %uint %91 %223 + %225 = OpIAdd %uint %91 %224 + %226 = OpIAdd %uint %91 %225 + %227 = OpIAdd %uint %91 %226 + %228 = OpIAdd %uint %91 %227 + %229 = OpIAdd %uint %91 %228 + %230 = OpIAdd %uint %91 %229 + %231 = OpIAdd %uint %91 %230 + %232 = OpIAdd %uint %91 %231 + %233 = OpIAdd %uint %91 %232 + %234 = OpIAdd %uint %91 %233 + %235 = OpIAdd %uint %91 %234 + %236 = OpIAdd %uint %91 %235 + %result = OpIAdd %uint %91 %236 + %10103 = OpAccessChain %_ptr_Uniform_uint %7 %int_0 %10102 + OpStore %10103 %result + OpBranch %10100 + %10100 = OpLabel + %10104 = OpLoad %int %93 + %10105 = OpIAdd %int %10104 %int_1 + OpStore %93 %10105 + OpBranch %94 + %97 = OpLabel + OpReturnValue %89 + OpFunctionEnd + %10106 = OpFunction %bool None %24 + %10107 = OpLabel + %10108 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %10109 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %10110 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %10111 = OpLoad %uint %10108 + %10112 = OpLoad %uint %10109 + %10113 = OpLoad %uint %10110 + %10114 = OpBitwiseOr %uint %10111 %10112 + %10115 = OpBitwiseOr %uint %10113 %10114 + %10116 = OpIEqual %bool %10115 %uint_0 + OpReturnValue %10116 + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index a96c9671..e43398a1 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -9498,6 +9498,7 @@ bool CompilerGLSL::should_forward(uint32_t id) const { // If id is a variable we will try to forward it regardless of force_temporary check below // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL + auto *var = maybe_get(id); if (var && var->forwardable) return true; @@ -9506,6 +9507,13 @@ bool CompilerGLSL::should_forward(uint32_t id) const if (options.force_temporary) return false; + // If an expression carries enough dependencies we need to stop forwarding at some point, + // or we explode compilers. There are usually limits to how much we can nest expressions. + auto *expr = maybe_get(id); + const uint32_t max_expression_dependencies = 64; + if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) + return false; + // Immutable expression can always be forwarded. if (is_immutable(id)) return true;