Implement context-sensitive expression read tracking.

When inside a loop, treat any read of outer expressions to happen multiple times, forcing a temporary of said outer expressions. This avoids the problem where we can end up relying on loop-invariant code motion to happen in the compiler when converting optimized shaders.
2020-06-29 12:20:35 +02:00 · 2020-06-29 12:20:35 +02:00 · 3afbfdb090
commit 3afbfdb090
parent 05188aca69
15 changed files with 95 additions and 12 deletions
--- a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag
+++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag
@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput;

 void main()
 {
+    vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w);
    vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv);
    float _50 = _49.y;
+    float _53 = clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375);
    float _55;
    float _58;
    _55 = 0.0;
@ -31,8 +33,8 @@ void main()
    for (int _60 = -3; _60 <= 3; )
    {
        float _64 = float(_60);
-        vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
-        float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
+        vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64));
+        float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53);
        _55 += (_72.x * _78);
        _58 += _78;
        _60++;
--- a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag
+++ b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag
@ -14,12 +14,13 @@ layout(location = 0) out vec4 fragColor;

 void main()
 {
+    highp float _24 = 1.0 / float(textureSize(tex, 0).x);
    highp float _34 = dFdx(vertex.x);
    float _62;
    _62 = 0.0;
    for (float _61 = 0.0; _61 < _44.count; )
    {
-        _62 += ((1.0 / float(textureSize(tex, 0).x)) * _34);
+        _62 += (_24 * _34);
        _61 += 1.0;
        continue;
    }
--- a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp
+++ b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp
@ -33,6 +33,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]],
 {
    device foo* _46 = select_buffer(buf, cb);
    device foo* _45 = _46;
+    thread uint3* _47 = select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb);
    device foo* _48 = _45;
    device int* _52;
    device int* _55;
@ -46,7 +47,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]],
        _58 = *_55;
        if (_57 != _58)
        {
-            int _66 = (_57 + _58) + int((*select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb)).x);
+            int _66 = (_57 + _58) + int((*_47).x);
            *_52 = _66;
            *_55 = _66;
            _52 = &_52[1u];
--- a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
+++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
@ -125,6 +125,7 @@ void main()
    _151.UvStuds = IN_UvStuds_EdgeDistance2.xy;
    SurfaceInput _156 = _151;
    _156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25;
+    float _160 = clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0);
    float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y;
    float _165 = clamp(1.0 - _163, 0.0, 1.0);
    vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0;
@ -141,7 +142,7 @@ void main()
        else
        {
            float _180 = 1.0 / (1.0 - 0.0);
-            _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0)));
+            _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((_160 * _180) - (0.0 * _180), 0.0, 1.0)));
            break;
        }
        _193 = _192;
--- a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag
+++ b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag
@ -216,6 +216,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
 {
    main0_out out = {};
    float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w);
+    float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation);
    float3 _181 = normalize(-in.in_var_TEXCOORD8.xyz);
    float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0)));
    float2 _190 = (_187.xy * float2(2.0)) - float2(1.0);
@ -330,7 +331,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
    {
        if (_491 < _Globals.NumDynamicPointLights)
        {
-            float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - (in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation));
+            float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179;
            float _502 = dot(_501, _501);
            float3 _505 = _501 * float3(rsqrt(_502));
            _507 = normalize(_181 + _505);
--- a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag
+++ b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag
@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
    float3 _151 = normalize(-_148);
    float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz);
    float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0));
+    float _171 = floor(_170);
    float _172 = 1.0 / _170;
    float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172);
    float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y));
@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
    float _189 = 1.0;
    for (;;)
    {
-        if (float(_185) < (floor(_170) + 2.0))
+        if (float(_185) < (_171 + 2.0))
        {
            _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y;
            if (_180 < _188)
--- a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag
+++ b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag
@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
    float3 _151 = normalize(-_148);
    float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz);
    float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0));
+    float _171 = floor(_170);
    float _172 = 1.0 / _170;
    float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172);
    float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y));
@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
    float _189 = 1.0;
    for (;;)
    {
-        if (float(_185) < (floor(_170) + 2.0))
+        if (float(_185) < (_171 + 2.0))
        {
            _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y;
            if (_180 < _188)
--- a/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag
+++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag
@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
    uint _107 = _103 + 1u;
    if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
    {
+        float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz);
        float _122 = _96.x;
        float _123 = _96.y;
        spvUnsafeArray<float3, 8> _73;
@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
        _158 = float3(500000.0);
        for (int _160 = 0; _160 < 8; )
        {
-            float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
+            float3 _166 = _73[_160] - _121;
            float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
            _155 = fast::max(_155, _170);
            _158 = fast::min(_158, _170);
--- a/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag
+++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag
@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
    uint _107 = _103 + 1u;
    if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
    {
+        float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz);
        float _122 = _96.x;
        float _123 = _96.y;
        spvUnsafeArray<float3, 8> _73;
@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
        _158 = float3(500000.0);
        for (int _160 = 0; _160 < 8; )
        {
-            float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
+            float3 _166 = _73[_160] - _121;
            float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
            _155 = fast::max(_155, _170);
            _158 = fast::min(_158, _170);
--- a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag
+++ b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag
@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput;

 void main()
 {
+    vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w);
    vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv);
    float _50 = _49.y;
+    float _53 = clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375);
    float _55;
    float _58;
    _55 = 0.0;
@ -31,8 +33,8 @@ void main()
    for (int _60 = -3; _60 <= 3; )
    {
        float _64 = float(_60);
-        vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
-        float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
+        vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64));
+        float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53);
        _55 += (_72.x * _78);
        _58 += _78;
        _60++;
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -262,6 +262,29 @@ inline std::string convert_to_string(double t, char locale_radix_point)
 	return buf;
 }

+template <typename T>
+struct ValueSaver
+{
+	explicit ValueSaver(T &current_)
+		: current(current_)
+		, saved(current_)
+	{
+	}
+
+	void release()
+	{
+		current = saved;
+	}
+
+	~ValueSaver()
+	{
+		release();
+	}
+
+	T &current;
+	T saved;
+};
+
 #if defined(__clang__) || defined(__GNUC__)
 #pragma GCC diagnostic pop
 #elif defined(_MSC_VER)
@ -699,6 +722,9 @@ struct SPIRExpression : IVariant
 	// Used by access chain Store and Load since we read multiple expressions in this case.
 	SmallVector<ID> implied_read_expressions;

+	// The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads.
+	uint32_t emitted_loop_level = 0;
+
 	SPIRV_CROSS_DECLARE_CLONE(SPIRExpression)
 };

--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@ -4674,3 +4674,8 @@ bool Compiler::flush_phi_required(BlockID from, BlockID to) const
 			return true;
 	return false;
 }
+
+void Compiler::add_loop_level()
+{
+	current_loop_level++;
+}
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@ -513,9 +513,22 @@ protected:

 	SPIRFunction *current_function = nullptr;
 	SPIRBlock *current_block = nullptr;
+	uint32_t current_loop_level = 0;
 	std::unordered_set<VariableID> active_interface_variables;
 	bool check_active_interface_variables = false;

+	void add_loop_level();
+
+	void set_initializers(SPIRExpression &e)
+	{
+		e.emitted_loop_level = current_loop_level;
+	}
+
+	template <typename T>
+	void set_initializers(const T &)
+	{
+	}
+
 	// If our IDs are out of range here as part of opcodes, throw instead of
 	// undefined behavior.
 	template <typename T, typename... P>
@ -524,6 +537,7 @@ protected:
 		ir.add_typed_id(static_cast<Types>(T::type), id);
 		auto &var = variant_set<T>(ir.ids[id], std::forward<P>(args)...);
 		var.self = id;
+		set_initializers(var);
 		return var;
 	}

--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -341,6 +341,7 @@ void CompilerGLSL::reset()

 	statement_count = 0;
 	indent = 0;
+	current_loop_level = 0;
 }

 void CompilerGLSL::remap_pls_variables()
@ -4547,6 +4548,17 @@ bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
 	return suppressed_usage_tracking.count(id) != 0;
 }

+bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
+{
+	auto *expr = maybe_get<SPIRExpression>(id);
+	if (!expr)
+		return false;
+
+	// If we're emitting code at a deeper loop level than when we emitted the expression,
+	// we're probably reading the same expression over and over.
+	return current_loop_level > expr->emitted_loop_level;
+}
+
 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
                                      bool suppress_usage_tracking)
 {
@ -8169,6 +8181,13 @@ void CompilerGLSL::track_expression_read(uint32_t id)
 		auto &v = expression_usage_counts[id];
 		v++;

+		// If we create an expression outside a loop,
+		// but access it inside a loop, we're implicitly reading it multiple times.
+		// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
+		// working inside the backend compiler.
+		if (expression_read_implies_multiple_reads(id))
+			v++;
+
 		if (v >= 2)
 		{
 			//if (v == 2)
@ -13000,6 +13019,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	bool skip_direct_branch = false;
 	bool emitted_loop_header_variables = false;
 	bool force_complex_continue_block = false;
+	ValueSaver<uint32_t> loop_level_saver(current_loop_level);
+
+	if (block.merge == SPIRBlock::MergeLoop)
+		add_loop_level();

 	emit_hoisted_temporaries(block.declare_temporary);

@ -13550,6 +13573,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		else
 			end_scope();

+		loop_level_saver.release();
+
 		// We cannot break out of two loops at once, so don't check for break; here.
 		// Using block.self as the "from" block isn't quite right, but it has the same scope
 		// and dominance structure, so it's fine.
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -549,6 +549,7 @@ protected:
 	void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	bool expression_is_forwarded(uint32_t id) const;
 	bool expression_suppresses_usage_tracking(uint32_t id) const;
+	bool expression_read_implies_multiple_reads(uint32_t id) const;
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
 	                        bool suppress_usage_tracking = false);