Implement context-sensitive expression read tracking.

When inside a loop, treat any read of outer expressions to happen
multiple times, forcing a temporary of said outer expressions.
This avoids the problem where we can end up relying on loop-invariant code motion to happen in the
compiler when converting optimized shaders.
This commit is contained in:
Hans-Kristian Arntzen 2020-06-29 12:20:35 +02:00
parent 05188aca69
commit 3afbfdb090
15 changed files with 95 additions and 12 deletions

View File

@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput;
void main()
{
vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w);
vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv);
float _50 = _49.y;
float _53 = clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375);
float _55;
float _58;
_55 = 0.0;
@ -31,8 +33,8 @@ void main()
for (int _60 = -3; _60 <= 3; )
{
float _64 = float(_60);
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64));
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53);
_55 += (_72.x * _78);
_58 += _78;
_60++;

View File

@ -14,12 +14,13 @@ layout(location = 0) out vec4 fragColor;
void main()
{
highp float _24 = 1.0 / float(textureSize(tex, 0).x);
highp float _34 = dFdx(vertex.x);
float _62;
_62 = 0.0;
for (float _61 = 0.0; _61 < _44.count; )
{
_62 += ((1.0 / float(textureSize(tex, 0).x)) * _34);
_62 += (_24 * _34);
_61 += 1.0;
continue;
}

View File

@ -33,6 +33,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]],
{
device foo* _46 = select_buffer(buf, cb);
device foo* _45 = _46;
thread uint3* _47 = select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb);
device foo* _48 = _45;
device int* _52;
device int* _55;
@ -46,7 +47,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]],
_58 = *_55;
if (_57 != _58)
{
int _66 = (_57 + _58) + int((*select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb)).x);
int _66 = (_57 + _58) + int((*_47).x);
*_52 = _66;
*_55 = _66;
_52 = &_52[1u];

View File

@ -125,6 +125,7 @@ void main()
_151.UvStuds = IN_UvStuds_EdgeDistance2.xy;
SurfaceInput _156 = _151;
_156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25;
float _160 = clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0);
float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y;
float _165 = clamp(1.0 - _163, 0.0, 1.0);
vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0;
@ -141,7 +142,7 @@ void main()
else
{
float _180 = 1.0 / (1.0 - 0.0);
_193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0)));
_193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((_160 * _180) - (0.0 * _180), 0.0, 1.0)));
break;
}
_193 = _192;

View File

@ -216,6 +216,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
{
main0_out out = {};
float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w);
float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation);
float3 _181 = normalize(-in.in_var_TEXCOORD8.xyz);
float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0)));
float2 _190 = (_187.xy * float2(2.0)) - float2(1.0);
@ -330,7 +331,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
{
if (_491 < _Globals.NumDynamicPointLights)
{
float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - (in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation));
float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179;
float _502 = dot(_501, _501);
float3 _505 = _501 * float3(rsqrt(_502));
_507 = normalize(_181 + _505);

View File

@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
float3 _151 = normalize(-_148);
float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz);
float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0));
float _171 = floor(_170);
float _172 = 1.0 / _170;
float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172);
float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y));
@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
float _189 = 1.0;
for (;;)
{
if (float(_185) < (floor(_170) + 2.0))
if (float(_185) < (_171 + 2.0))
{
_188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y;
if (_180 < _188)

View File

@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
float3 _151 = normalize(-_148);
float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz);
float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0));
float _171 = floor(_170);
float _172 = 1.0 / _170;
float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172);
float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y));
@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
float _189 = 1.0;
for (;;)
{
if (float(_185) < (floor(_170) + 2.0))
if (float(_185) < (_171 + 2.0))
{
_188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y;
if (_180 < _188)

View File

@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
uint _107 = _103 + 1u;
if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
{
float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz);
float _122 = _96.x;
float _123 = _96.y;
spvUnsafeArray<float3, 8> _73;
@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
_158 = float3(500000.0);
for (int _160 = 0; _160 < 8; )
{
float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
float3 _166 = _73[_160] - _121;
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
_155 = fast::max(_155, _170);
_158 = fast::min(_158, _170);

View File

@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
uint _107 = _103 + 1u;
if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
{
float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz);
float _122 = _96.x;
float _123 = _96.y;
spvUnsafeArray<float3, 8> _73;
@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
_158 = float3(500000.0);
for (int _160 = 0; _160 < 8; )
{
float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
float3 _166 = _73[_160] - _121;
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
_155 = fast::max(_155, _170);
_158 = fast::min(_158, _170);

View File

@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput;
void main()
{
vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w);
vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv);
float _50 = _49.y;
float _53 = clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375);
float _55;
float _58;
_55 = 0.0;
@ -31,8 +33,8 @@ void main()
for (int _60 = -3; _60 <= 3; )
{
float _64 = float(_60);
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64));
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53);
_55 += (_72.x * _78);
_58 += _78;
_60++;

View File

@ -262,6 +262,29 @@ inline std::string convert_to_string(double t, char locale_radix_point)
return buf;
}
template <typename T>
struct ValueSaver
{
explicit ValueSaver(T &current_)
: current(current_)
, saved(current_)
{
}
void release()
{
current = saved;
}
~ValueSaver()
{
release();
}
T &current;
T saved;
};
#if defined(__clang__) || defined(__GNUC__)
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
@ -699,6 +722,9 @@ struct SPIRExpression : IVariant
// Used by access chain Store and Load since we read multiple expressions in this case.
SmallVector<ID> implied_read_expressions;
// The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads.
uint32_t emitted_loop_level = 0;
SPIRV_CROSS_DECLARE_CLONE(SPIRExpression)
};

View File

@ -4674,3 +4674,8 @@ bool Compiler::flush_phi_required(BlockID from, BlockID to) const
return true;
return false;
}
void Compiler::add_loop_level()
{
current_loop_level++;
}

View File

@ -513,9 +513,22 @@ protected:
SPIRFunction *current_function = nullptr;
SPIRBlock *current_block = nullptr;
uint32_t current_loop_level = 0;
std::unordered_set<VariableID> active_interface_variables;
bool check_active_interface_variables = false;
void add_loop_level();
void set_initializers(SPIRExpression &e)
{
e.emitted_loop_level = current_loop_level;
}
template <typename T>
void set_initializers(const T &)
{
}
// If our IDs are out of range here as part of opcodes, throw instead of
// undefined behavior.
template <typename T, typename... P>
@ -524,6 +537,7 @@ protected:
ir.add_typed_id(static_cast<Types>(T::type), id);
auto &var = variant_set<T>(ir.ids[id], std::forward<P>(args)...);
var.self = id;
set_initializers(var);
return var;
}

View File

@ -341,6 +341,7 @@ void CompilerGLSL::reset()
statement_count = 0;
indent = 0;
current_loop_level = 0;
}
void CompilerGLSL::remap_pls_variables()
@ -4547,6 +4548,17 @@ bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
return suppressed_usage_tracking.count(id) != 0;
}
bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
{
auto *expr = maybe_get<SPIRExpression>(id);
if (!expr)
return false;
// If we're emitting code at a deeper loop level than when we emitted the expression,
// we're probably reading the same expression over and over.
return current_loop_level > expr->emitted_loop_level;
}
SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
bool suppress_usage_tracking)
{
@ -8169,6 +8181,13 @@ void CompilerGLSL::track_expression_read(uint32_t id)
auto &v = expression_usage_counts[id];
v++;
// If we create an expression outside a loop,
// but access it inside a loop, we're implicitly reading it multiple times.
// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
// working inside the backend compiler.
if (expression_read_implies_multiple_reads(id))
v++;
if (v >= 2)
{
//if (v == 2)
@ -13000,6 +13019,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
bool skip_direct_branch = false;
bool emitted_loop_header_variables = false;
bool force_complex_continue_block = false;
ValueSaver<uint32_t> loop_level_saver(current_loop_level);
if (block.merge == SPIRBlock::MergeLoop)
add_loop_level();
emit_hoisted_temporaries(block.declare_temporary);
@ -13550,6 +13573,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
else
end_scope();
loop_level_saver.release();
// We cannot break out of two loops at once, so don't check for break; here.
// Using block.self as the "from" block isn't quite right, but it has the same scope
// and dominance structure, so it's fine.

View File

@ -549,6 +549,7 @@ protected:
void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
bool expression_is_forwarded(uint32_t id) const;
bool expression_suppresses_usage_tracking(uint32_t id) const;
bool expression_read_implies_multiple_reads(uint32_t id) const;
SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
bool suppress_usage_tracking = false);