Implement context-sensitive expression read tracking.
When inside a loop, treat any read of outer expressions to happen multiple times, forcing a temporary of said outer expressions. This avoids the problem where we can end up relying on loop-invariant code motion to happen in the compiler when converting optimized shaders.
This commit is contained in:
parent
05188aca69
commit
3afbfdb090
@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput;
|
||||
|
||||
void main()
|
||||
{
|
||||
vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w);
|
||||
vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv);
|
||||
float _50 = _49.y;
|
||||
float _53 = clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375);
|
||||
float _55;
|
||||
float _58;
|
||||
_55 = 0.0;
|
||||
@ -31,8 +33,8 @@ void main()
|
||||
for (int _60 = -3; _60 <= 3; )
|
||||
{
|
||||
float _64 = float(_60);
|
||||
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
|
||||
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
|
||||
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64));
|
||||
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53);
|
||||
_55 += (_72.x * _78);
|
||||
_58 += _78;
|
||||
_60++;
|
||||
|
@ -14,12 +14,13 @@ layout(location = 0) out vec4 fragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
highp float _24 = 1.0 / float(textureSize(tex, 0).x);
|
||||
highp float _34 = dFdx(vertex.x);
|
||||
float _62;
|
||||
_62 = 0.0;
|
||||
for (float _61 = 0.0; _61 < _44.count; )
|
||||
{
|
||||
_62 += ((1.0 / float(textureSize(tex, 0).x)) * _34);
|
||||
_62 += (_24 * _34);
|
||||
_61 += 1.0;
|
||||
continue;
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]],
|
||||
{
|
||||
device foo* _46 = select_buffer(buf, cb);
|
||||
device foo* _45 = _46;
|
||||
thread uint3* _47 = select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb);
|
||||
device foo* _48 = _45;
|
||||
device int* _52;
|
||||
device int* _55;
|
||||
@ -46,7 +47,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]],
|
||||
_58 = *_55;
|
||||
if (_57 != _58)
|
||||
{
|
||||
int _66 = (_57 + _58) + int((*select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb)).x);
|
||||
int _66 = (_57 + _58) + int((*_47).x);
|
||||
*_52 = _66;
|
||||
*_55 = _66;
|
||||
_52 = &_52[1u];
|
||||
|
@ -125,6 +125,7 @@ void main()
|
||||
_151.UvStuds = IN_UvStuds_EdgeDistance2.xy;
|
||||
SurfaceInput _156 = _151;
|
||||
_156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25;
|
||||
float _160 = clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0);
|
||||
float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y;
|
||||
float _165 = clamp(1.0 - _163, 0.0, 1.0);
|
||||
vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0;
|
||||
@ -141,7 +142,7 @@ void main()
|
||||
else
|
||||
{
|
||||
float _180 = 1.0 / (1.0 - 0.0);
|
||||
_193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0)));
|
||||
_193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((_160 * _180) - (0.0 * _180), 0.0, 1.0)));
|
||||
break;
|
||||
}
|
||||
_193 = _192;
|
||||
|
@ -216,6 +216,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
|
||||
{
|
||||
main0_out out = {};
|
||||
float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w);
|
||||
float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation);
|
||||
float3 _181 = normalize(-in.in_var_TEXCOORD8.xyz);
|
||||
float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0)));
|
||||
float2 _190 = (_187.xy * float2(2.0)) - float2(1.0);
|
||||
@ -330,7 +331,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
|
||||
{
|
||||
if (_491 < _Globals.NumDynamicPointLights)
|
||||
{
|
||||
float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - (in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation));
|
||||
float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179;
|
||||
float _502 = dot(_501, _501);
|
||||
float3 _505 = _501 * float3(rsqrt(_502));
|
||||
_507 = normalize(_181 + _505);
|
||||
|
@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
|
||||
float3 _151 = normalize(-_148);
|
||||
float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz);
|
||||
float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0));
|
||||
float _171 = floor(_170);
|
||||
float _172 = 1.0 / _170;
|
||||
float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172);
|
||||
float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y));
|
||||
@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
|
||||
float _189 = 1.0;
|
||||
for (;;)
|
||||
{
|
||||
if (float(_185) < (floor(_170) + 2.0))
|
||||
if (float(_185) < (_171 + 2.0))
|
||||
{
|
||||
_188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y;
|
||||
if (_180 < _188)
|
||||
|
@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
|
||||
float3 _151 = normalize(-_148);
|
||||
float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz);
|
||||
float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0));
|
||||
float _171 = floor(_170);
|
||||
float _172 = 1.0 / _170;
|
||||
float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172);
|
||||
float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y));
|
||||
@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu
|
||||
float _189 = 1.0;
|
||||
for (;;)
|
||||
{
|
||||
if (float(_185) < (floor(_170) + 2.0))
|
||||
if (float(_185) < (_171 + 2.0))
|
||||
{
|
||||
_188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y;
|
||||
if (_180 < _188)
|
||||
|
@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
|
||||
uint _107 = _103 + 1u;
|
||||
if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
|
||||
{
|
||||
float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz);
|
||||
float _122 = _96.x;
|
||||
float _123 = _96.y;
|
||||
spvUnsafeArray<float3, 8> _73;
|
||||
@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
|
||||
_158 = float3(500000.0);
|
||||
for (int _160 = 0; _160 < 8; )
|
||||
{
|
||||
float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
|
||||
float3 _166 = _73[_160] - _121;
|
||||
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
|
||||
_155 = fast::max(_155, _170);
|
||||
_158 = fast::min(_158, _170);
|
||||
|
@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
|
||||
uint _107 = _103 + 1u;
|
||||
if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
|
||||
{
|
||||
float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz);
|
||||
float _122 = _96.x;
|
||||
float _123 = _96.y;
|
||||
spvUnsafeArray<float3, 8> _73;
|
||||
@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB
|
||||
_158 = float3(500000.0);
|
||||
for (int _160 = 0; _160 < 8; )
|
||||
{
|
||||
float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
|
||||
float3 _166 = _73[_160] - _121;
|
||||
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
|
||||
_155 = fast::max(_155, _170);
|
||||
_158 = fast::min(_158, _170);
|
||||
|
@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput;
|
||||
|
||||
void main()
|
||||
{
|
||||
vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w);
|
||||
vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv);
|
||||
float _50 = _49.y;
|
||||
float _53 = clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375);
|
||||
float _55;
|
||||
float _58;
|
||||
_55 = 0.0;
|
||||
@ -31,8 +33,8 @@ void main()
|
||||
for (int _60 = -3; _60 <= 3; )
|
||||
{
|
||||
float _64 = float(_60);
|
||||
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
|
||||
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
|
||||
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64));
|
||||
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53);
|
||||
_55 += (_72.x * _78);
|
||||
_58 += _78;
|
||||
_60++;
|
||||
|
@ -262,6 +262,29 @@ inline std::string convert_to_string(double t, char locale_radix_point)
|
||||
return buf;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct ValueSaver
|
||||
{
|
||||
explicit ValueSaver(T ¤t_)
|
||||
: current(current_)
|
||||
, saved(current_)
|
||||
{
|
||||
}
|
||||
|
||||
void release()
|
||||
{
|
||||
current = saved;
|
||||
}
|
||||
|
||||
~ValueSaver()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
T ¤t;
|
||||
T saved;
|
||||
};
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#elif defined(_MSC_VER)
|
||||
@ -699,6 +722,9 @@ struct SPIRExpression : IVariant
|
||||
// Used by access chain Store and Load since we read multiple expressions in this case.
|
||||
SmallVector<ID> implied_read_expressions;
|
||||
|
||||
// The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads.
|
||||
uint32_t emitted_loop_level = 0;
|
||||
|
||||
SPIRV_CROSS_DECLARE_CLONE(SPIRExpression)
|
||||
};
|
||||
|
||||
|
@ -4674,3 +4674,8 @@ bool Compiler::flush_phi_required(BlockID from, BlockID to) const
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void Compiler::add_loop_level()
|
||||
{
|
||||
current_loop_level++;
|
||||
}
|
||||
|
@ -513,9 +513,22 @@ protected:
|
||||
|
||||
SPIRFunction *current_function = nullptr;
|
||||
SPIRBlock *current_block = nullptr;
|
||||
uint32_t current_loop_level = 0;
|
||||
std::unordered_set<VariableID> active_interface_variables;
|
||||
bool check_active_interface_variables = false;
|
||||
|
||||
void add_loop_level();
|
||||
|
||||
void set_initializers(SPIRExpression &e)
|
||||
{
|
||||
e.emitted_loop_level = current_loop_level;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void set_initializers(const T &)
|
||||
{
|
||||
}
|
||||
|
||||
// If our IDs are out of range here as part of opcodes, throw instead of
|
||||
// undefined behavior.
|
||||
template <typename T, typename... P>
|
||||
@ -524,6 +537,7 @@ protected:
|
||||
ir.add_typed_id(static_cast<Types>(T::type), id);
|
||||
auto &var = variant_set<T>(ir.ids[id], std::forward<P>(args)...);
|
||||
var.self = id;
|
||||
set_initializers(var);
|
||||
return var;
|
||||
}
|
||||
|
||||
|
@ -341,6 +341,7 @@ void CompilerGLSL::reset()
|
||||
|
||||
statement_count = 0;
|
||||
indent = 0;
|
||||
current_loop_level = 0;
|
||||
}
|
||||
|
||||
void CompilerGLSL::remap_pls_variables()
|
||||
@ -4547,6 +4548,17 @@ bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
|
||||
return suppressed_usage_tracking.count(id) != 0;
|
||||
}
|
||||
|
||||
bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
|
||||
{
|
||||
auto *expr = maybe_get<SPIRExpression>(id);
|
||||
if (!expr)
|
||||
return false;
|
||||
|
||||
// If we're emitting code at a deeper loop level than when we emitted the expression,
|
||||
// we're probably reading the same expression over and over.
|
||||
return current_loop_level > expr->emitted_loop_level;
|
||||
}
|
||||
|
||||
SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
|
||||
bool suppress_usage_tracking)
|
||||
{
|
||||
@ -8169,6 +8181,13 @@ void CompilerGLSL::track_expression_read(uint32_t id)
|
||||
auto &v = expression_usage_counts[id];
|
||||
v++;
|
||||
|
||||
// If we create an expression outside a loop,
|
||||
// but access it inside a loop, we're implicitly reading it multiple times.
|
||||
// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
|
||||
// working inside the backend compiler.
|
||||
if (expression_read_implies_multiple_reads(id))
|
||||
v++;
|
||||
|
||||
if (v >= 2)
|
||||
{
|
||||
//if (v == 2)
|
||||
@ -13000,6 +13019,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
bool skip_direct_branch = false;
|
||||
bool emitted_loop_header_variables = false;
|
||||
bool force_complex_continue_block = false;
|
||||
ValueSaver<uint32_t> loop_level_saver(current_loop_level);
|
||||
|
||||
if (block.merge == SPIRBlock::MergeLoop)
|
||||
add_loop_level();
|
||||
|
||||
emit_hoisted_temporaries(block.declare_temporary);
|
||||
|
||||
@ -13550,6 +13573,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
else
|
||||
end_scope();
|
||||
|
||||
loop_level_saver.release();
|
||||
|
||||
// We cannot break out of two loops at once, so don't check for break; here.
|
||||
// Using block.self as the "from" block isn't quite right, but it has the same scope
|
||||
// and dominance structure, so it's fine.
|
||||
|
@ -549,6 +549,7 @@ protected:
|
||||
void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
|
||||
bool expression_is_forwarded(uint32_t id) const;
|
||||
bool expression_suppresses_usage_tracking(uint32_t id) const;
|
||||
bool expression_read_implies_multiple_reads(uint32_t id) const;
|
||||
SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
|
||||
bool suppress_usage_tracking = false);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user