Optimize for read-modify-writes.
Required for legacy loop increments.
This commit is contained in:
parent
a714d424d0
commit
62613df5a5
@ -64,7 +64,7 @@ void test()
|
||||
}
|
||||
}
|
||||
float h;
|
||||
for (int i = 0; i < 20; i = i + 1, h = h + 10.0)
|
||||
for (int i = 0; i < 20; i += 1, h += 10.0)
|
||||
{
|
||||
}
|
||||
_11.data = h;
|
||||
|
@ -22,7 +22,7 @@ void main()
|
||||
do
|
||||
{
|
||||
idat = _28.mvp * idat;
|
||||
i = i + 1;
|
||||
i += 1;
|
||||
} while (i < 16);
|
||||
_52.out_data[ident] = idat;
|
||||
}
|
||||
|
@ -35,10 +35,10 @@ void baz(out Foo foo)
|
||||
|
||||
void meow(inout Foo foo)
|
||||
{
|
||||
foo.a = foo.a + vec4(10.0);
|
||||
foo.b = foo.b + vec4(20.0);
|
||||
foo.c = foo.c + vec4(30.0);
|
||||
foo.d = foo.d + vec4(40.0);
|
||||
foo.a += vec4(10.0);
|
||||
foo.b += vec4(20.0);
|
||||
foo.c += vec4(30.0);
|
||||
foo.d += vec4(40.0);
|
||||
}
|
||||
|
||||
vec4 bar(Foo foo)
|
||||
|
@ -22,8 +22,8 @@ void main()
|
||||
{
|
||||
do
|
||||
{
|
||||
k = k * 2;
|
||||
i = i + uint(1);
|
||||
k *= 2;
|
||||
i += uint(1);
|
||||
} while (i < ident);
|
||||
}
|
||||
switch (k)
|
||||
@ -32,7 +32,7 @@ void main()
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
i = i + uint(1);
|
||||
i += uint(1);
|
||||
if (i > 10u)
|
||||
{
|
||||
break;
|
||||
@ -45,7 +45,7 @@ void main()
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
i = i + 2u;
|
||||
i += 2u;
|
||||
if (i > 20u)
|
||||
{
|
||||
break;
|
||||
@ -57,12 +57,12 @@ void main()
|
||||
}
|
||||
while (k < 10)
|
||||
{
|
||||
idat = idat * 2.0;
|
||||
k = k + 1;
|
||||
idat *= 2.0;
|
||||
k += 1;
|
||||
}
|
||||
for (uint i_1 = 0u; i_1 < 16u; i_1 = i_1 + uint(1), k = k + 1)
|
||||
for (uint i_1 = 0u; i_1 < 16u; i_1 += uint(1), k += 1)
|
||||
{
|
||||
for (uint j = 0u; j < 30u; j = j + uint(1))
|
||||
for (uint j = 0u; j < 30u; j += uint(1))
|
||||
{
|
||||
idat = _24.mvp * idat;
|
||||
}
|
||||
@ -70,34 +70,34 @@ void main()
|
||||
k = 0;
|
||||
for (;;)
|
||||
{
|
||||
k = k + 1;
|
||||
k += 1;
|
||||
if (k > 10)
|
||||
{
|
||||
k = k + 2;
|
||||
k += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
k = k + 3;
|
||||
k += 3;
|
||||
continue;
|
||||
}
|
||||
k = k + 10;
|
||||
k += 10;
|
||||
continue;
|
||||
}
|
||||
k = 0;
|
||||
do
|
||||
{
|
||||
k = k + 1;
|
||||
k += 1;
|
||||
} while (k > 10);
|
||||
int l = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (l == 5)
|
||||
{
|
||||
l = l + 1;
|
||||
l += 1;
|
||||
continue;
|
||||
}
|
||||
idat = idat + vec4(1.0);
|
||||
l = l + 1;
|
||||
idat += vec4(1.0);
|
||||
l += 1;
|
||||
continue;
|
||||
}
|
||||
_177.out_data[ident] = idat;
|
||||
|
@ -21,7 +21,7 @@ void main()
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 20; i = i + 1)
|
||||
for (int i = 0; i < 20; i += 1)
|
||||
{
|
||||
if (i == 10)
|
||||
{
|
||||
|
@ -24,8 +24,8 @@ void main()
|
||||
k = _40;
|
||||
if (_40 < 10)
|
||||
{
|
||||
idat = idat * 2.0;
|
||||
k = k + 1;
|
||||
idat *= 2.0;
|
||||
k += 1;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
@ -33,16 +33,16 @@ void main()
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (uint i = 0u; i < 16u; i = i + uint(1), k = k + 1)
|
||||
for (uint i = 0u; i < 16u; i += uint(1), k += 1)
|
||||
{
|
||||
for (uint j = 0u; j < 30u; j = j + uint(1))
|
||||
for (uint j = 0u; j < 30u; j += uint(1))
|
||||
{
|
||||
idat = _24.mvp * idat;
|
||||
}
|
||||
}
|
||||
do
|
||||
{
|
||||
k = k + 1;
|
||||
k += 1;
|
||||
} while (k > 10);
|
||||
_89.out_data[ident] = idat;
|
||||
}
|
||||
|
@ -37,8 +37,8 @@ layout(binding = 3, std140) buffer SSBO3
|
||||
|
||||
void main()
|
||||
{
|
||||
ssbo_0.a = ssbo_0.a + dvec4(10.0lf, 20.0lf, 30.0lf, 40.0lf);
|
||||
ssbo_0.a = ssbo_0.a + dvec4(20.0lf);
|
||||
ssbo_0.a += dvec4(10.0lf, 20.0lf, 30.0lf, 40.0lf);
|
||||
ssbo_0.a += dvec4(20.0lf);
|
||||
dvec4 a = ssbo_0.a;
|
||||
dmat4 amat = ssbo_0.b;
|
||||
ssbo_0.a = abs(a);
|
||||
@ -77,8 +77,8 @@ void main()
|
||||
k = lessThanEqual(a, a);
|
||||
k = greaterThan(a, a);
|
||||
k = greaterThanEqual(a, a);
|
||||
ssbo_1.b.x = ssbo_1.b.x + 1.0lf;
|
||||
ssbo_2.b[0].x = ssbo_2.b[0].x + 1.0lf;
|
||||
ssbo_3.b[0].x = ssbo_3.b[0].x + 1.0lf;
|
||||
ssbo_1.b.x += 1.0lf;
|
||||
ssbo_2.b[0].x += 1.0lf;
|
||||
ssbo_3.b[0].x += 1.0lf;
|
||||
}
|
||||
|
||||
|
@ -36,17 +36,17 @@ layout(binding = 3, std140) buffer SSBO3
|
||||
|
||||
void main()
|
||||
{
|
||||
ssbo_0.a = ssbo_0.a + i64vec4(10l, 20l, 30l, 40l);
|
||||
ssbo_1.b = ssbo_1.b + u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul);
|
||||
ssbo_0.a = ssbo_0.a + i64vec4(20l);
|
||||
ssbo_0.a += i64vec4(10l, 20l, 30l, 40l);
|
||||
ssbo_1.b += u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul);
|
||||
ssbo_0.a += i64vec4(20l);
|
||||
ssbo_0.a = abs(ssbo_0.a + i64vec4(ssbo_1.b));
|
||||
ssbo_0.a = ssbo_0.a + i64vec4(1l);
|
||||
ssbo_1.b = ssbo_1.b + u64vec4(i64vec4(1l));
|
||||
ssbo_0.a = ssbo_0.a - i64vec4(1l);
|
||||
ssbo_1.b = ssbo_1.b - u64vec4(i64vec4(1l));
|
||||
ssbo_0.a += i64vec4(1l);
|
||||
ssbo_1.b += u64vec4(i64vec4(1l));
|
||||
ssbo_0.a -= i64vec4(1l);
|
||||
ssbo_1.b -= u64vec4(i64vec4(1l));
|
||||
ssbo_1.b = doubleBitsToUint64(int64BitsToDouble(ssbo_0.a));
|
||||
ssbo_0.a = doubleBitsToInt64(uint64BitsToDouble(ssbo_1.b));
|
||||
ssbo_2.a[0] = ssbo_2.a[0] + 1l;
|
||||
ssbo_3.a[0] = ssbo_3.a[0] + 2l;
|
||||
ssbo_2.a[0] += 1l;
|
||||
ssbo_3.a[0] += 2l;
|
||||
}
|
||||
|
||||
|
@ -7,46 +7,46 @@ layout(location = 0) out mediump int FragColor;
|
||||
void main()
|
||||
{
|
||||
FragColor = 15;
|
||||
for (mediump int i = 0; i < 25; i = i + 1)
|
||||
for (mediump int i = 0; i < 25; i += 1)
|
||||
{
|
||||
FragColor = FragColor + 10;
|
||||
FragColor += 10;
|
||||
}
|
||||
for (mediump int j = 4, i_1 = 1; i_1 < 30; i_1 = i_1 + 1, j = j + 4)
|
||||
for (mediump int j = 4, i_1 = 1; i_1 < 30; i_1 += 1, j += 4)
|
||||
{
|
||||
FragColor = FragColor + 11;
|
||||
FragColor += 11;
|
||||
}
|
||||
mediump int k = 0;
|
||||
for (; k < 20; k = k + 1)
|
||||
for (; k < 20; k += 1)
|
||||
{
|
||||
FragColor = FragColor + 12;
|
||||
FragColor += 12;
|
||||
}
|
||||
k = k + 3;
|
||||
FragColor = FragColor + k;
|
||||
k += 3;
|
||||
FragColor += k;
|
||||
mediump int l;
|
||||
if (k == 40)
|
||||
{
|
||||
l = 0;
|
||||
for (; l < 40; l = l + 1)
|
||||
for (; l < 40; l += 1)
|
||||
{
|
||||
FragColor = FragColor + 13;
|
||||
FragColor += 13;
|
||||
}
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
l = k;
|
||||
FragColor = FragColor + l;
|
||||
FragColor += l;
|
||||
}
|
||||
mediump ivec2 i_2 = ivec2(0);
|
||||
for (; i_2.x < 10; i_2.x = i_2.x + 1)
|
||||
for (; i_2.x < 10; i_2.x += 1)
|
||||
{
|
||||
FragColor = FragColor + i_2.y;
|
||||
FragColor += i_2.y;
|
||||
}
|
||||
mediump int o = k;
|
||||
for (mediump int m = k; m < 40; m = m + 1)
|
||||
for (mediump int m = k; m < 40; m += 1)
|
||||
{
|
||||
FragColor = FragColor + m;
|
||||
FragColor += m;
|
||||
}
|
||||
FragColor = FragColor + o;
|
||||
FragColor += o;
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ void main()
|
||||
vec3 base = mix(grass, snow, vec3(grass_snow));
|
||||
float edge = smoothstep(0.699999988079071044921875, 0.75, Normal.y);
|
||||
Color = mix(dirt, base, vec3(edge));
|
||||
Color = Color * Color;
|
||||
Color *= Color;
|
||||
float Roughness = 1.0 - (edge * grass_snow);
|
||||
highp vec3 param_1 = Color;
|
||||
highp vec3 param_2 = Normal;
|
||||
|
@ -46,7 +46,7 @@ void main()
|
||||
vec2 param_1 = tess_coord;
|
||||
mediump vec2 lod = lod_factor(param_1);
|
||||
vec2 tex = pos * _31.uInvHeightmapSize;
|
||||
pos = pos * _31.uScale.xy;
|
||||
pos *= _31.uScale.xy;
|
||||
mediump float delta_mod = exp2(lod.x);
|
||||
vec2 off = _31.uInvHeightmapSize * delta_mod;
|
||||
vGradNormalTex = vec4(tex + (_31.uInvHeightmapSize * 0.5), tex * _31.uScale.zw);
|
||||
@ -54,7 +54,7 @@ void main()
|
||||
vec2 param_3 = off;
|
||||
vec2 param_4 = lod;
|
||||
vec3 height_displacement = sample_height_displacement(param_2, param_3, param_4);
|
||||
pos = pos + height_displacement.yz;
|
||||
pos += height_displacement.yz;
|
||||
vWorld = vec3(pos.x, height_displacement.x, pos.y);
|
||||
gl_Position = _31.uMVP * vec4(vWorld, 1.0);
|
||||
}
|
||||
|
@ -101,8 +101,8 @@ void main()
|
||||
vec2 Offset = _381.InvGroundSize_PatchScale.xy * exp2(lod.x);
|
||||
float Elevation = mix(textureLod(TexHeightmap, NormalizedPos + (Offset * 0.5), lod.x).x, textureLod(TexHeightmap, NormalizedPos + (Offset * 1.0), lod.x + 1.0).x, lod.y);
|
||||
vec3 WorldPos = vec3(NormalizedPos.x, Elevation, NormalizedPos.y);
|
||||
WorldPos = WorldPos * _381.GroundScale.xyz;
|
||||
WorldPos = WorldPos + _381.GroundPosition.xyz;
|
||||
WorldPos *= _381.GroundScale.xyz;
|
||||
WorldPos += _381.GroundPosition.xyz;
|
||||
EyeVec = WorldPos - _58.g_CamPos.xyz;
|
||||
TexCoord = NormalizedPos + (_381.InvGroundSize_PatchScale.xy * 0.5);
|
||||
gl_Position = (((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3;
|
||||
|
@ -124,8 +124,8 @@ void main()
|
||||
vec2 Offset = (_405.InvOceanSize_PatchScale.xy * exp2(lod.x)) * _405.NormalTexCoordScale.zw;
|
||||
vec3 Displacement = mix(textureLod(TexDisplacement, NormalizedTex + (Offset * 0.5), lod.x).yxz, textureLod(TexDisplacement, NormalizedTex + (Offset * 1.0), lod.x + 1.0).yxz, vec3(lod.y));
|
||||
vec3 WorldPos = vec3(NormalizedPos.x, 0.0, NormalizedPos.y) + Displacement;
|
||||
WorldPos = WorldPos * _405.OceanScale.xyz;
|
||||
WorldPos = WorldPos + _405.OceanPosition.xyz;
|
||||
WorldPos *= _405.OceanScale.xyz;
|
||||
WorldPos += _405.OceanPosition.xyz;
|
||||
EyeVec = WorldPos - _58.g_CamPos.xyz;
|
||||
TexCoord = vec4(NormalizedTex, NormalizedTex * _405.NormalTexCoordScale.xy) + ((_405.InvOceanSize_PatchScale.xyxy * 0.5) * _405.NormalTexCoordScale.zwzw);
|
||||
gl_Position = (((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3;
|
||||
|
@ -3426,6 +3426,23 @@ bool CompilerGLSL::skip_argument(uint32_t id) const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &rhs)
|
||||
{
|
||||
// Do this with strings because we have a very clear pattern we can check for and it avoids
|
||||
// adding lots of special cases to the code emission.
|
||||
auto index = rhs.find(lhs);
|
||||
if (index != 0)
|
||||
return false;
|
||||
|
||||
// TODO: Shift operators, but it's not important for now.
|
||||
auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
|
||||
if (op != lhs.size() + 1)
|
||||
return false;
|
||||
|
||||
statement(lhs, " ", rhs[op], "=", rhs.substr(lhs.size() + 2), ";");
|
||||
return true;
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
{
|
||||
auto ops = stream(instruction);
|
||||
@ -3503,7 +3520,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
// For this case, we don't need to invalidate anything and emit any opcode.
|
||||
if (lhs != rhs)
|
||||
{
|
||||
statement(lhs, " = ", rhs, ";");
|
||||
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
|
||||
// While this is purely cosmetic, this is important for legacy ESSL where loop
|
||||
// variable increments must be in either i++ or i += const-expr.
|
||||
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
|
||||
if (!optimize_read_modify_write(lhs, rhs))
|
||||
statement(lhs, " = ", rhs, ";");
|
||||
register_write(ops[0]);
|
||||
}
|
||||
}
|
||||
|
@ -390,6 +390,8 @@ protected:
|
||||
void find_static_extensions();
|
||||
|
||||
std::string emit_for_loop_initializers(const SPIRBlock &block);
|
||||
|
||||
// Tries to emit "lhs = rhs" as a compound assignment "lhs op= expr" when rhs textually starts
// with lhs followed by a binary operator. Returns true if a statement was emitted; on false
// nothing was emitted and the caller must emit the plain assignment.
bool optimize_read_modify_write(const std::string &lhs, const std::string &rhs);
|
||||
};
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user