Optimize for read-modify-writes.

Required for legacy loop increments.
This commit is contained in:
Hans-Kristian Arntzen 2016-12-16 13:14:22 +01:00
parent a714d424d0
commit 62613df5a5
15 changed files with 90 additions and 66 deletions

View File

@ -64,7 +64,7 @@ void test()
}
}
float h;
for (int i = 0; i < 20; i = i + 1, h = h + 10.0)
for (int i = 0; i < 20; i += 1, h += 10.0)
{
}
_11.data = h;

View File

@ -22,7 +22,7 @@ void main()
do
{
idat = _28.mvp * idat;
i = i + 1;
i += 1;
} while (i < 16);
_52.out_data[ident] = idat;
}

View File

@ -35,10 +35,10 @@ void baz(out Foo foo)
void meow(inout Foo foo)
{
foo.a = foo.a + vec4(10.0);
foo.b = foo.b + vec4(20.0);
foo.c = foo.c + vec4(30.0);
foo.d = foo.d + vec4(40.0);
foo.a += vec4(10.0);
foo.b += vec4(20.0);
foo.c += vec4(30.0);
foo.d += vec4(40.0);
}
vec4 bar(Foo foo)

View File

@ -22,8 +22,8 @@ void main()
{
do
{
k = k * 2;
i = i + uint(1);
k *= 2;
i += uint(1);
} while (i < ident);
}
switch (k)
@ -32,7 +32,7 @@ void main()
{
for (;;)
{
i = i + uint(1);
i += uint(1);
if (i > 10u)
{
break;
@ -45,7 +45,7 @@ void main()
{
for (;;)
{
i = i + 2u;
i += 2u;
if (i > 20u)
{
break;
@ -57,12 +57,12 @@ void main()
}
while (k < 10)
{
idat = idat * 2.0;
k = k + 1;
idat *= 2.0;
k += 1;
}
for (uint i_1 = 0u; i_1 < 16u; i_1 = i_1 + uint(1), k = k + 1)
for (uint i_1 = 0u; i_1 < 16u; i_1 += uint(1), k += 1)
{
for (uint j = 0u; j < 30u; j = j + uint(1))
for (uint j = 0u; j < 30u; j += uint(1))
{
idat = _24.mvp * idat;
}
@ -70,34 +70,34 @@ void main()
k = 0;
for (;;)
{
k = k + 1;
k += 1;
if (k > 10)
{
k = k + 2;
k += 2;
}
else
{
k = k + 3;
k += 3;
continue;
}
k = k + 10;
k += 10;
continue;
}
k = 0;
do
{
k = k + 1;
k += 1;
} while (k > 10);
int l = 0;
for (;;)
{
if (l == 5)
{
l = l + 1;
l += 1;
continue;
}
idat = idat + vec4(1.0);
l = l + 1;
idat += vec4(1.0);
l += 1;
continue;
}
_177.out_data[ident] = idat;

View File

@ -21,7 +21,7 @@ void main()
return;
}
}
for (int i = 0; i < 20; i = i + 1)
for (int i = 0; i < 20; i += 1)
{
if (i == 10)
{

View File

@ -24,8 +24,8 @@ void main()
k = _40;
if (_40 < 10)
{
idat = idat * 2.0;
k = k + 1;
idat *= 2.0;
k += 1;
continue;
}
else
@ -33,16 +33,16 @@ void main()
break;
}
}
for (uint i = 0u; i < 16u; i = i + uint(1), k = k + 1)
for (uint i = 0u; i < 16u; i += uint(1), k += 1)
{
for (uint j = 0u; j < 30u; j = j + uint(1))
for (uint j = 0u; j < 30u; j += uint(1))
{
idat = _24.mvp * idat;
}
}
do
{
k = k + 1;
k += 1;
} while (k > 10);
_89.out_data[ident] = idat;
}

View File

@ -37,8 +37,8 @@ layout(binding = 3, std140) buffer SSBO3
void main()
{
ssbo_0.a = ssbo_0.a + dvec4(10.0lf, 20.0lf, 30.0lf, 40.0lf);
ssbo_0.a = ssbo_0.a + dvec4(20.0lf);
ssbo_0.a += dvec4(10.0lf, 20.0lf, 30.0lf, 40.0lf);
ssbo_0.a += dvec4(20.0lf);
dvec4 a = ssbo_0.a;
dmat4 amat = ssbo_0.b;
ssbo_0.a = abs(a);
@ -77,8 +77,8 @@ void main()
k = lessThanEqual(a, a);
k = greaterThan(a, a);
k = greaterThanEqual(a, a);
ssbo_1.b.x = ssbo_1.b.x + 1.0lf;
ssbo_2.b[0].x = ssbo_2.b[0].x + 1.0lf;
ssbo_3.b[0].x = ssbo_3.b[0].x + 1.0lf;
ssbo_1.b.x += 1.0lf;
ssbo_2.b[0].x += 1.0lf;
ssbo_3.b[0].x += 1.0lf;
}

View File

@ -36,17 +36,17 @@ layout(binding = 3, std140) buffer SSBO3
void main()
{
ssbo_0.a = ssbo_0.a + i64vec4(10l, 20l, 30l, 40l);
ssbo_1.b = ssbo_1.b + u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul);
ssbo_0.a = ssbo_0.a + i64vec4(20l);
ssbo_0.a += i64vec4(10l, 20l, 30l, 40l);
ssbo_1.b += u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul);
ssbo_0.a += i64vec4(20l);
ssbo_0.a = abs(ssbo_0.a + i64vec4(ssbo_1.b));
ssbo_0.a = ssbo_0.a + i64vec4(1l);
ssbo_1.b = ssbo_1.b + u64vec4(i64vec4(1l));
ssbo_0.a = ssbo_0.a - i64vec4(1l);
ssbo_1.b = ssbo_1.b - u64vec4(i64vec4(1l));
ssbo_0.a += i64vec4(1l);
ssbo_1.b += u64vec4(i64vec4(1l));
ssbo_0.a -= i64vec4(1l);
ssbo_1.b -= u64vec4(i64vec4(1l));
ssbo_1.b = doubleBitsToUint64(int64BitsToDouble(ssbo_0.a));
ssbo_0.a = doubleBitsToInt64(uint64BitsToDouble(ssbo_1.b));
ssbo_2.a[0] = ssbo_2.a[0] + 1l;
ssbo_3.a[0] = ssbo_3.a[0] + 2l;
ssbo_2.a[0] += 1l;
ssbo_3.a[0] += 2l;
}

View File

@ -7,46 +7,46 @@ layout(location = 0) out mediump int FragColor;
void main()
{
FragColor = 15;
for (mediump int i = 0; i < 25; i = i + 1)
for (mediump int i = 0; i < 25; i += 1)
{
FragColor = FragColor + 10;
FragColor += 10;
}
for (mediump int j = 4, i_1 = 1; i_1 < 30; i_1 = i_1 + 1, j = j + 4)
for (mediump int j = 4, i_1 = 1; i_1 < 30; i_1 += 1, j += 4)
{
FragColor = FragColor + 11;
FragColor += 11;
}
mediump int k = 0;
for (; k < 20; k = k + 1)
for (; k < 20; k += 1)
{
FragColor = FragColor + 12;
FragColor += 12;
}
k = k + 3;
FragColor = FragColor + k;
k += 3;
FragColor += k;
mediump int l;
if (k == 40)
{
l = 0;
for (; l < 40; l = l + 1)
for (; l < 40; l += 1)
{
FragColor = FragColor + 13;
FragColor += 13;
}
return;
}
else
{
l = k;
FragColor = FragColor + l;
FragColor += l;
}
mediump ivec2 i_2 = ivec2(0);
for (; i_2.x < 10; i_2.x = i_2.x + 1)
for (; i_2.x < 10; i_2.x += 1)
{
FragColor = FragColor + i_2.y;
FragColor += i_2.y;
}
mediump int o = k;
for (mediump int m = k; m < 40; m = m + 1)
for (mediump int m = k; m < 40; m += 1)
{
FragColor = FragColor + m;
FragColor += m;
}
FragColor = FragColor + o;
FragColor += o;
}

View File

@ -51,7 +51,7 @@ void main()
vec3 base = mix(grass, snow, vec3(grass_snow));
float edge = smoothstep(0.699999988079071044921875, 0.75, Normal.y);
Color = mix(dirt, base, vec3(edge));
Color = Color * Color;
Color *= Color;
float Roughness = 1.0 - (edge * grass_snow);
highp vec3 param_1 = Color;
highp vec3 param_2 = Normal;

View File

@ -46,7 +46,7 @@ void main()
vec2 param_1 = tess_coord;
mediump vec2 lod = lod_factor(param_1);
vec2 tex = pos * _31.uInvHeightmapSize;
pos = pos * _31.uScale.xy;
pos *= _31.uScale.xy;
mediump float delta_mod = exp2(lod.x);
vec2 off = _31.uInvHeightmapSize * delta_mod;
vGradNormalTex = vec4(tex + (_31.uInvHeightmapSize * 0.5), tex * _31.uScale.zw);
@ -54,7 +54,7 @@ void main()
vec2 param_3 = off;
vec2 param_4 = lod;
vec3 height_displacement = sample_height_displacement(param_2, param_3, param_4);
pos = pos + height_displacement.yz;
pos += height_displacement.yz;
vWorld = vec3(pos.x, height_displacement.x, pos.y);
gl_Position = _31.uMVP * vec4(vWorld, 1.0);
}

View File

@ -101,8 +101,8 @@ void main()
vec2 Offset = _381.InvGroundSize_PatchScale.xy * exp2(lod.x);
float Elevation = mix(textureLod(TexHeightmap, NormalizedPos + (Offset * 0.5), lod.x).x, textureLod(TexHeightmap, NormalizedPos + (Offset * 1.0), lod.x + 1.0).x, lod.y);
vec3 WorldPos = vec3(NormalizedPos.x, Elevation, NormalizedPos.y);
WorldPos = WorldPos * _381.GroundScale.xyz;
WorldPos = WorldPos + _381.GroundPosition.xyz;
WorldPos *= _381.GroundScale.xyz;
WorldPos += _381.GroundPosition.xyz;
EyeVec = WorldPos - _58.g_CamPos.xyz;
TexCoord = NormalizedPos + (_381.InvGroundSize_PatchScale.xy * 0.5);
gl_Position = (((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3;

View File

@ -124,8 +124,8 @@ void main()
vec2 Offset = (_405.InvOceanSize_PatchScale.xy * exp2(lod.x)) * _405.NormalTexCoordScale.zw;
vec3 Displacement = mix(textureLod(TexDisplacement, NormalizedTex + (Offset * 0.5), lod.x).yxz, textureLod(TexDisplacement, NormalizedTex + (Offset * 1.0), lod.x + 1.0).yxz, vec3(lod.y));
vec3 WorldPos = vec3(NormalizedPos.x, 0.0, NormalizedPos.y) + Displacement;
WorldPos = WorldPos * _405.OceanScale.xyz;
WorldPos = WorldPos + _405.OceanPosition.xyz;
WorldPos *= _405.OceanScale.xyz;
WorldPos += _405.OceanPosition.xyz;
EyeVec = WorldPos - _58.g_CamPos.xyz;
TexCoord = vec4(NormalizedTex, NormalizedTex * _405.NormalTexCoordScale.xy) + ((_405.InvOceanSize_PatchScale.xyxy * 0.5) * _405.NormalTexCoordScale.zwzw);
gl_Position = (((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3;

View File

@ -3426,6 +3426,23 @@ bool CompilerGLSL::skip_argument(uint32_t id) const
return false;
}
bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &rhs)
{
// Do this with strings because we have a very clear pattern we can check for and it avoids
// adding lots of special cases to the code emission.
auto index = rhs.find(lhs);
if (index != 0)
return false;
// TODO: Shift operators, but it's not important for now.
auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
if (op != lhs.size() + 1)
return false;
statement(lhs, " ", rhs[op], "=", rhs.substr(lhs.size() + 2), ";");
return true;
}
void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
auto ops = stream(instruction);
@ -3503,7 +3520,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// For this case, we don't need to invalidate anything and emit any opcode.
if (lhs != rhs)
{
statement(lhs, " = ", rhs, ";");
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
// While this is purely cosmetic, this is important for legacy ESSL where loop
// variable increments must be in either i++ or i += const-expr.
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
if (!optimize_read_modify_write(lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(ops[0]);
}
}

View File

@ -390,6 +390,8 @@ protected:
void find_static_extensions();
std::string emit_for_loop_initializers(const SPIRBlock &block);
// Attempts to emit the assignment "lhs = rhs" as a compound assignment ("lhs op= expr")
// when rhs textually begins with lhs followed by a binary operator.
// Returns true if a statement was emitted; on false the caller emits the plain assignment.
bool optimize_read_modify_write(const std::string &lhs, const std::string &rhs);
};
}