Merge pull request #71 from KhronosGroup/cfg-analysis

WIP: Add control flow graph analysis for variable scoping
Hans-Kristian Arntzen 2016-11-21 09:11:54 +01:00 committed by GitHub
commit 810fa633d0
23 changed files with 852 additions and 78 deletions
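In short: instead of declaring every function-local variable at the top of the emitted function, the compiler now builds a control flow graph per function, finds the basic block that dominates all accesses to each variable, and declares the variable in that block. The new shaders/comp/cfg.comp test and its reference output below illustrate the resulting scoping; the new --no-cfg-analysis flag restores the old declare-everything-up-front behaviour.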


@ -27,7 +27,9 @@ add_library(spirv-cross-core STATIC
${CMAKE_CURRENT_SOURCE_DIR}/spirv_common.hpp
${CMAKE_CURRENT_SOURCE_DIR}/spirv.hpp
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross.hpp
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross.cpp)
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross.cpp
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cfg.hpp
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cfg.cpp)
add_library(spirv-cross-glsl STATIC
${CMAKE_CURRENT_SOURCE_DIR}/spirv_glsl.cpp


@ -401,11 +401,13 @@ struct CLIArguments
bool metal = false;
bool vulkan_semantics = false;
bool remove_unused = false;
bool cfg_analysis = true;
};
static void print_help()
{
fprintf(stderr, "Usage: spirv-cross [--output <output path>] [SPIR-V file] [--es] [--no-es] [--version <GLSL "
fprintf(stderr, "Usage: spirv-cross [--output <output path>] [SPIR-V file] [--es] [--no-es] [--no-cfg-analysis] "
"[--version <GLSL "
"version>] [--dump-resources] [--help] [--force-temporary] [--cpp] [--cpp-interface-name <name>] "
"[--metal] [--vulkan-semantics] [--flatten-ubo] [--fixup-clipspace] [--iterations iter] [--pls-in "
"format input-name] [--pls-out format output-name] [--remap source_name target_name components] "
@ -519,6 +521,7 @@ int main(int argc, char *argv[])
args.version = parser.next_uint();
args.set_version = true;
});
cbs.add("--no-cfg-analysis", [&args](CLIParser &) { args.cfg_analysis = false; });
cbs.add("--dump-resources", [&args](CLIParser &) { args.dump_resources = true; });
cbs.add("--force-temporary", [&args](CLIParser &) { args.force_temporary = true; });
cbs.add("--flatten-ubo", [&args](CLIParser &) { args.flatten_ubo = true; });
@ -623,6 +626,7 @@ int main(int argc, char *argv[])
opts.force_temporary = args.force_temporary;
opts.vulkan_semantics = args.vulkan_semantics;
opts.vertex.fixup_clipspace = args.fixup;
opts.cfg_analysis = args.cfg_analysis;
compiler->set_options(opts);
ShaderResources res;
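CFG analysis is on by default; passing the new --no-cfg-analysis flag turns it off, which makes the compiler fall back to declaring all local variables in the entry block. A hypothetical invocation (file names are placeholders):
spirv-cross --output out.comp shader.comp.spv --no-cfg-analysis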


@ -128,6 +128,7 @@
<ClCompile Include="..\spirv_cross.cpp" />
<ClCompile Include="..\spirv_glsl.cpp" />
<ClCompile Include="..\spirv_msl.cpp" />
<ClCompile Include="..\spirv_cfg.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\GLSL.std.450.h" />
@ -137,6 +138,7 @@
<ClInclude Include="..\spirv_glsl.hpp" />
<ClInclude Include="..\spirv.hpp" />
<ClInclude Include="..\spirv_msl.hpp" />
<ClInclude Include="..\spirv_cfg.hpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">


@ -30,6 +30,9 @@
<ClCompile Include="..\spirv_msl.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\spirv_cfg.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\GLSL.std.450.h">
@ -53,5 +56,8 @@
<ClInclude Include="..\spirv_msl.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\spirv_cfg.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>


@ -0,0 +1,82 @@
#version 310 es
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO
{
float data;
} _11;
void test()
{
float m;
if ((_11.data != 0.0))
{
float tmp = 10.0;
_11.data = tmp;
}
else
{
float tmp_1 = 15.0;
_11.data = tmp_1;
}
if ((_11.data != 0.0))
{
float e;
if ((_11.data != 5.0))
{
if ((_11.data != 6.0))
{
e = 10.0;
}
}
else
{
e = 20.0;
}
}
switch (int(_11.data))
{
case 0:
{
float tmp_2 = 20.0;
_11.data = tmp_2;
break;
}
case 1:
{
float tmp_3 = 30.0;
_11.data = tmp_3;
break;
}
}
float f;
switch (int(_11.data))
{
case 0:
{
f = 30.0;
break;
}
case 1:
{
f = 40.0;
break;
}
}
int i = 0;
float h;
for (; (i < 20); i = (i + 1), h = (h + 10.0))
{
}
_11.data = h;
do
{
} while ((m != 20.0));
_11.data = m;
}
void main()
{
test();
}


@ -19,7 +19,6 @@ layout(binding = 1, std430) buffer HeightmapFFT
uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel)
{
uint _137;
uint _148;
if (sel.x)
{
_137 = b.x;
@ -29,6 +28,7 @@ uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel)
_137 = a.x;
}
uint _147 = _137;
uint _148;
if (sel.y)
{
_148 = b.y;


@ -18,9 +18,6 @@ void main()
vec4 idat = _24.in_data[ident];
int k = 0;
uint i = 0u;
uint i_1;
uint j;
int l;
if ((idat.y == 20.0))
{
do
@ -63,10 +60,10 @@ void main()
idat = (idat * 2.0);
k = (k + 1);
}
i_1 = 0u;
uint i_1 = 0u;
for (; (i_1 < 16u); i_1 = (i_1 + uint(1)), k = (k + 1))
{
j = 0u;
uint j = 0u;
for (; (j < 30u); j = (j + uint(1)))
{
idat = (_24.mvp * idat);
@ -93,7 +90,7 @@ void main()
{
k = (k + 1);
} while ((k > 10));
l = 0;
int l = 0;
for (;;)
{
if ((l == 5))


@ -9,7 +9,6 @@ layout(binding = 1, std430) buffer SSBO2
void main()
{
uint ident = gl_GlobalInvocationID.x;
int i;
if ((ident == 2u))
{
_27.out_data[ident] = vec4(20.0);
@ -22,7 +21,7 @@ void main()
return;
}
}
i = 0;
int i = 0;
for (; (i < 20); i = (i + 1))
{
if ((i == 10))


@ -17,8 +17,6 @@ void main()
uint ident = gl_GlobalInvocationID.x;
vec4 idat = _24.in_data[ident];
int k = 0;
uint i;
uint j;
for (;;)
{
int _39 = k;
@ -35,10 +33,10 @@ void main()
break;
}
}
i = 0u;
uint i = 0u;
for (; (i < 16u); i = (i + uint(1)), k = (k + 1))
{
j = 0u;
uint j = 0u;
for (; (j < 30u); j = (j + uint(1)))
{
idat = (_24.mvp * idat);


@ -15,7 +15,6 @@ void main()
bool f = true;
FragColor = vec4(mix(vIn2, vIn3, f));
highp vec4 _35;
highp float _44;
if (f)
{
_35 = vIn0;
@ -25,6 +24,7 @@ void main()
_35 = vIn1;
}
FragColor = _35;
highp float _44;
if (f)
{
_44 = vIn2;


@ -99,7 +99,6 @@ void main()
{
vec2 p0 = vPatchPosBase[0];
vec2 param = p0;
vec2 param_1;
if ((!frustum_cull(param)))
{
gl_TessLevelOuter[0] = -1.0;
@ -111,7 +110,7 @@ void main()
}
else
{
param_1 = p0;
vec2 param_1 = p0;
compute_tess_levels(param_1);
}
}


@ -58,10 +58,7 @@ vec2 warp_position()
uint ufloor_lod = uint(floor_lod);
uvec2 uPosition = uvec2(Position);
uvec2 mask = ((uvec2(1u) << uvec2(ufloor_lod, (ufloor_lod + 1u))) - uvec2(1u));
uvec2 rounding;
uint _332;
uint _343;
vec4 lower_upper_snapped;
if ((uPosition.x < 32u))
{
_332 = mask.x;
@ -71,6 +68,7 @@ vec2 warp_position()
_332 = 0u;
}
uint _342 = _332;
uint _343;
if ((uPosition.y < 32u))
{
_343 = mask.y;
@ -79,8 +77,8 @@ vec2 warp_position()
{
_343 = 0u;
}
rounding = uvec2(_342, _343);
lower_upper_snapped = vec4(((uPosition + rounding).xyxy & (~mask).xxyy));
uvec2 rounding = uvec2(_342, _343);
vec4 lower_upper_snapped = vec4(((uPosition + rounding).xyxy & (~mask).xxyy));
return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod));
}


@ -59,12 +59,7 @@ vec2 warp_position()
uint ufloor_lod = uint(floor_lod);
uvec4 uPosition = uvec4(Position);
uvec2 mask = ((uvec2(1u) << uvec2(ufloor_lod, (ufloor_lod + 1u))) - uvec2(1u));
uvec4 rounding;
uint _333;
uint _345;
uint _356;
uint _368;
vec4 lower_upper_snapped;
if ((uPosition.x < 32u))
{
_333 = mask.x;
@ -73,7 +68,9 @@ vec2 warp_position()
{
_333 = 0u;
}
uvec4 rounding;
rounding.x = _333;
uint _345;
if ((uPosition.y < 32u))
{
_345 = mask.x;
@ -83,6 +80,7 @@ vec2 warp_position()
_345 = 0u;
}
rounding.y = _345;
uint _356;
if ((uPosition.x < 32u))
{
_356 = mask.y;
@ -92,6 +90,7 @@ vec2 warp_position()
_356 = 0u;
}
rounding.z = _356;
uint _368;
if ((uPosition.y < 32u))
{
_368 = mask.y;
@ -101,7 +100,7 @@ vec2 warp_position()
_368 = 0u;
}
rounding.w = _368;
lower_upper_snapped = vec4(((uPosition.xyxy + rounding) & (~mask).xxyy));
vec4 lower_upper_snapped = vec4(((uPosition.xyxy + rounding) & (~mask).xxyy));
return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod));
}

shaders/comp/cfg.comp (new file, 91 lines)

@ -0,0 +1,91 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) buffer SSBO
{
float data;
};
void test()
{
// Test that variables local to a scope stay local.
if (data != 0.0)
{
float tmp = 10.0;
data = tmp;
}
else
{
float tmp = 15.0;
data = tmp;
}
// Test that variable access propagates up to dominator
if (data != 0.0)
{
float e;
if (data != 5.0)
{
if (data != 6.0)
e = 10.0;
}
else
e = 20.0;
}
// Test that variables local to a switch block stay local.
switch (int(data))
{
case 0:
{
float tmp = 20.0;
data = tmp;
break;
}
case 1:
{
float tmp = 30.0;
data = tmp;
break;
}
}
// Check that multibranches propagate up to dominator.
float f;
switch (int(data))
{
case 0:
{
f = 30.0;
break;
}
case 1:
{
f = 40.0;
break;
}
}
// Check that loops work.
// Interesting case here is propagating variable access from the continue block.
float h;
for (int i = 0; i < 20; i++, h += 10.0)
;
data = h;
// Do the same with do-while, gotta test all the hard cases.
float m;
do
{
} while (m != 20.0);
data = m;
}
void main()
{
// Test that we do the CFG analysis for all functions.
test();
}

spirv_cfg.cpp (new file, 229 lines)

@ -0,0 +1,229 @@
/*
* Copyright 2016 ARM Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "spirv_cfg.hpp"
#include <algorithm>
#include <assert.h>
using namespace std;
namespace spirv_cross
{
CFG::CFG(Compiler &compiler_, const SPIRFunction &func_)
: compiler(compiler_)
, func(func_)
{
preceding_edges.resize(compiler.get_current_id_bound());
succeeding_edges.resize(compiler.get_current_id_bound());
visit_order.resize(compiler.get_current_id_bound());
immediate_dominators.resize(compiler.get_current_id_bound());
build_post_order_visit_order();
build_immediate_dominators();
}
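// find_common_dominator() is the intersection step of the classic reverse post-order
// dominator algorithm (cf. Cooper, Harvey and Kennedy, "A Simple, Fast Dominance Algorithm"):
// the block with the lower post-order visit index walks up its immediate-dominator chain
// until both inputs meet in a common ancestor.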
uint32_t CFG::find_common_dominator(uint32_t a, uint32_t b) const
{
while (a != b)
{
if (visit_order[a] < visit_order[b])
a = immediate_dominators[a];
else
b = immediate_dominators[b];
}
return a;
}
uint32_t CFG::update_common_dominator(uint32_t a, uint32_t b)
{
auto dominator = find_common_dominator(immediate_dominators[a], immediate_dominators[b]);
immediate_dominators[a] = dominator;
immediate_dominators[b] = dominator;
return dominator;
}
void CFG::build_immediate_dominators()
{
// Traverse the post-order in reverse and build up the immediate dominator tree.
fill(begin(immediate_dominators), end(immediate_dominators), 0);
immediate_dominators[func.entry_block] = func.entry_block;
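// Worked example: for a diamond entry -> { A, B } -> merge, A and B each pick up
// idom = entry from their single predecessor; merge then intersects A and B, whose
// dominator chains meet at entry, so idom(merge) = entry.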
for (auto i = post_order.size(); i; i--)
{
uint32_t block = post_order[i - 1];
auto &pred = preceding_edges[block];
if (pred.empty()) // This is for the entry block, but we've already set up the dominators.
continue;
for (auto &edge : pred)
{
if (immediate_dominators[block])
{
assert(immediate_dominators[edge]);
immediate_dominators[block] = update_common_dominator(block, edge);
}
else
immediate_dominators[block] = edge;
}
}
}
bool CFG::is_back_edge(uint32_t to) const
{
// We have a back edge if the visit order is set with the temporary magic value 0.
// Crossing edges will have already been recorded with a visit order.
return visit_order[to] == 0;
}
bool CFG::post_order_visit(uint32_t block_id)
{
// If we have already branched to this block (back edge), stop recursion.
// If our branches are back-edges, we do not record them.
// We have to record crossing edges however.
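// visit_order is -1 for unvisited blocks, 0 while a visit is in progress (a branch back
// here is therefore a back edge), and a positive visit index once completed (a branch
// here is a crossing edge).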
if (visit_order[block_id] >= 0)
return !is_back_edge(block_id);
// Block back-edges from recursively revisiting ourselves.
visit_order[block_id] = 0;
// First visit our branch targets.
auto &block = compiler.get<SPIRBlock>(block_id);
switch (block.terminator)
{
case SPIRBlock::Direct:
if (post_order_visit(block.next_block))
add_branch(block_id, block.next_block);
break;
case SPIRBlock::Select:
if (post_order_visit(block.true_block))
add_branch(block_id, block.true_block);
if (post_order_visit(block.false_block))
add_branch(block_id, block.false_block);
break;
case SPIRBlock::MultiSelect:
for (auto &target : block.cases)
{
if (post_order_visit(target.block))
add_branch(block_id, target.block);
}
if (block.default_block && post_order_visit(block.default_block))
add_branch(block_id, block.default_block);
break;
default:
break;
}
// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
visit_order[block_id] = ++visit_count;
post_order.push_back(block_id);
return true;
}
void CFG::build_post_order_visit_order()
{
uint32_t block = func.entry_block;
visit_count = 0;
fill(begin(visit_order), end(visit_order), -1);
post_order.clear();
post_order_visit(block);
}
void CFG::add_branch(uint32_t from, uint32_t to)
{
const auto add_unique = [](vector<uint32_t> &l, uint32_t value) {
auto itr = find(begin(l), end(l), value);
if (itr == end(l))
l.push_back(value);
};
add_unique(preceding_edges[to], from);
add_unique(succeeding_edges[from], to);
}
DominatorBuilder::DominatorBuilder(const CFG &cfg_)
: cfg(cfg_)
{
}
void DominatorBuilder::add_block(uint32_t block)
{
if (!cfg.get_immediate_dominator(block))
{
// Unreachable block via the CFG, we will never emit this code anyways.
return;
}
if (!dominator)
{
dominator = block;
return;
}
if (block != dominator)
dominator = cfg.find_common_dominator(block, dominator);
}
void DominatorBuilder::lift_continue_block_dominator()
{
// It is possible for a continue block to be the dominator if a variable is only accessed inside the while block of a do-while loop.
// We cannot safely declare variables inside a continue block, so move any variable declared
// in a continue block to the entry block to simplify.
// It makes very little sense for a continue block to ever be a dominator, so fall back to the simplest
// solution.
if (!dominator)
return;
auto &block = cfg.get_compiler().get<SPIRBlock>(dominator);
auto post_order = cfg.get_visit_order(dominator);
// If we are branching to a block with a higher post-order traversal index (continue blocks), we have a problem
// since we cannot create sensible GLSL code for this, fallback to entry block.
bool back_edge_dominator = false;
switch (block.terminator)
{
case SPIRBlock::Direct:
if (cfg.get_visit_order(block.next_block) > post_order)
back_edge_dominator = true;
break;
case SPIRBlock::Select:
if (cfg.get_visit_order(block.true_block) > post_order)
back_edge_dominator = true;
if (cfg.get_visit_order(block.false_block) > post_order)
back_edge_dominator = true;
break;
case SPIRBlock::MultiSelect:
for (auto &target : block.cases)
{
if (cfg.get_visit_order(target.block) > post_order)
back_edge_dominator = true;
}
if (block.default_block && cfg.get_visit_order(block.default_block) > post_order)
back_edge_dominator = true;
break;
default:
break;
}
if (back_edge_dominator)
dominator = cfg.get_function().entry_block;
}
}

spirv_cfg.hpp (new file, 97 lines)

@ -0,0 +1,97 @@
/*
* Copyright 2016 ARM Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SPIRV_CROSS_CFG_HPP
#define SPIRV_CROSS_CFG_HPP
#include "spirv_cross.hpp"
#include <assert.h>
namespace spirv_cross
{
class CFG
{
public:
CFG(Compiler &compiler, const SPIRFunction &function);
Compiler &get_compiler()
{
return compiler;
}
const Compiler &get_compiler() const
{
return compiler;
}
const SPIRFunction &get_function() const
{
return func;
}
uint32_t get_immediate_dominator(uint32_t block) const
{
return immediate_dominators[block];
}
uint32_t get_visit_order(uint32_t block) const
{
int v = visit_order[block];
assert(v > 0);
return uint32_t(v);
}
uint32_t find_common_dominator(uint32_t a, uint32_t b) const;
private:
Compiler &compiler;
const SPIRFunction &func;
std::vector<std::vector<uint32_t>> preceding_edges;
std::vector<std::vector<uint32_t>> succeeding_edges;
std::vector<uint32_t> immediate_dominators;
std::vector<int> visit_order;
std::vector<uint32_t> post_order;
void add_branch(uint32_t from, uint32_t to);
void build_post_order_visit_order();
void build_immediate_dominators();
bool post_order_visit(uint32_t block);
uint32_t visit_count = 0;
uint32_t update_common_dominator(uint32_t a, uint32_t b);
bool is_back_edge(uint32_t to) const;
};
class DominatorBuilder
{
public:
DominatorBuilder(const CFG &cfg);
void add_block(uint32_t block);
uint32_t get_dominator() const
{
return dominator;
}
void lift_continue_block_dominator();
private:
const CFG &cfg;
uint32_t dominator = 0;
};
}
#endif
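A minimal usage sketch (assumed caller code, mirroring how analyze_variable_scope in spirv_cross.cpp drives these classes; compiler, func and blocks_accessing_variable are placeholders):
CFG cfg(compiler, func);
DominatorBuilder builder(cfg);
for (uint32_t block : blocks_accessing_variable) // every block which touches the variable
    builder.add_block(block);
builder.lift_continue_block_dominator();
uint32_t dom = builder.get_dominator(); // 0 if every access sat in unreachable code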


@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef SPIRV_COMMON_HPP
#define SPIRV_COMMON_HPP
#ifndef SPIRV_CROSS_COMMON_HPP
#define SPIRV_CROSS_COMMON_HPP
#include <functional>
#include <sstream>
@ -356,7 +356,6 @@ struct SPIRBlock : IVariant
Select, // Block ends with an if/else block.
MultiSelect, // Block ends with switch statement.
Loop, // Block ends with a loop.
Return, // Block ends with return.
Unreachable, // Noop
@ -444,6 +443,10 @@ struct SPIRBlock : IVariant
// The dominating block which this block might be within.
// Used in continue; blocks to determine if we really need to write continue.
uint32_t loop_dominator = 0;
// All accesses to these variables are dominated by this block,
// so before branching anywhere we need to make sure that we declare these variables.
std::vector<uint32_t> dominated_variables;
};
struct SPIRFunction : IVariant
@ -510,6 +513,7 @@ struct SPIRFunction : IVariant
bool active = false;
bool flush_undeclared = true;
bool do_combined_parameters = true;
bool analyzed_variable_scope = false;
};
struct SPIRVariable : IVariant


@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef SPIRV_CPP_HPP
#define SPIRV_CPP_HPP
#ifndef SPIRV_CROSS_CPP_HPP
#define SPIRV_CROSS_CPP_HPP
#include "spirv_glsl.hpp"
#include <utility>


@ -16,6 +16,7 @@
#include "spirv_cross.hpp"
#include "GLSL.std.450.h"
#include "spirv_cfg.hpp"
#include <algorithm>
#include <cstring>
#include <utility>
@ -81,6 +82,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
break;
}
case OpCopyMemory:
case OpStore:
{
auto &type = expression_type(ops[0]);
@ -485,9 +487,25 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
variable = args[0];
break;
case OpCopyMemory:
{
if (length < 3)
return false;
auto *var = compiler.maybe_get<SPIRVariable>(args[0]);
if (var && storage_class_is_interface(var->storage))
variables.insert(variable);
var = compiler.maybe_get<SPIRVariable>(args[1]);
if (var && storage_class_is_interface(var->storage))
variables.insert(variable);
break;
}
case OpAccessChain:
case OpInBoundsAccessChain:
case OpLoad:
case OpCopyObject:
case OpImageTexelPointer:
case OpAtomicLoad:
case OpAtomicExchange:
@ -1975,6 +1993,8 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
handler.set_current_block(block);
// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
// inside dead blocks ...
@ -1987,6 +2007,9 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand
return false;
if (op == OpFunctionCall)
{
auto &func = get<SPIRFunction>(ops[2]);
if (handler.follow_function_call(func))
{
if (!handler.begin_function_scope(ops, i.length))
return false;
@ -1996,6 +2019,7 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand
return false;
}
}
}
return true;
}
@ -2708,3 +2732,196 @@ const SPIRConstant &Compiler::get_constant(uint32_t id) const
{
return get<SPIRConstant>(id);
}
void Compiler::analyze_variable_scope(SPIRFunction &entry)
{
struct AccessHandler : OpcodeHandler
{
public:
AccessHandler(Compiler &compiler_)
: compiler(compiler_)
{
}
bool follow_function_call(const SPIRFunction &)
{
// Only analyze within this function.
return false;
}
void set_current_block(const SPIRBlock &block)
{
current_block = &block;
// If we're branching to a block which uses OpPhi, in GLSL
// this will be a variable write when we branch,
// so we need to track access to these variables as well to
// have a complete picture.
const auto test_phi = [this, &block](uint32_t to) {
auto &next = compiler.get<SPIRBlock>(to);
for (auto &phi : next.phi_variables)
if (phi.parent == block.self)
accessed_variables_to_block[phi.function_variable].insert(block.self);
};
switch (block.terminator)
{
case SPIRBlock::Direct:
test_phi(block.next_block);
break;
case SPIRBlock::Select:
test_phi(block.true_block);
test_phi(block.false_block);
break;
case SPIRBlock::MultiSelect:
for (auto &target : block.cases)
test_phi(target.block);
if (block.default_block)
test_phi(block.default_block);
break;
default:
break;
}
}
bool handle(spv::Op op, const uint32_t *args, uint32_t length)
{
switch (op)
{
case OpStore:
{
if (length < 2)
return false;
uint32_t ptr = args[0];
auto *var = compiler.maybe_get_backing_variable(ptr);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
break;
}
case OpAccessChain:
case OpInBoundsAccessChain:
{
if (length < 3)
return false;
uint32_t ptr = args[2];
auto *var = compiler.maybe_get<SPIRVariable>(ptr);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
break;
}
case OpCopyMemory:
{
if (length < 3)
return false;
uint32_t lhs = args[0];
uint32_t rhs = args[1];
auto *var = compiler.maybe_get_backing_variable(lhs);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
var = compiler.maybe_get_backing_variable(rhs);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
break;
}
case OpCopyObject:
{
if (length < 3)
return false;
auto *var = compiler.maybe_get_backing_variable(args[2]);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
break;
}
case OpLoad:
{
if (length < 3)
return false;
uint32_t ptr = args[2];
auto *var = compiler.maybe_get_backing_variable(ptr);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
break;
}
case OpFunctionCall:
{
if (length < 3)
return false;
length -= 3;
args += 3;
for (uint32_t i = 0; i < length; i++)
{
auto *var = compiler.maybe_get_backing_variable(args[i]);
if (var && var->storage == StorageClassFunction)
accessed_variables_to_block[var->self].insert(current_block->self);
}
break;
}
case OpPhi:
{
if (length < 2)
return false;
// Phi nodes are implemented as function variables, so register an access here.
accessed_variables_to_block[args[1]].insert(current_block->self);
break;
}
// Atomics shouldn't be able to access function-local variables.
// Some GLSL builtins access a pointer.
default:
break;
}
return true;
}
Compiler &compiler;
std::unordered_map<uint32_t, std::unordered_set<uint32_t>> accessed_variables_to_block;
const SPIRBlock *current_block = nullptr;
} handler(*this);
// First, we map out all variable access within a function.
// Essentially a map of variable -> { blocks in which that variable is accessed }
traverse_all_reachable_opcodes(entry, handler);
// Compute the control flow graph for this function.
CFG cfg(*this, entry);
// For each variable which is statically accessed.
for (auto &var : handler.accessed_variables_to_block)
{
DominatorBuilder builder(cfg);
auto &blocks = var.second;
// Figure out which block is dominating all accesses of those variables.
for (auto &block : blocks)
builder.add_block(block);
builder.lift_continue_block_dominator();
// Add it to a per-block list of variables.
uint32_t dominating_block = builder.get_dominator();
// If all blocks here are dead code, this will be 0, so the variable in question
// will be completely eliminated.
if (dominating_block)
{
auto &block = get<SPIRBlock>(dominating_block);
block.dominated_variables.push_back(var.first);
}
}
}
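Tying this back to the cfg.comp test: tmp is only ever touched inside a single branch, so each branch block dominates its own accesses and tmp stays declared inside that branch, whereas f is written in two different switch cases, so the only common dominator of its accesses is the block in front of the switch and float f; is hoisted there, exactly as the reference output above shows.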


@ -109,6 +109,9 @@ struct BufferRange
class Compiler
{
public:
friend class CFG;
friend class DominatorBuilder;
// The constructor takes a buffer of SPIR-V words and parses it.
Compiler(std::vector<uint32_t> ir);
@ -309,6 +312,11 @@ public:
SPIRConstant &get_constant(uint32_t id);
const SPIRConstant &get_constant(uint32_t id) const;
uint32_t get_current_id_bound() const
{
return uint32_t(ids.size());
}
protected:
const uint32_t *stream(const Instruction &instr) const
{
@ -473,6 +481,8 @@ protected:
variable_remap_callback(type, var_name, type_name);
}
void analyze_variable_scope(SPIRFunction &function);
private:
void parse();
void parse(const Instruction &i);
@ -486,6 +496,15 @@ private:
// If false, traversal will end immediately.
virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0;
virtual bool follow_function_call(const SPIRFunction &)
{
return true;
}
virtual void set_current_block(const SPIRBlock &)
{
}
virtual bool begin_function_scope(const uint32_t *, uint32_t)
{
return true;


@ -3678,14 +3678,31 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
}
case OpCopyMemory:
{
uint32_t lhs = ops[0];
uint32_t rhs = ops[1];
if (lhs != rhs)
{
flush_variable_declaration(lhs);
flush_variable_declaration(rhs);
statement(to_expression(lhs), " = ", to_expression(rhs), ";");
register_write(lhs);
}
break;
}
case OpCopyObject:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t rhs = ops[2];
if (expression_is_lvalue(rhs))
bool pointer = get<SPIRType>(result_type).pointer;
if (expression_is_lvalue(rhs) && !pointer)
{
// Need a copy.
// For pointer types, we copy the pointer itself.
statement(declare_temporary(result_type, id), to_expression(rhs), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
}
@ -3694,7 +3711,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// RHS expression is immutable, so just forward it.
// Copying these things really make no sense, but
// seems to be allowed anyways.
set<SPIRExpression>(id, to_expression(rhs), result_type, true);
auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
if (pointer)
{
auto *var = maybe_get_backing_variable(rhs);
e.loaded_from = var ? var->self : 0;
}
}
break;
}
@ -5211,6 +5233,16 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
}
auto &entry_block = get<SPIRBlock>(func.entry_block);
if (!func.analyzed_variable_scope)
{
if (options.cfg_analysis)
analyze_variable_scope(func);
else
entry_block.dominated_variables = func.local_variables;
func.analyzed_variable_scope = true;
}
entry_block.loop_dominator = SPIRBlock::NoDominator;
emit_block_chain(entry_block);
@ -5484,9 +5516,13 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
}
else if (method == SPIRBlock::MergeToDirectForLoop)
{
uint32_t current_count = statement_count;
auto &child = get<SPIRBlock>(block.next_block);
// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
flush_undeclared_variables(child);
uint32_t current_count = statement_count;
// If we're trying to create a true for loop,
// we need to make sure that all opcodes before branch statement do not actually emit any code.
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
@ -5530,20 +5566,15 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
return false;
}
void CompilerGLSL::flush_undeclared_variables()
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
// Declare undeclared variables.
if (current_function->flush_undeclared)
{
for (auto &v : current_function->local_variables)
for (auto &v : block.dominated_variables)
{
auto &var = get<SPIRVariable>(v);
if (var.deferred_declaration)
statement(variable_decl(var), ";");
var.deferred_declaration = false;
}
current_function->flush_undeclared = false;
}
}
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
@ -5568,7 +5599,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
{
flush_undeclared_variables();
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
{
// The body of while, is actually just the true block, so always branch there
@ -5580,7 +5611,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
// a new block, which in turn has a OpBranchSelection without a selection merge.
else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
{
flush_undeclared_variables();
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
skip_direct_branch = true;
}
@ -5593,7 +5624,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
}
else if (block.merge == SPIRBlock::MergeLoop)
{
flush_undeclared_variables();
flush_undeclared_variables(block);
// We have a generic loop without any distinguishable pattern like for, while or do while.
get<SPIRBlock>(block.continue_block).complex_continue = true;
@ -5610,6 +5641,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
emit_instruction(op);
}
flush_undeclared_variables(block);
bool emit_next_block = true;
// Handle end of block.
@ -5637,15 +5669,11 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
if (select_branch_to_true_block)
branch(block.self, block.true_block);
else
{
flush_undeclared_variables();
branch(block.self, block.condition, block.true_block, block.false_block);
}
break;
case SPIRBlock::MultiSelect:
{
flush_undeclared_variables();
auto &type = expression_type(block.condition);
bool uint32_t_case = type.basetype == SPIRType::UInt;


@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef SPIRV_GLSL_HPP
#define SPIRV_GLSL_HPP
#ifndef SPIRV_CROSS_GLSL_HPP
#define SPIRV_CROSS_GLSL_HPP
#include "spirv_cross.hpp"
#include <sstream>
@ -60,6 +60,9 @@ public:
bool es = false;
bool force_temporary = false;
// If true, variables will be moved to their appropriate scope through CFG analysis.
bool cfg_analysis = true;
// If true, Vulkan GLSL features are used instead of GL-compatible features.
// Mostly useful for debugging SPIR-V files.
bool vulkan_semantics = false;
@ -257,7 +260,7 @@ protected:
void flush_phi(uint32_t from, uint32_t to);
bool flush_phi_required(uint32_t from, uint32_t to);
void flush_variable_declaration(uint32_t id);
void flush_undeclared_variables();
void flush_undeclared_variables(SPIRBlock &block);
bool should_forward(uint32_t id);
void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);


@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef SPIRV_MSL_HPP
#define SPIRV_MSL_HPP
#ifndef SPIRV_CROSS_MSL_HPP
#define SPIRV_CROSS_MSL_HPP
#include "spirv_glsl.hpp"
#include <set>