Initial release (cfe8342494bbc2)

2016-11-16 09:35:08 +02:00 · 2016-11-16 09:35:08 +02:00 · eea476093c
commit eea476093c
parent 07a739239e
62 changed files with 37955 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -1,2 +1,50 @@
 # HLSLcc
 DirectX shader bytecode cross compiler
+
+Originally based on https://github.com/James-Jones/HLSLCrossCompiler.
+
+This library takes DirectX bytecode as input, and translates it into the following languages:
+- GLSL (OpenGL 3.2 and later)
+- GLSL ES (OpenGL ES 3.0 and later)
+- GLSL ES for Vulkan consumption
+- Metal Shading Language
+
+This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan.
+
+Changes from original HLSLCrossCompiler:
+- Codebase changed to C++11, with major code reorganizations.
+- Support for multiple language output backends (currently ToGLSL and ToMetal)
+- Metal language output support
+- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts).
+- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form
+- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers.
+- Reflection interface to retrieve the shader inputs and their types.
+- Lots of workarounds for various driver/shader compiler bugs.
+- Lots of minor fixes and improvements for correctness
+- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself.
+
+## Note
+
+This project does not include build files or a test suite, as they are integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile src/*.cpp (in C++11 mode!) and src/cbstring/*.c with the following include paths:
+
+- include
+- src/internal_includes
+- src/cbstring
+- src 
+
+The main entry point is TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input).
+
+
+## Contributors
+- Mikko Strandborg
+- Juho Oravainen
+- David Rogers
+- Marton Ekler
+- Antti Tapaninen
+- Florian Penzkofer
+- Alexey Orlov
+- Povilas Kanapickas
+
+## License
+
+See license.txt.
--- a/include/ShaderInfo.h
+++ b/include/ShaderInfo.h
@ -0,0 +1,493 @@
+#pragma once
+
+#include <vector>
+#include <set>
+#include <map>
+#include <string>
+#include "growing_array.h"
+#include <stdint.h>
+//Reflection
+#define MAX_RESOURCE_BINDINGS 256
+
+// Data type of a shader variable, resource or register as seen by reflection
+// and by the translator's type analysis.
+// Every enumerator has an explicit, fixed numeric value.
+// NOTE(review): values 0-51 presumably mirror the D3D shader variable type enum — confirm before renumbering.
+typedef enum _SHADER_VARIABLE_TYPE {
+	SVT_VOID = 0,
+	SVT_BOOL = 1,
+	SVT_INT = 2,
+	SVT_FLOAT = 3,
+	SVT_STRING = 4,
+	SVT_TEXTURE = 5,
+	SVT_TEXTURE1D = 6,
+	SVT_TEXTURE2D = 7,
+	SVT_TEXTURE3D = 8,
+	SVT_TEXTURECUBE = 9,
+	SVT_SAMPLER = 10,
+	SVT_PIXELSHADER = 15,
+	SVT_VERTEXSHADER = 16,
+	SVT_UINT = 19,
+	SVT_UINT8 = 20,
+	SVT_GEOMETRYSHADER = 21,
+	SVT_RASTERIZER = 22,
+	SVT_DEPTHSTENCIL = 23,
+	SVT_BLEND = 24,
+	SVT_BUFFER = 25,
+	SVT_CBUFFER = 26,
+	SVT_TBUFFER = 27,
+	SVT_TEXTURE1DARRAY = 28,
+	SVT_TEXTURE2DARRAY = 29,
+	SVT_RENDERTARGETVIEW = 30,
+	SVT_DEPTHSTENCILVIEW = 31,
+	SVT_TEXTURE2DMS = 32,
+	SVT_TEXTURE2DMSARRAY = 33,
+	SVT_TEXTURECUBEARRAY = 34,
+	SVT_HULLSHADER = 35,
+	SVT_DOMAINSHADER = 36,
+	SVT_INTERFACE_POINTER = 37,
+	SVT_COMPUTESHADER = 38,
+	SVT_DOUBLE = 39,
+	SVT_RWTEXTURE1D = 40,
+	SVT_RWTEXTURE1DARRAY = 41,
+	SVT_RWTEXTURE2D = 42,
+	SVT_RWTEXTURE2DARRAY = 43,
+	SVT_RWTEXTURE3D = 44,
+	SVT_RWBUFFER = 45,
+	SVT_BYTEADDRESS_BUFFER = 46,
+	SVT_RWBYTEADDRESS_BUFFER = 47,
+	SVT_STRUCTURED_BUFFER = 48,
+	SVT_RWSTRUCTURED_BUFFER = 49,
+	SVT_APPEND_STRUCTURED_BUFFER = 50,
+	SVT_CONSUME_STRUCTURED_BUFFER = 51,
+
+
+
+	// Only used as a marker when analyzing register types
+	SVT_FORCED_INT = 152,
+	// Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis
+	SVT_INT_AMBIGUOUS = 153,
+
+	// Partial precision types. Used when doing type analysis.
+	// Note that the two float types are numbered 53/54 while the integer ones
+	// continue the 15x range used by the analysis-only markers above.
+	SVT_FLOAT10 = 53, // Seems to be used in constant buffers
+	SVT_FLOAT16 = 54,
+	SVT_INT16 = 156,
+	SVT_INT12 = 157,
+	SVT_UINT16 = 158,
+
+	// Largest positive 32-bit signed value.
+	// NOTE(review): presumably forces a 32-bit underlying type — confirm.
+	SVT_FORCE_DWORD = 0x7fffffff
+} SHADER_VARIABLE_TYPE;
+
+// Shape class of a shader variable (scalar, vector, matrix, object, struct, ...).
+// Each enumerator is defined as the previous one plus 1, so the values run
+// consecutively from SVC_SCALAR = 0 to SVC_INTERFACE_POINTER = 7.
+typedef enum _SHADER_VARIABLE_CLASS {
+	SVC_SCALAR = 0,
+	SVC_VECTOR = (SVC_SCALAR + 1),
+	SVC_MATRIX_ROWS = (SVC_VECTOR + 1),
+	SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1),
+	SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1),
+	SVC_STRUCT = (SVC_OBJECT + 1),
+	SVC_INTERFACE_CLASS = (SVC_STRUCT + 1),
+	SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1),
+	// NOTE(review): presumably forces a 32-bit underlying type — confirm.
+	SVC_FORCE_DWORD = 0x7fffffff
+} SHADER_VARIABLE_CLASS;
+
+
+
+///////////////////////////////////////
+// Types
+
+// Tessellator partitioning mode. 0 means "not declared".
+enum TESSELLATOR_PARTITIONING
+{
+	TESSELLATOR_PARTITIONING_UNDEFINED = 0,
+	TESSELLATOR_PARTITIONING_INTEGER = 1,
+	TESSELLATOR_PARTITIONING_POW2 = 2,
+	TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+	TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
+};
+
+// Primitive type emitted by the tessellator. 0 means "not declared".
+enum TESSELLATOR_OUTPUT_PRIMITIVE
+{
+	TESSELLATOR_OUTPUT_UNDEFINED = 0,
+	TESSELLATOR_OUTPUT_POINT = 1,
+	TESSELLATOR_OUTPUT_LINE = 2,
+	TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+	TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
+};
+
+// System-value identifier attached to an input/output signature element.
+// NAME_UNDEFINED (0) is the "no system value" entry.
+enum SPECIAL_NAME
+{
+	NAME_UNDEFINED = 0,
+	NAME_POSITION = 1,
+	NAME_CLIP_DISTANCE = 2,
+	NAME_CULL_DISTANCE = 3,
+	NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+	NAME_VIEWPORT_ARRAY_INDEX = 5,
+	NAME_VERTEX_ID = 6,
+	NAME_PRIMITIVE_ID = 7,
+	NAME_INSTANCE_ID = 8,
+	NAME_IS_FRONT_FACE = 9,
+	NAME_SAMPLE_INDEX = 10,
+	// The following are added for D3D11
+	NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11,
+	NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12,
+	NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13,
+	NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14,
+	NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15,
+	NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16,
+	NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17,
+	NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18,
+	NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19,
+	NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20,
+	NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21,
+	NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22,
+};
+
+
+// Component data type of an input/output signature element.
+enum INOUT_COMPONENT_TYPE {
+	INOUT_COMPONENT_UNKNOWN = 0,
+	INOUT_COMPONENT_UINT32 = 1,
+	INOUT_COMPONENT_SINT32 = 2,
+	INOUT_COMPONENT_FLOAT32 = 3
+};
+
+// Minimum precision attached to an input/output signature element.
+// Values 0-5 are consecutive; the two ANY_* entries sit apart at 0xf0/0xf1.
+enum MIN_PRECISION {
+	MIN_PRECISION_DEFAULT = 0,
+	MIN_PRECISION_FLOAT_16 = 1,
+	MIN_PRECISION_FLOAT_2_8 = 2,
+	MIN_PRECISION_RESERVED = 3,
+	MIN_PRECISION_SINT_16 = 4,
+	MIN_PRECISION_UINT_16 = 5,
+	MIN_PRECISION_ANY_16 = 0xf0,
+	MIN_PRECISION_ANY_10 = 0xf1
+};
+
+// Kind of a bound resource. Values are implicit and consecutive from 0
+// (shown in the trailing comments); RTYPE_COUNT is therefore the number of
+// real resource types and must stay last.
+enum ResourceType
+{
+	RTYPE_CBUFFER,//0
+	RTYPE_TBUFFER,//1
+	RTYPE_TEXTURE,//2
+	RTYPE_SAMPLER,//3
+	RTYPE_UAV_RWTYPED,//4
+	RTYPE_STRUCTURED,//5
+	RTYPE_UAV_RWSTRUCTURED,//6
+	RTYPE_BYTEADDRESS,//7
+	RTYPE_UAV_RWBYTEADDRESS,//8
+	RTYPE_UAV_APPEND_STRUCTURED,//9
+	RTYPE_UAV_CONSUME_STRUCTURED,//10
+	RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11
+	RTYPE_COUNT,
+};
+
+// Coarse grouping of resource types. Values are implicit and consecutive
+// from 0, so RGROUP_COUNT is the number of groups and must stay last
+// (it is usable as an array size).
+enum ResourceGroup {
+	RGROUP_CBUFFER,
+	RGROUP_TEXTURE,
+	RGROUP_SAMPLER,
+	RGROUP_UAV,
+	RGROUP_COUNT,
+};
+
+// Dimensionality of a bound resource as reported by reflection.
+enum REFLECT_RESOURCE_DIMENSION
+{
+	REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0,
+	REFLECT_RESOURCE_DIMENSION_BUFFER = 1,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7,
+	REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8,
+	REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9,
+	REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
+	REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11,
+};
+
+// Precision qualifier of a resource. UNKNOWN (0) means no precision was assigned.
+enum REFLECT_RESOURCE_PRECISION
+{
+	REFLECT_RESOURCE_PRECISION_UNKNOWN = 0,
+	REFLECT_RESOURCE_PRECISION_LOWP = 1,
+	REFLECT_RESOURCE_PRECISION_MEDIUMP = 2,
+	REFLECT_RESOURCE_PRECISION_HIGHP = 3,
+
+};
+
+// Data type a resource returns when read. Note that the values start at 1;
+// there is no zero/"unknown" enumerator.
+enum RESOURCE_RETURN_TYPE
+{
+	RETURN_TYPE_UNORM = 1,
+	RETURN_TYPE_SNORM = 2,
+	RETURN_TYPE_SINT = 3,
+	RETURN_TYPE_UINT = 4,
+	RETURN_TYPE_FLOAT = 5,
+	RETURN_TYPE_MIXED = 6,
+	RETURN_TYPE_DOUBLE = 7,
+	RETURN_TYPE_CONTINUED = 8,
+	RETURN_TYPE_UNUSED = 9,
+};
+
+typedef std::map<std::string, REFLECT_RESOURCE_PRECISION> HLSLccSamplerPrecisionInfo;
+
+// Reflection record describing a single resource bound to the shader.
+struct ResourceBinding
+{
+	std::string name;
+	ResourceType eType;
+	uint32_t ui32BindPoint;
+	uint32_t ui32BindCount;
+	uint32_t ui32Flags;
+	REFLECT_RESOURCE_DIMENSION eDimension;
+	RESOURCE_RETURN_TYPE ui32ReturnType;
+	uint32_t ui32NumSamples;
+	REFLECT_RESOURCE_PRECISION ePrecision;
+
+	// Derives the variable type of the data read from this resource from its
+	// return type and precision:
+	//  - lowp:    float -> SVT_FLOAT10, sint -> SVT_INT16, uint -> SVT_UINT16
+	//  - mediump: float -> SVT_FLOAT16, sint -> SVT_INT16, uint -> SVT_UINT16
+	//  - other:   float -> SVT_FLOAT,   sint -> SVT_INT,   uint -> SVT_UINT, double -> SVT_DOUBLE
+	// UNORM/SNORM count as float, and any return type not listed falls back to
+	// the float type of the given precision.
+	SHADER_VARIABLE_TYPE GetDataType() const
+	{
+		const bool isLowp = (ePrecision == REFLECT_RESOURCE_PRECISION_LOWP);
+		const bool isReduced = isLowp || (ePrecision == REFLECT_RESOURCE_PRECISION_MEDIUMP);
+
+		if (ui32ReturnType == RETURN_TYPE_SINT)
+			return isReduced ? SVT_INT16 : SVT_INT;
+
+		if (ui32ReturnType == RETURN_TYPE_UINT)
+			return isReduced ? SVT_UINT16 : SVT_UINT;
+
+		// Doubles are only reported at full precision; with lowp/mediump they
+		// take the float fallback below.
+		if (ui32ReturnType == RETURN_TYPE_DOUBLE && !isReduced)
+			return SVT_DOUBLE;
+
+		// UNORM / SNORM / FLOAT and every unhandled return type.
+		if (!isReduced)
+			return SVT_FLOAT;
+		return isLowp ? SVT_FLOAT10 : SVT_FLOAT16;
+	}
+};
+
+// Type description of a shader variable. Struct types carry their members
+// recursively in Members.
+struct ShaderVarType
+{
+	// Value-initializes every scalar member (zero / null) and marks the
+	// variable as unused.
+	ShaderVarType()
+		: Class()
+		, Type()
+		, Rows()
+		, Columns()
+		, Elements()
+		, MemberCount()
+		, Offset()
+		, ParentCount()
+		, Parent()
+		, m_IsUsed(false)
+	{
+	}
+
+	SHADER_VARIABLE_CLASS	Class;
+	SHADER_VARIABLE_TYPE	Type;
+	uint32_t                Rows;
+	uint32_t                Columns;
+	uint32_t                Elements;
+	uint32_t                MemberCount;
+	uint32_t                Offset;
+	std::string				name;
+
+	uint32_t ParentCount;
+	struct ShaderVarType * Parent;
+	// Includes all parent names.
+	std::string				fullName;
+
+	std::vector<struct ShaderVarType> Members;
+
+	bool m_IsUsed; // If not set, is not used in the shader code
+
+	// Counts the leaf (non-struct) variables under this type: a non-struct
+	// counts as 1, a struct as the sum over its members (so an empty struct
+	// yields 0). This is computed from Members, not read from MemberCount.
+	uint32_t GetMemberCount() const
+	{
+		if (Class != SVC_STRUCT)
+			return 1;
+
+		uint32_t total = 0;
+		for (const ShaderVarType &member : Members)
+			total += member.GetMemberCount();
+		return total;
+	}
+
+};
+
+// A named variable together with its type, location and optional default data.
+// There is no constructor: the scalar members (haveDefaultValue,
+// ui32StartOffset, ui32Size) hold indeterminate values until assigned.
+struct ShaderVar
+{
+	std::string name;
+	int haveDefaultValue;
+	std::vector<uint32_t> pui32DefaultValues;
+	//Offset/Size in bytes.
+	uint32_t ui32StartOffset;
+	uint32_t ui32Size;
+
+	ShaderVarType sType;
+};
+
+// A constant buffer: its name, the variables it holds and its total byte size.
+struct ConstantBuffer
+{
+	std::string name;
+
+	std::vector<ShaderVar> asVars;
+
+	uint32_t ui32TotalSizeInBytes;
+
+	// Sums ShaderVarType::GetMemberCount() over all variables of the buffer.
+	// When stripUnused is true, variables whose type is not flagged as used
+	// (sType.m_IsUsed == false) are left out of the count.
+	uint32_t GetMemberCount(bool stripUnused) const
+	{
+		uint32_t total = 0;
+		for (const ShaderVar &var : asVars)
+		{
+			const bool counted = !stripUnused || var.sType.m_IsUsed;
+			if (counted)
+				total += var.sType.GetMemberCount();
+		}
+		return total;
+	}
+};
+
+// Named class-type record with 16-bit fields.
+// NOTE(review): presumably describes an HLSL interface class type — confirm against the reflection decoder.
+// No constructor: the numeric members are indeterminate until assigned.
+struct ClassType
+{
+	std::string name;
+	uint16_t ui16ID;
+	uint16_t ui16ConstBufStride;
+	uint16_t ui16Texture;
+	uint16_t ui16Sampler;
+};
+
+// Named class-instance record with 16-bit fields.
+// NOTE(review): presumably describes one instance of an HLSL interface class — confirm against the reflection decoder.
+// No constructor: the numeric members are indeterminate until assigned.
+struct ClassInstance
+{
+	std::string name;
+	uint16_t ui16ID;
+	uint16_t ui16ConstBuf;
+	uint16_t ui16ConstBufOffset;
+	uint16_t ui16Texture;
+	uint16_t ui16Sampler;
+};
+
+class Operand;
+
+// Reflection data of one shader: version, input/output signatures, resource
+// bindings, constant buffers, class types/instances and tessellator settings.
+class ShaderInfo
+{
+public:
+
+	// One element of an input, output or patch-constant signature.
+	struct InOutSignature
+	{
+		std::string semanticName;
+		uint32_t ui32SemanticIndex;
+		SPECIAL_NAME eSystemValueType;
+		INOUT_COMPONENT_TYPE eComponentType;
+		uint32_t ui32Register;
+		uint32_t ui32Mask;
+		uint32_t ui32ReadWriteMask;
+
+		int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle
+
+		uint32_t ui32Stream;
+		MIN_PRECISION eMinPrec;
+
+		std::set<uint32_t> isIndexed; // Set of phases where this input/output is part of a index range.
+		std::map<uint32_t, uint32_t> indexStart; // If indexed, contains the start index for the range
+		std::map<uint32_t, uint32_t> index; // If indexed, contains the current index relative to the index start.
+
+	};
+
+	// Zeroes the version numbers and the this-pointer buffer, and starts the
+	// tessellator settings out as UNDEFINED so that they never hold an
+	// indeterminate value when no tessellator declaration is seen.
+	ShaderInfo() :
+		ui32MajorVersion(),
+		ui32MinorVersion(),
+		psResourceBindings(),
+		psConstantBuffers(),
+		psThisPointerConstBuffer(),
+		psClassTypes(),
+		psClassInstances(),
+		eTessPartitioning(TESSELLATOR_PARTITIONING_UNDEFINED),
+		eTessOutPrim(TESSELLATOR_OUTPUT_UNDEFINED)
+	{}
+
+	SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo);
+
+	int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const;
+
+	void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const;
+
+	int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const;
+
+	// Signature lookups by register. NOTE(review): the meaning of the int
+	// return value and of allowNull is defined by the implementation, which is
+	// not part of this header — confirm there.
+	int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
+	int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
+	int GetOutputSignatureFromRegister(const uint32_t ui32Register,
+		const uint32_t ui32CompMask,
+		const uint32_t ui32Stream,
+		const InOutSignature** ppsOut,
+		bool allowNull = false) const;
+
+	int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const;
+
+	static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
+
+	static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
+		const uint32_t (&pui32Swizzle)[4],
+		const ConstantBuffer* psCBuf,
+		const ShaderVarType** ppsShaderVar,
+		bool* isArray,
+		std::vector<uint32_t>* arrayIndices,
+		int32_t* pi32Rebase,
+		uint32_t flags);
+
+	static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices);
+
+	// Apply shader precision information to resource bindings
+	void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);
+
+	uint32_t ui32MajorVersion;
+	uint32_t ui32MinorVersion;
+
+	std::vector<InOutSignature> psInputSignatures;
+	std::vector<InOutSignature> psOutputSignatures;
+	std::vector<InOutSignature> psPatchConstantSignatures;
+
+	std::vector<ResourceBinding> psResourceBindings;
+
+	std::vector<ConstantBuffer> psConstantBuffers;
+	ConstantBuffer* psThisPointerConstBuffer;
+
+	std::vector<ClassType> psClassTypes;
+	std::vector<ClassInstance> psClassInstances;
+
+	//Func table ID to class name ID.
+	HLSLcc::growing_vector<uint32_t> aui32TableIDToTypeID;
+
+	// One map per resource group (indexed by ResourceGroup).
+	HLSLcc::growing_vector<uint32_t> aui32ResourceMap[RGROUP_COUNT];
+
+	HLSLcc::growing_vector<ShaderVarType> sGroupSharedVarType;
+
+	TESSELLATOR_PARTITIONING eTessPartitioning;
+	TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
+};
+
--- a/include/growing_array.h
+++ b/include/growing_array.h
@ -0,0 +1,47 @@
+#pragma once
+
+// This header uses std::vector and std::size_t, so it includes them itself
+// instead of relying on the including file having done so first.
+#include <cstddef>
+#include <vector>
+
+namespace HLSLcc
+{
+	// A vector that automatically grows when written to, fills the intermediate ones with default value.
+	// Reading from an index returns the default value if attempting to access out of bounds.
+	template <class T> class growing_vector
+	{
+	public:
+		growing_vector() : data() {}
+
+		std::vector<T> data;
+
+		// Mutable access: grows the storage to (idx + 1) * 2 elements when idx
+		// is out of range, value-initializing the new elements. Growing may
+		// reallocate, which invalidates references returned earlier.
+		T & operator[](std::size_t idx)
+		{
+			if (idx >= data.size())
+				data.resize((idx + 1) * 2);
+			return data[idx];
+		}
+
+		// Read-only access: never grows. Out-of-range reads return a reference
+		// to a shared default-constructed T.
+		const T & operator[](std::size_t idx) const
+		{
+			static const T defaultValue = T();
+			if (idx >= data.size())
+				return defaultValue;
+			return data[idx];
+		}
+
+	};
+
+	// Same but with bool specialization (std::vector<bool> hands out proxy
+	// references instead of bool&).
+	template <> class growing_vector<bool>
+	{
+	public:
+		growing_vector() : data() {}
+
+		std::vector<bool> data;
+
+		// Mutable access: grows the storage to (idx + 1) * 2 elements, filled
+		// with false, when idx is out of range.
+		std::vector<bool>::reference operator[](std::size_t idx)
+		{
+			if (idx >= data.size())
+				data.resize((idx + 1) * 2, false);
+			return data[idx];
+		}
+
+		// Read-only access, matching the primary template: never grows and
+		// yields false for out-of-range indices.
+		bool operator[](std::size_t idx) const
+		{
+			if (idx >= data.size())
+				return false;
+			return data[idx];
+		}
+
+	};
+}
--- a/include/hlslcc.h
+++ b/include/hlslcc.h
@ -0,0 +1,454 @@
+#ifndef HLSLCC_H_
+#define HLSLCC_H_
+
+#include <string>
+#include <vector>
+#include <map>
+
+#if defined (_WIN32) && defined(HLSLCC_DYNLIB)
+    #define HLSLCC_APIENTRY __stdcall
+    #if defined(libHLSLcc_EXPORTS)
+        #define HLSLCC_API __declspec(dllexport)
+    #else
+        #define HLSLCC_API __declspec(dllimport)
+    #endif
+#else
+    #define HLSLCC_APIENTRY
+    #define HLSLCC_API
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+// Target language/version for the generated shader source.
+// Values are implicit and consecutive. LANG_ES_FIRST/LANG_ES_LAST and
+// LANG_GL_FIRST/LANG_GL_LAST are aliases of the first and last enumerator of
+// the ES and desktop GL ranges, so range checks depend on the declaration order.
+typedef enum
+{
+    LANG_DEFAULT,// Depends on the HLSL shader model.
+    LANG_ES_100, LANG_ES_FIRST=LANG_ES_100,
+    LANG_ES_300,
+	LANG_ES_310, LANG_ES_LAST = LANG_ES_310,
+    LANG_120, LANG_GL_FIRST = LANG_120,
+    LANG_130,
+    LANG_140,
+    LANG_150,
+    LANG_330,
+    LANG_400,
+    LANG_410,
+    LANG_420,
+    LANG_430,
+    LANG_440, LANG_GL_LAST = LANG_440,
+	LANG_METAL,
+} GLLang;
+
+// One-bit flags, one per GL extension named by the field.
+// NOTE(review): presumably a set bit means the extension may be used in the generated code — confirm against the translator.
+typedef struct GlExtensions {
+	uint32_t ARB_explicit_attrib_location : 1;
+	uint32_t ARB_explicit_uniform_location : 1;
+	uint32_t ARB_shading_language_420pack : 1;
+}GlExtensions;
+
+#include "ShaderInfo.h"
+
+typedef std::vector<std::string> TextureSamplerPairs;
+
+// Interpolation mode of a pixel shader input. INTERPOLATION_UNDEFINED (0) is
+// what GLSLCrossDependencyData::GetInterpolationMode reports for registers
+// that were never set.
+typedef enum INTERPOLATION_MODE
+{
+    INTERPOLATION_UNDEFINED = 0,
+    INTERPOLATION_CONSTANT = 1,
+    INTERPOLATION_LINEAR = 2,
+    INTERPOLATION_LINEAR_CENTROID = 3,
+    INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
+    INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+    INTERPOLATION_LINEAR_SAMPLE = 6,
+    INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7,
+} INTERPOLATION_MODE;
+
+// Program stage bits, combined into GLSLCrossDependencyData::ui32ProgramStages.
+#define PS_FLAG_VERTEX_SHADER	0x1
+#define PS_FLAG_HULL_SHADER		0x2
+#define PS_FLAG_DOMAIN_SHADER	0x4
+#define PS_FLAG_GEOMETRY_SHADER 0x8
+#define PS_FLAG_PIXEL_SHADER	0x10
+
+// TO_* bit flags. Each value is a distinct bit so they can be OR'ed together.
+// NOTE(review): presumably these control how an operand is translated — confirm against the users of these flags.
+#define TO_FLAG_NONE    0x0
+#define TO_FLAG_INTEGER 0x1
+#define TO_FLAG_NAME_ONLY 0x2
+#define TO_FLAG_DECLARATION_NAME 0x4
+#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment.
+#define TO_FLAG_UNSIGNED_INTEGER 0x10
+#define TO_FLAG_DOUBLE 0x20
+// --- TO_AUTO_BITCAST_TO_FLOAT ---
+//If the operand is an integer temp variable then this flag
+//indicates that the temp has a valid floating point encoding
+//and that the current expression expects the operand to be floating point
+//and therefore intBitsToFloat must be applied to that variable.
+#define TO_AUTO_BITCAST_TO_FLOAT 0x40
+#define TO_AUTO_BITCAST_TO_INT 0x80
+#define TO_AUTO_BITCAST_TO_UINT 0x100
+// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX
+// to match HLSL functionality.
+#define TO_AUTO_EXPAND_TO_VEC2 0x200
+#define TO_AUTO_EXPAND_TO_VEC3 0x400
+#define TO_AUTO_EXPAND_TO_VEC4 0x800
+#define TO_FLAG_BOOL 0x1000
+// These flags are only used for Metal:
+// Force downscaling of the operand to match
+// the other operand (Metal doesn't like mixing halfs with floats)
+#define TO_FLAG_FORCE_HALF 0x2000
+
+// Shader stage. INVALID_SHADER is -1; the real stages follow consecutively
+// from PIXEL_SHADER = 0 to COMPUTE_SHADER = 5.
+typedef enum
+{
+	INVALID_SHADER = -1,
+	PIXEL_SHADER,
+	VERTEX_SHADER,
+	GEOMETRY_SHADER,
+	HULL_SHADER,
+	DOMAIN_SHADER,
+	COMPUTE_SHADER,
+} SHADER_TYPE;
+
+// Enum for texture dimension reflection data.
+// Passed to HLSLccReflection::OnTextureBinding. Values are consecutive from
+// TD_FLOAT = 0.
+typedef enum
+{
+	TD_FLOAT = 0,
+	TD_INT,
+	TD_2D,
+	TD_3D,
+	TD_CUBE,
+	TD_2DSHADOW,
+	TD_2DARRAY,
+	TD_CUBEARRAY
+} HLSLCC_TEX_DIMENSION;
+
+// The prefix for all temporary variables used by the generated code.
+// Using a texture or uniform name like this will cause conflicts
+#define HLSLCC_TEMP_PREFIX "u_xlat"
+
+//The shader stages (Vertex, Pixel et al) do not depend on each other
+//in HLSL. GLSL is a different story. HLSLCrossCompiler requires
+//that hull shaders must be compiled before domain shaders, and
+//the pixel shader must be compiled before all of the others.
+//During compilation the GLSLCrossDependencyData struct will
+//carry over any information needed about a different shader stage
+//in order to construct valid GLSL shader combinations.
+
+//Using GLSLCrossDependencyData is optional. However some shader
+//combinations may show link failures, or runtime errors.
+// Carries information between the translations of the individual stages of one
+// program: pixel input interpolation modes, varying locations, Vulkan
+// <set, binding> assignments and tessellator settings.
+class GLSLCrossDependencyData
+{
+public:
+	// A container for a single Vulkan resource binding (<set, binding> pair)
+	typedef std::pair<uint32_t, uint32_t> VulkanResourceBinding;
+
+private:
+	//Required if PixelInterpDependency is true
+	std::vector<INTERPOLATION_MODE> pixelInterpolation;
+
+	// Map of varying locations, indexed by varying names.
+	typedef std::map<std::string, uint32_t> VaryingLocations;
+
+	static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output)
+
+	VaryingLocations varyingLocationsMap[MAX_NAMESPACES];
+	uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES];
+
+	// Number of descriptor sets for which a "next free binding" counter is kept.
+	static const uint32_t MAX_VULKAN_SETS = 8;
+
+	typedef std::map<std::string, VulkanResourceBinding> VulkanResourceBindings;
+	VulkanResourceBindings m_VulkanResourceBindings;
+	uint32_t m_NextAvailableVulkanResourceBinding[MAX_VULKAN_SETS]; // one per set.
+
+	// Returns the varying namespace index (0..MAX_NAMESPACES-1) used by the
+	// inputs or outputs of the given stage. The output namespace of a stage is
+	// the input namespace of the stage that consumes it, which is why geometry
+	// and pixel inputs depend on which stages the program contains
+	// (ui32ProgramStages). Unknown stages map to namespace 0.
+	inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) const
+	{
+		switch (eShaderType)
+		{
+		case VERTEX_SHADER:
+			return isInput ? 0 : 1;
+
+		case HULL_SHADER:
+			return isInput ? 1 : 2;
+
+		case DOMAIN_SHADER:
+			return isInput ? 2 : 3;
+
+		case GEOMETRY_SHADER:
+			// The input depends on whether there's a tessellation shader before us
+			if (isInput)
+			{
+				return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 3 : 1;
+			}
+			return 4;
+
+		case PIXEL_SHADER:
+			// The inputs can come from geom shader, domain shader or directly from vertex shader
+			if (isInput)
+			{
+				if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER)
+				{
+					return 4;
+				}
+				else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)
+				{
+					return 3;
+				}
+				else
+				{
+					return 1;
+				}
+			}
+			return 5; // This value never really used
+		default:
+			return 0;
+		}
+	}
+
+
+
+public:
+	// Starts with no stages, undefined tessellator settings and all location /
+	// binding counters at zero.
+	GLSLCrossDependencyData()
+		: eTessPartitioning(),
+		eTessOutPrim(),
+		ui32ProgramStages(0)
+	{
+		memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation));
+		memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding));
+	}
+
+
+	// Retrieve the location for a varying with a given name.
+	// If the name doesn't already have an allocated location, allocate one
+	// and store it into the map.
+	inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput)
+	{
+		int nspace = GetVaryingNamespace(eShaderType, isInput);
+		VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name);
+		if (itr != varyingLocationsMap[nspace].end())
+			return itr->second;
+
+		uint32_t newKey = nextAvailableVaryingLocation[nspace];
+		nextAvailableVaryingLocation[nspace]++;
+		varyingLocationsMap[nspace].insert(std::make_pair(name, newKey));
+		return newKey;
+	}
+
+	// Retrieve the binding for a resource (texture, constant buffer, image) with a given name.
+	// If not found, allocate a new one (in preferredSet, set 0 by default) and return that.
+	// The returned value is a pair of <set, binding>.
+	// If the name contains "Xhlslcc_set_<S>_bind_<B>X", those values (from the first found occurrence in the name)
+	// will be used instead, and all occurrences of that marker will be removed from name, so the name parameter can be modified.
+	// If allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter'.
+	// A preferredSet outside the tracked range (>= MAX_VULKAN_SETS) falls back to set 0.
+	inline std::pair<uint32_t, uint32_t> GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
+	{
+		// scan for the special marker; %u needs unsigned int targets
+		unsigned int markerSet = 0, markerBinding = 0;
+		size_t startLoc = name.find("Xhlslcc");
+		if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, "Xhlslcc_set_%u_bind_%uX", &markerSet, &markerBinding) == 2))
+		{
+			// Get rid of all markers
+			while ((startLoc = name.find("Xhlslcc")) != std::string::npos)
+			{
+				size_t endLoc = name.find('X', startLoc + 1);
+				if (endLoc == std::string::npos)
+					break; // unterminated marker: leave the rest of the name alone
+				name.erase(startLoc, endLoc - startLoc + 1);
+			}
+			// Add to map (an already registered name keeps its earlier binding)
+			VulkanResourceBinding newBind(markerSet, markerBinding);
+			m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
+			if (allocRoomForCounter)
+			{
+				VulkanResourceBinding counterBind(markerSet, markerBinding + 1);
+				m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
+			}
+
+			return newBind;
+		}
+
+		VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name);
+		if (itr != m_VulkanResourceBindings.end())
+			return itr->second;
+
+		// Only MAX_VULKAN_SETS counters exist; an out-of-range set would index
+		// past the end of m_NextAvailableVulkanResourceBinding.
+		if (preferredSet >= MAX_VULKAN_SETS)
+			preferredSet = 0;
+
+		// Allocate a new one
+		VulkanResourceBinding newBind(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
+		m_NextAvailableVulkanResourceBinding[preferredSet]++;
+		m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
+		if (allocRoomForCounter)
+		{
+			VulkanResourceBinding counterBind(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
+			m_NextAvailableVulkanResourceBinding[preferredSet]++;
+			m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
+		}
+		return newBind;
+	}
+
+	//dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D,
+	//but they appear on inputs inside domain shaders for GL.
+	//Hull shader must be compiled before domain so that the
+	//correct partitioning and primitive type information
+	//can be saved when compiling hull and passed to domain compilation.
+	TESSELLATOR_PARTITIONING eTessPartitioning;
+	TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
+
+	// Bitfield for the shader stages this program is going to include (see PS_FLAG_*).
+	// Needed so we can construct proper shader input and output names
+	uint32_t ui32ProgramStages;
+
+	// Returns the interpolation mode recorded for a register, or
+	// INTERPOLATION_UNDEFINED when none was recorded.
+	inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) const
+	{
+		if (regNo >= pixelInterpolation.size())
+			return INTERPOLATION_UNDEFINED;
+		else
+			return pixelInterpolation[regNo];
+	}
+
+	// Records the interpolation mode for a register, growing the table (filled
+	// with INTERPOLATION_UNDEFINED) when needed.
+	inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode)
+	{
+		if (regNo >= pixelInterpolation.size())
+			pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED);
+
+		pixelInterpolation[regNo] = mode;
+	}
+
+	// Resets interpolation modes and varying locations.
+	// NOTE(review): Vulkan resource bindings, tessellator settings and
+	// ui32ProgramStages are not reset here — confirm this is intentional.
+	inline void ClearCrossDependencyData()
+	{
+		pixelInterpolation.clear();
+		for (int i = 0; i < MAX_NAMESPACES; i++)
+		{
+			varyingLocationsMap[i].clear();
+			nextAvailableVaryingLocation[i] = 0;
+		}
+	}
+
+
+};
+
+// Output of a translation; TranslateHLSLFromFile/TranslateHLSLFromMem take a
+// pointer to one of these as their result parameter.
+// No constructor: shaderType and GLSLLanguage are indeterminate until assigned.
+struct GLSLShader
+{
+    int shaderType; //One of the GL enums.
+    std::string sourceCode;
+    ShaderInfo reflection;
+    GLLang GLSLLanguage;
+    TextureSamplerPairs textureSamplers;    // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out
+};
+
+// Interface for retrieving reflection and diagnostics data.
+// Every callback has a default implementation that does nothing (the
+// bool-returning ones return true), so a subclass only needs to override the
+// callbacks it is interested in.
+// NOTE(review): the meaning of the bool return values is decided by the caller, which is not part of this header — confirm there.
+class HLSLccReflection
+{
+public:
+	HLSLccReflection() {}
+	virtual ~HLSLccReflection() {}
+
+	// Called on errors or diagnostic messages
+	virtual void OnDiagnostics(const std::string &error, int line, bool isError) {}
+
+	virtual void OnInputBinding(const std::string &name, int bindIndex) {}
+
+	virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; }
+
+	virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; }
+
+	virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {}
+
+	virtual void OnTextureBinding(const std::string &name, int bindIndex, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
+
+	virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {}
+	
+	virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
+};
+
+
+/*HLSL constant buffers are treated as default-block uniform arrays by default. This is done
+  to support versions of GLSL which lack ARB_uniform_buffer_object functionality.
+  Setting this flag causes each one to have its own uniform block.
+  Note: Currently the nth const buffer will be named UniformBufferN. This is likely to change to the original HLSL name in the future.*/
+static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1;
+
+static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2;
+
+static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4;
+
+static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8;
+
+//GS enabled?
+//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS).
+//This flag is needed in order for the interfaces between stages to match when GS is in use.
+//PS inputs VtxGeoOutput
+//GS outputs VtxGeoOutput
+//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise.
+static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10;
+
+static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20;
+
+//Either use this flag or glBindFragDataLocationIndexed.
+//When set the first pixel shader output is the first input to blend
+//equation, the others go to the second input.
+static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40;
+
+//If set, shader inputs and outputs are declared with their semantic name.
+static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80;
+//If set, shader inputs and outputs are declared with their semantic name appended.
+static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100;
+
+//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername".
+static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200;
+
+//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that)
+static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400;
+
+//If set, global uniforms are not stored in a struct.
+static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800;
+
+//If set, image declarations will always have binding and format qualifiers.
+static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000;
+
+// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers
+// Also removes that suffix from generated output
+static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000;
+
+// If set, adds location qualifiers to intra-shader varyings.
+static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000;
+
+// If set, wraps all uniform buffer declarations in a preprocessor macro #ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS
+// so that if that macro is defined, all UBO declarations will become normal uniforms
+static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000;
+
+// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code
+static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000;
+
+// printf-style format of the name prefix used by HLSLCC_FLAG_TRANSLATE_MATRICES (rows, then columns).
+#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d"
+
+// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtx<rows>x<cols>'
+static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000;
+
+// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "<Name>_hlslcc_set_X_bind_Y"
+// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData)
+static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000;
+
+// If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
+static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
+
+// Translation entry points, exported with C linkage.
+// TranslateHLSLFromMem takes the DX bytecode directly; TranslateHLSLFromFile
+// takes a file name instead. Both take a bitwise OR of HLSLCC_FLAG_* values,
+// the target language, optional cross-stage dependency data, sampler
+// precision info, reflection callbacks and the output GLSLShader.
+// NOTE(review): the meaning of the int return value and which pointer
+// arguments may be null are defined by the implementation — confirm there.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
+                                                     unsigned int flags,
+                                                     GLLang language,
+													 const GlExtensions *extensions,
+                                                     GLSLCrossDependencyData* dependencies,
+													 HLSLccSamplerPrecisionInfo& samplerPrecisions,
+													 HLSLccReflection& reflectionCallbacks,
+													 GLSLShader* result
+													 );
+
+HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
+                                                    unsigned int flags,
+                                                    GLLang language,
+													const GlExtensions *extensions,
+                                                    GLSLCrossDependencyData* dependencies,
+													HLSLccSamplerPrecisionInfo& samplerPrecisions,
+													HLSLccReflection& reflectionCallbacks,
+                                                    GLSLShader* result);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
--- a/include/hlslcc.hpp
+++ b/include/hlslcc.hpp
@ -0,0 +1,5 @@
+
+extern "C" {
+#include "hlslcc.h"
+}
+
--- a/include/pstdint.h
+++ b/include/pstdint.h
@ -0,0 +1,800 @@
+/*  A portable stdint.h
+ ****************************************************************************
+ *  BSD License:
+ ****************************************************************************
+ *
+ *  Copyright (c) 2005-2011 Paul Hsieh
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ *  Version 0.1.12
+ *
+ *  The ANSI C standard committee, for the C99 standard, specified the
+ *  inclusion of a new standard include file called stdint.h.  This is
+ *  a very useful and long desired include file which contains several
+ *  very precise definitions for integer scalar types that is
+ *  critically important for making portable several classes of
+ *  applications including cryptography, hashing, variable length
+ *  integer libraries and so on.  But for most developers it's likely
+ *  useful just for programming sanity.
+ *
+ *  The problem is that most compiler vendors have decided not to
+ *  implement the C99 standard, and the next C++ language standard
+ *  (which has a lot more mindshare these days) will be a long time in
+ *  coming and it's unknown whether or not it will include stdint.h or
+ *  how much adoption it will have.  Either way, it will be a long time
+ *  before all compilers come with a stdint.h and it also does nothing
+ *  for the extremely large number of compilers available today which
+ *  do not include this file, or anything comparable to it.
+ *
+ *  So that's what this file is all about.  It's an attempt to build a
+ *  single universal include file that works on as many platforms as
+ *  possible to deliver what stdint.h is supposed to.  A few things
+ *  that should be noted about this file:
+ *
+ *    1) It is not guaranteed to be portable and/or present an identical
+ *       interface on all platforms.  The extreme variability of the
+ *       ANSI C standard makes this an impossibility right from the
+ *       very get go. It's really only meant to be useful for the vast
+ *       majority of platforms that possess the capability of
+ *       implementing usefully and precisely defined, standard sized
+ *       integer scalars.  Systems which are not intrinsically 2s
+ *       complement may produce invalid constants.
+ *
+ *    2) There is an unavoidable use of non-reserved symbols.
+ *
+ *    3) Other standard include files are invoked.
+ *
+ *    4) This file may come in conflict with future platforms that do
+ *       include stdint.h.  The hope is that one or the other can be
+ *       used with no real difference.
+ *
+ *    5) In the current version, if your platform can't represent
+ *       int32_t, int16_t and int8_t, it just dumps out with a compiler
+ *       error.
+ *
+ *    6) 64 bit integers may or may not be defined.  Test for their
+ *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ *       Note that this is different from the C99 specification which
+ *       requires the existence of 64 bit support in the compiler.  If
+ *       this is not defined for your platform, yet it is capable of
+ *       dealing with 64 bits then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities.
+ *
+ *    7) (u)intptr_t may or may not be defined.  Test for its presence
+ *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
+ *       for your platform, then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities, not
+ *       because its optional.
+ *
+ *    8) The following might not be defined even if your platform is
+ *       capable of defining it:
+ *
+ *       WCHAR_MIN
+ *       WCHAR_MAX
+ *       (u)int64_t
+ *       PTRDIFF_MIN
+ *       PTRDIFF_MAX
+ *       (u)intptr_t
+ *
+ *    9) The following have not been defined:
+ *
+ *       WINT_MIN
+ *       WINT_MAX
+ *
+ *   10) The criteria for defining (u)int_least(*)_t isn't clear,
+ *       except for systems which don't have a type that precisely
+ *       defined 8, 16, or 32 bit types (which this include file does
+ *       not support anyways). Default definitions have been given.
+ *
+ *   11) The criteria for defining (u)int_fast(*)_t isn't something I
+ *       would trust to any particular compiler vendor or the ANSI C
+ *       committee.  It is well known that "compatible systems" are
+ *       commonly created that have very different performance
+ *       characteristics from the systems they are compatible with,
+ *       especially those whose vendors make both the compiler and the
+ *       system.  Default definitions have been given, but it's strongly
+ *       recommended that users never use these definitions for any
+ *       reason (they do *NOT* deliver any serious guarantee of
+ *       improved performance -- not in this file, nor any vendor's
+ *       stdint.h).
+ *
+ *   12) The following macros:
+ *
+ *       PRINTF_INTMAX_MODIFIER
+ *       PRINTF_INT64_MODIFIER
+ *       PRINTF_INT32_MODIFIER
+ *       PRINTF_INT16_MODIFIER
+ *       PRINTF_LEAST64_MODIFIER
+ *       PRINTF_LEAST32_MODIFIER
+ *       PRINTF_LEAST16_MODIFIER
+ *       PRINTF_INTPTR_MODIFIER
+ *
+ *       are strings which have been defined as the modifiers required
+ *       for the "d", "u" and "x" printf formats to correctly output
+ *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
+ *       defined if INT64_MAX is not defined.  These are an extension
+ *       beyond what C99 specifies must be in stdint.h.
+ *
+ *       In addition, the following macros are defined:
+ *
+ *       PRINTF_INTMAX_HEX_WIDTH
+ *       PRINTF_INT64_HEX_WIDTH
+ *       PRINTF_INT32_HEX_WIDTH
+ *       PRINTF_INT16_HEX_WIDTH
+ *       PRINTF_INT8_HEX_WIDTH
+ *       PRINTF_INTMAX_DEC_WIDTH
+ *       PRINTF_INT64_DEC_WIDTH
+ *       PRINTF_INT32_DEC_WIDTH
+ *       PRINTF_INT16_DEC_WIDTH
+ *       PRINTF_INT8_DEC_WIDTH
+ *
+ *       Which specifies the maximum number of characters required to
+ *       print the number of that type in either hexadecimal or decimal.
+ *       These are an extension beyond what C99 specifies must be in
+ *       stdint.h.
+ *
+ *  Compilers tested (all with 0 warnings at their highest respective
+ *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ *  This file should be considered a work in progress.  Suggestions for
+ *  improvements, especially those which increase coverage are strongly
+ *  encouraged.
+ *
+ *  Acknowledgements
+ *
+ *  The following people have made significant contributions to the
+ *  development and testing of this file:
+ *
+ *  Chris Howie
+ *  John Steele Scott
+ *  Dave Thorup
+ *  John Dill
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
+/*
+ *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
+ *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
+ */
+
+#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED
+# ifndef PRINTF_INT64_MODIFIER
+#  define PRINTF_INT64_MODIFIER "ll"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH
+#  define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+#  define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+#  define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+#  define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+#  define PRINTF_INT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+#  define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+#  define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+#  define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+
+/*
+ *  Something really weird is going on with Open Watcom.  Just pull some of
+ *  these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+#  if !defined (INT64_C)
+#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
+#  endif
+#  if !defined (UINT64_C)
+#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
+#  endif
+#  if !defined (INT32_C)
+#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
+#  endif
+#  if !defined (UINT32_C)
+#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
+#  endif
+#  if !defined (INT16_C)
+#   define INT16_C(x)   (x)
+#  endif
+#  if !defined (UINT16_C)
+#   define UINT16_C(x)  (x)
+#  endif
+#  if !defined (INT8_C)
+#   define INT8_C(x)   (x)
+#  endif
+#  if !defined (UINT8_C)
+#   define UINT8_C(x)  (x)
+#  endif
+#  if !defined (UINT64_MAX)
+#   define UINT64_MAX  18446744073709551615ULL
+#  endif
+#  if !defined (INT64_MAX)
+#   define INT64_MAX  9223372036854775807LL
+#  endif
+#  if !defined (UINT32_MAX)
+#   define UINT32_MAX  4294967295UL
+#  endif
+#  if !defined (INT32_MAX)
+#   define INT32_MAX  2147483647L
+#  endif
+#  if !defined (INTMAX_MAX)
+#   define INTMAX_MAX INT64_MAX
+#  endif
+#  if !defined (INTMAX_MIN)
+#   define INTMAX_MIN INT64_MIN
+#  endif
+# endif
+#endif
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0)
+#endif
+
+/*
+ *  Deduce the type assignments from limits.h under the assumption that
+ *  integer sizes in bits are powers of 2, and follow the ANSI
+ *  definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#ifndef uint8_t
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
+    typedef unsigned char uint8_t;
+#   define UINT8_C(v) ((uint8_t) v)
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN INT8_C(0x80)
+#endif
+#ifndef int8_t
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
+    typedef signed char int8_t;
+#   define INT8_C(v) ((int8_t) v)
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#ifndef uint16_t
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
+  typedef unsigned int uint16_t;
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX)
+  typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN INT16_C(0x8000)
+#endif
+#ifndef int16_t
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
+  typedef signed int int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX)
+  typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#ifndef uint32_t
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
+  typedef unsigned long uint32_t;
+# define UINT32_C(v) v ## UL
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX)
+  typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX)
+  typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+/*
+ *  INT32_MIN is derived from INT32_MAX instead of being spelled as the
+ *  constant 0x80000000.  That constant never produces a negative value:
+ *  it is unsigned where int/long are 32 bits wide, and a positive long
+ *  where long is wider.  The expression below is negative for every
+ *  choice of int32_t and remains usable in #if directives.
+ */
+#ifndef INT32_MIN
+# define INT32_MIN (-INT32_MAX - 1)
+#endif
+#ifndef int32_t
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
+  typedef signed long int32_t;
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX)
+  typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX)
+  typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+/*
+ *  The macro stdint_int64_defined is temporarily used to record
+ *  whether or not 64 integer support is available.  It must be
+ *  defined for any 64 integer extensions for new platforms that are
+ *  added.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# endif
+#endif
+
+#if !defined (stdint_int64_defined)
+# if defined(__GNUC__)
+#  define stdint_int64_defined
+   __extension__ typedef long long int64_t;
+   __extension__ typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+#  define stdint_int64_defined
+   typedef __int64 int64_t;
+   typedef unsigned __int64 uint64_t;
+#  define UINT64_C(v) v ## UI64
+#  define  INT64_C(v) v ## I64
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "I64"
+#  endif
+# endif
+#endif
+
+/*
+ *  Limits of the 64 bit types.  Each limit is only defined when the
+ *  constant macro it is built from exists, so that "#ifdef <limit>"
+ *  remains a reliable test for 64 bit support (see note 6 at the top of
+ *  this file).
+ */
+
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+/*
+ *  9223372036854775808 does not fit a signed 64 bit constant, so the
+ *  minimum is computed from INT64_MAX rather than written as a literal.
+ */
+#if !defined (INT64_MIN) && defined (INT64_C)
+# define INT64_MIN (-INT64_MAX - 1)
+#endif
+#if !defined (UINT64_MAX) && defined (UINT64_C)
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ *  Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "20"
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+
+/*
+ *  Ok, lets not worry about 128 bit integers for now.  Moore's law says
+ *  we don't need to worry about that until about 2040 at which point
+ *  we'll have bigger things to worry about.
+ */
+
+/*
+ *  intmax_t/uintmax_t are the widest integer types this file managed to
+ *  define: the 64 bit types when stdint_int64_defined is set, otherwise
+ *  the 32 bit types.  Both branches provide the same set of macros.
+ */
+#ifdef stdint_int64_defined
+  typedef int64_t intmax_t;
+  typedef uint64_t uintmax_t;
+# define  INTMAX_MAX   INT64_MAX
+# define  INTMAX_MIN   INT64_MIN
+# define UINTMAX_MAX  UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define  INTMAX_C(v)  INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else
+  typedef int32_t intmax_t;
+  typedef uint32_t uintmax_t;
+# define  INTMAX_MAX   INT32_MAX
+/* INTMAX_MIN was previously missing from this branch. */
+# define  INTMAX_MIN   INT32_MIN
+# define UINTMAX_MAX  UINT32_MAX
+# define UINTMAX_C(v) UINT32_C(v)
+# define  INTMAX_C(v)  INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif
+
+/*
+ *  Because this file currently only supports platforms which have
+ *  precise powers of 2 as bit sizes for the default integers, the
+ *  least definitions are all trivial.  Its possible that a future
+ *  version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined
+  typedef   int8_t   int_least8_t;
+  typedef  uint8_t  uint_least8_t;
+  typedef  int16_t  int_least16_t;
+  typedef uint16_t uint_least16_t;
+  typedef  int32_t  int_least32_t;
+  typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define  UINT_LEAST8_MAX  UINT8_MAX
+# define   INT_LEAST8_MAX   INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define  INT_LEAST16_MAX  INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define  INT_LEAST32_MAX  INT32_MAX
+# define   INT_LEAST8_MIN   INT8_MIN
+# define  INT_LEAST16_MIN  INT16_MIN
+# define  INT_LEAST32_MIN  INT32_MIN
+# ifdef stdint_int64_defined
+    typedef  int64_t  int_least64_t;
+    typedef uint64_t uint_least64_t;
+#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+#   define UINT_LEAST64_MAX UINT64_MAX
+#   define  INT_LEAST64_MAX  INT64_MAX
+#   define  INT_LEAST64_MIN  INT64_MIN
+# endif
+#endif
+#undef stdint_least_defined
+
+/*
+ *  The ANSI C committee pretending to know or specify anything about
+ *  performance is the epitome of misguided arrogance.  The mandate of
+ *  this file is to *ONLY* ever support that absolute minimum
+ *  definition of the fast integer types, for compatibility purposes.
+ *  No extensions, and no attempt to suggest what may or may not be a
+ *  faster integer type will ever be made in this file.  Developers are
+ *  warned to stay away from these types when using this or any other
+ *  stdint.h.
+ */
+
+typedef   int_least8_t   int_fast8_t;
+typedef  uint_least8_t  uint_fast8_t;
+typedef  int_least16_t  int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef  int_least32_t  int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define  UINT_FAST8_MAX  UINT_LEAST8_MAX
+#define   INT_FAST8_MAX   INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define  INT_FAST16_MAX  INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define  INT_FAST32_MAX  INT_LEAST32_MAX
+#define   INT_FAST8_MIN   INT_LEAST8_MIN
+#define  INT_FAST16_MIN  INT_LEAST16_MIN
+#define  INT_FAST32_MIN  INT_LEAST32_MIN
+#ifdef stdint_int64_defined
+  typedef  int_least64_t  int_fast64_t;
+  typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define  INT_FAST64_MAX  INT_LEAST64_MAX
+# define  INT_FAST64_MIN  INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined
+
+/*
+ *  Whatever piecemeal, per compiler thing we can do about the wchar_t
+ *  type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
+# include <wchar.h>
+# ifndef WCHAR_MIN
+#  define WCHAR_MIN 0
+# endif
+# ifndef WCHAR_MAX
+#  define WCHAR_MAX ((wchar_t)-1)
+# endif
+#endif
+
+/*
+ *  Whatever piecemeal, per compiler/platform thing we can do about the
+ *  (u)intptr_t types and limits.
+ */
+
+#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
+#  define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
+#    define stdint_intptr_bits 16
+#  else
+#    define stdint_intptr_bits 32
+#  endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
+#  define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what did Intel do about x86-64? */
+# endif
+
+# ifdef stdint_intptr_bits
+#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
+#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)
+#  ifndef PRINTF_INTPTR_MODIFIER
+#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+#  endif
+#  ifndef PTRDIFF_MAX
+#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef PTRDIFF_MIN
+#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef UINTPTR_MAX
+#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MAX
+#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MIN
+#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef INTPTR_C
+#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+#  endif
+#  ifndef UINTPTR_C
+#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+#  endif
+  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
+  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
+# else
+/* TODO -- This following is likely wrong for some platforms, and does
+   nothing for the definition of uintptr_t. */
+  typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+/*
+ *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ */
+
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/* 
+ *  Please compile with the maximum warning settings to make sure macros are not
+ *  defined more than once.
+ */
+ 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+ 
+/*
+ *  Token pasting helpers.  The two-level definition lets the arguments be
+ *  macro-expanded before they are pasted together.
+ */
+#define glue2_aux(x,y) x ## y
+#define glue2(x,y) glue2_aux(x,y)
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z)
+
+/*
+ *  DECLU(8) expands to "uint8_t u8 = UINT8_C (0);", DECLI(8) to
+ *  "int8_t i8 = INT8_C (0);".  Operators are deliberately kept out of the
+ *  pastes: gluing an identifier to '=', '~' or '!=' does not form a valid
+ *  preprocessing token and is rejected by some compilers.
+ */
+#define DECLU(bits) glue3(uint,bits,_t) glue2(u,bits) = glue3(UINT,bits,_C) (0);
+#define DECLI(bits) glue3(int,bits,_t) glue2(i,bits) = glue3(INT,bits,_C) (0);
+
+/* DECL(U,bits) / DECL(I,bits) dispatch to DECLU(bits) / DECLI(bits). */
+#define DECL(us,bits) glue2(DECL,us) (bits)
+
+/* Inverts all bits of u<bits> and reports if the result differs from UINT<bits>_MAX. */
+#define TESTUMAX(bits) glue2(u,bits) = ~glue2(u,bits); if (glue3(UINT,bits,_MAX) != glue2(u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
+ 
+/*
+ *  Self-test entry point, only compiled when __TEST_PSTDINT_FOR_CORRECTNESS
+ *  is defined.  It declares one zero-initialised variable per integer type,
+ *  checks that each prints identically to a plain int 0 when its PRINTF_*
+ *  modifier is used, and then checks the UINT*_MAX macros.  Problems are
+ *  reported on stdout; the function always returns EXIT_SUCCESS.
+ */
+int main () {
+	/* Declares i8, u8, i16, u16, i32, u32 (and i64, u64 when available), all set to 0. */
+	DECL(I,8)
+	DECL(U,8)
+	DECL(I,16)
+	DECL(U,16)
+	DECL(I,32)
+	DECL(U,32)
+#ifdef INT64_MAX
+	DECL(I,64)
+	DECL(U,64)
+#endif
+	intmax_t imax = INTMAX_C(0);
+	uintmax_t umax = UINTMAX_C(0);
+	char str0[256], str1[256];
+
+	/* Reference output: an int 0 followed by ~0 in hex. */
+	sprintf (str0, "%d %x\n", 0, ~0);
+	
+	/*
+	 *  Each variable below holds 0, so formatting it with the matching
+	 *  modifier must reproduce str0.  The trailing ~0 argument makes a wrong
+	 *  modifier (i.e. a wrong argument size) visible in the second field.
+	 */
+	sprintf (str1, "%d %x\n",  i8, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+	sprintf (str1, "%u %x\n",  u8, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+	sprintf (str1, "%d %x\n",  i16, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+	sprintf (str1, "%u %x\n",  u16, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	
+	sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+	/* Note: only i64 gets a formatting check; u64 is exercised by TESTUMAX(64) alone. */
+#ifdef INT64_MAX	
+	sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+#endif
+	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	
+	
+	/* Invert each zero-valued unsigned variable and compare it with the corresponding UINT*_MAX. */
+	TESTUMAX(8);
+	TESTUMAX(16);
+	TESTUMAX(32);
+#ifdef INT64_MAX
+	TESTUMAX(64);
+#endif
+
+	return EXIT_SUCCESS;
+}
+
+#endif
--- a/license.txt
+++ b/license.txt
@ -0,0 +1,53 @@
+
+Original HLSLcc source code Copyright (c) 2012 James Jones
+Further improvements Copyright (c) 2014-2016 Unity Technologies
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+This software makes use of the bstring library which is provided under the following license:
+
+Copyright (c) 2002-2008 Paul Hsieh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+    Redistributions of source code must retain the above copyright notice, 
+    this list of conditions and the following disclaimer. 
+
+    Redistributions in binary form must reproduce the above copyright notice, 
+    this list of conditions and the following disclaimer in the documentation 
+    and/or other materials provided with the distribution. 
+
+    Neither the name of bstrlib nor the names of its contributors may be used 
+    to endorse or promote products derived from this software without 
+    specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+POSSIBILITY OF SUCH DAMAGE.
--- a/src/ControlFlowGraph.cpp
+++ b/src/ControlFlowGraph.cpp
@ -0,0 +1,824 @@
+
+#include "internal_includes/debug.h"
+#include "internal_includes/ControlFlowGraph.h"
+#include "internal_includes/ControlFlowGraphUtils.h"
+#include "internal_includes/Instruction.h"
+#include "internal_includes/Operand.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include <algorithm>
+
+using namespace HLSLcc::ControlFlow;
+using HLSLcc::ForEachOperand;
+
+// Builds the control flow graph for the instruction stream that starts at firstInstruction
+// and returns its root basic block. Whatever an earlier Build() call produced is discarded.
+const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction)
+{
+	m_BlockMap.clear();
+	m_BlockStorage.clear();
+
+	// The BasicBlock constructor registers the new block into m_BlockStorage, so the block
+	// lives exactly as long as this graph does. It also goes on to create the blocks it leads to.
+	const Instruction *entryPoint = Utils::GetNextNonLabelInstruction(firstInstruction);
+	BasicBlock *root = new BasicBlock(entryPoint, *this, NULL);
+
+	// Iterate the reachable sets to a fixed point: keep sweeping over every block
+	// until one complete sweep leaves all of them unchanged.
+	for (;;)
+	{
+		bool anyBlockChanged = false;
+		for (const shared_ptr<BasicBlock> &block : m_BlockStorage)
+		{
+			// Every block must be visited on every sweep, so no early-out here
+			if (block->RebuildReachable())
+				anyBlockChanged = true;
+		}
+
+		if (!anyBlockChanged)
+			break;
+	}
+
+	return *root;
+}
+
+// Read-only lookup of the basic block that begins at the given instruction.
+// Leading label instructions are skipped before the lookup; returns NULL if no block starts there.
+const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const
+{
+	const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
+	BasicBlockMap::const_iterator found = m_BlockMap.find(blockHead);
+	if (found != m_BlockMap.end())
+		return found->second;
+
+	return NULL;
+}
+
+// Mutable lookup of the basic block that begins at the given instruction.
+// Leading label instructions are skipped before the lookup; returns NULL if no block starts there.
+BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction)
+{
+	const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
+	BasicBlockMap::iterator found = m_BlockMap.find(blockHead);
+	if (found != m_BlockMap.end())
+		return found->second;
+
+	return NULL;
+}
+
+
+
+
+// Creates the basic block that begins at psFirst and wires it into the graph.
+// The constructor is private; blocks only come into existence through ControlFlowGraph::Build().
+// psPrecedingBlockHead is the first instruction of the block we were reached from, or NULL for the root.
+BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead)
+	: m_Graph(graph)
+	, m_First(psFirst)
+	, m_Last(NULL)
+{
+	// Start out with empty connectivity...
+	m_Preceding.clear();
+	m_Succeeding.clear();
+	// ...and empty dataflow sets
+	m_UEVar.clear();
+	m_VarKill.clear();
+	m_DEDef.clear();
+	m_Reachable.clear();
+
+	// A block never begins on a label; the caller is expected to have skipped those already
+	ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst));
+
+	// The graph's block storage takes ownership of this object
+	m_Graph.m_BlockStorage.push_back(shared_ptr<BasicBlock>(this));
+
+	// Make the block discoverable by its first instruction; no other block may already start there
+	bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second;
+	ASSERT(didInsert);
+
+	// Link with the predecessor in both directions (the root has none)
+	if (psPrecedingBlockHead != NULL)
+	{
+		BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead);
+		ASSERT(prec != 0);
+		m_Preceding.insert(psPrecedingBlockHead);
+		didInsert = prec->m_Succeeding.insert(psFirst).second;
+		ASSERT(didInsert);
+	}
+
+	// Scan the instructions of this block; this also creates the successor blocks
+	Build();
+}
+
+// Scans the instructions of this block, starting at m_First and stopping at the first instruction
+// that ends a block. Along the way it fills the UEVar, VarKill and DEDef sets for temp registers
+// (tracked per component as register * 4 + component) and creates or connects the successor blocks.
+void BasicBlock::Build()
+{
+	// Visitor for source operands: a temp component that is read before this block has
+	// written it is upwards-exposed.
+	auto recordRead = [this](const Instruction *, const Operand *psOperand, uint32_t)
+	{
+		if (psOperand->eType != OPERAND_TYPE_TEMP)
+			return;
+
+		const uint32_t tempReg = psOperand->ui32RegisterNumber;
+		const uint32_t accessMask = psOperand->GetAccessMask();
+
+		for (int k = 0; k < 4; k++)
+		{
+			const bool isRead = (accessMask & (1 << k)) != 0;
+			const uint32_t regIdx = tempReg * 4 + k;
+
+			// Components already redefined within this block are not exposed upwards.
+			// Inserting twice is harmless, m_UEVar is a set.
+			if (isRead && m_VarKill.find(regIdx) == m_VarKill.end())
+				m_UEVar.insert(regIdx);
+		}
+	};
+
+	// Visitor for destination operands: every written temp component is killed, and this
+	// write becomes the one and only downward-exposed definition of it within the block.
+	auto recordWrite = [this](const Instruction *psInst, const Operand *psOperand, uint32_t)
+	{
+		if (psOperand->eType != OPERAND_TYPE_TEMP)
+			return;
+
+		const uint32_t tempReg = psOperand->ui32RegisterNumber;
+		const uint32_t accessMask = psOperand->GetAccessMask();
+
+		for (int k = 0; k < 4; k++)
+		{
+			if ((accessMask & (1 << k)) == 0)
+				continue;
+
+			const uint32_t regIdx = tempReg * 4 + k;
+			m_VarKill.insert(regIdx);
+
+			// Replace whatever earlier definition this block had for the component
+			Definition d(psInst, psOperand);
+			auto &defsForComponent = m_DEDef[regIdx];
+			defsForComponent.clear();
+			defsForComponent.insert(d);
+		}
+	};
+
+	const Instruction *inst = m_First;
+	for (;; inst++)
+	{
+		// Sources are handled before destinations, so an instruction that both reads and writes
+		// the same component registers the read against the kill set as it was before the write.
+		ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, recordRead);
+		ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND, recordWrite);
+
+		// Every opcode listed below terminates the block; anything else lets the scan continue.
+		bool blockDone = true;
+		switch (inst->eOpcode)
+		{
+		case OPCODE_RET:
+			// End of the line, no successors
+			break;
+
+		case OPCODE_RETC:
+			// The block ends here and the following instruction starts a fresh one.
+			// There REALLY should be no existing blocks for this one
+			ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst+1)) == NULL);
+			AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
+			break;
+
+		// Not branches as such, but control can arrive after these from elsewhere,
+		// so a new block has to begin right behind them.
+		// NOTE(review): OPCODE_DEFAULT is not listed here although GetNextNonLabelInstruction()
+		// treats it as a label, so a block falling through into DEFAULT keeps going - confirm this is intended.
+		case OPCODE_LOOP:
+		case OPCODE_CASE:
+		case OPCODE_ENDIF:
+		case OPCODE_ENDSWITCH:
+			AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
+			break;
+
+		// Two-way branches: either carry on with the next instruction or go to the jump point
+		case OPCODE_IF:
+		case OPCODE_BREAKC:
+		case OPCODE_CONTINUEC:
+		{
+			const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
+			ASSERT(jumpPoint != NULL);
+
+			AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst+1));
+			AddChildBasicBlock(jumpPoint);
+			break;
+		}
+
+		// Multi-way branch: one successor per group of CASE/DEFAULT labels
+		case OPCODE_SWITCH:
+		{
+			bool sawEndSwitch = false;
+			bool needConnectToParent = false;
+
+			// GetJumpPoint() always scrolls past the labels, so each follow-up search has to
+			// restart from the last label in front of the previous jump point (a bit of a hack).
+			const Instruction *searchFrom = inst;
+			for (;;)
+			{
+				const Instruction *jumpPoint = Utils::GetJumpPoint(searchFrom, &sawEndSwitch, &needConnectToParent);
+				ASSERT(jumpPoint != NULL);
+
+				if (!sawEndSwitch || needConnectToParent)
+					AddChildBasicBlock(jumpPoint);
+
+				if (sawEndSwitch)
+					break;
+
+				ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT);
+				searchFrom = jumpPoint - 1;
+			}
+			break;
+		}
+
+		// Unconditional jumps: a single successor, the jump point
+		case OPCODE_BREAK:
+		case OPCODE_ELSE:
+		case OPCODE_CONTINUE:
+		case OPCODE_ENDLOOP:
+		{
+			const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
+			ASSERT(jumpPoint != NULL);
+
+			AddChildBasicBlock(jumpPoint);
+			break;
+		}
+
+		default:
+			// Ordinary instruction, the block goes on
+			blockDone = false;
+			break;
+		}
+
+		if (blockDone)
+			break;
+	}
+
+	// Initially the reachable set is just what this block itself defines;
+	// RebuildReachable() adds what flows in from the predecessors.
+	m_Reachable = m_DEDef;
+
+	// inst now points at the instruction that terminated the block
+	m_Last = inst;
+//	printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id);
+}
+
+
+// Makes the block that starts at psFirst a successor of this block, creating it first if
+// the graph does not have it yet. Returns the successor block.
+BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst)
+{
+	BasicBlock *child = m_Graph.GetBasicBlockForInstruction(psFirst);
+	if (child == NULL)
+	{
+		// Not built yet. The constructor registers the block in the graph and connects it to us.
+		return new BasicBlock(psFirst, m_Graph, m_First);
+	}
+
+	// The block is already in the graph, so only the two edges need recording
+	child->m_Preceding.insert(m_First);
+	m_Succeeding.insert(psFirst);
+	return child;
+}
+
+// Recomputes the set of definitions that can reach the end of this block from the current
+// reachable sets of its predecessors. Returns true if the set gained at least one definition,
+// in which case m_Reachable has been replaced with the new set; otherwise it keeps its contents.
+//
+// Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes.
+// Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill())
+bool BasicBlock::RebuildReachable()
+{
+	ReachableVariables newReachable = m_DEDef;
+	bool hasChanges = false;
+
+	// Loop each predecessor
+	for (const Instruction *precHead : Preceding())
+	{
+		const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(precHead);
+		const ReachableVariables &precReachable = prec->Reachable();
+
+		// Loop each variable*component.
+		// The element is named through the container's own value_type on purpose: spelling it as
+		// std::pair<uint32_t, ...> does not match a map's pair<const Key, T>, which makes the compiler
+		// copy the entry - including its whole set of definitions - into a temporary on every visit.
+		for (const ReachableVariables::value_type &precVar : precReachable)
+		{
+			const uint32_t regIdx = precVar.first;
+			const ReachableDefinitionsPerVariable &defs = precVar.second;
+
+			// Already killed in this block? Then nothing from the predecessor gets through.
+			if (VarKill().find(regIdx) != VarKill().end())
+				continue;
+
+			// Only do comparisons against current definitions if we've yet to find any changes
+			ReachableDefinitionsPerVariable *currReachablePerVar = NULL;
+			if (!hasChanges)
+				currReachablePerVar = &m_Reachable[regIdx];
+
+			ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx];
+
+			// Loop each definition
+			for (const Definition &d : defs)
+			{
+				// A definition we did not have before means the set has grown
+				if (!hasChanges && currReachablePerVar->find(d) == currReachablePerVar->end())
+					hasChanges = true;
+
+				newReachablePerVar.insert(d);
+			}
+		}
+	}
+
+	if (hasChanges)
+	{
+		std::swap(m_Reachable, newReachable);
+	}
+
+	return hasChanges;
+}
+
+// In-place union: after the call, a holds for every variable*component all the definitions
+// it had before plus all the definitions b has for it. b is left untouched.
+void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
+{
+	// The union of a set with itself is the set; bail out so we never insert a container's
+	// own range into itself.
+	if (&a == &b)
+		return;
+
+	// The element is named through the container's own value_type on purpose: spelling it as
+	// std::pair<uint32_t, ...> does not match a map's pair<const Key, T>, which makes the compiler
+	// copy the entry - including its whole set of definitions - into a temporary on every visit.
+	for (const ReachableVariables::value_type &rpvPair : b)
+	{
+		const ReachableDefinitionsPerVariable &rpv = rpvPair.second;
+
+		// operator[] creates an empty set when a has no definitions for this variable yet, so
+		// "no previous definitions" and "merge into existing ones" are both a plain range insert.
+		a[rpvPair.first].insert(rpv.begin(), rpv.end());
+	}
+}
+
+#if ENABLE_UNIT_TESTS
+
+#define UNITY_EXTERNAL_TOOL 1
+#include "Testing.h" // From Runtime/Testing
+
+UNIT_TEST_SUITE(HLSLccTests)
+{
+	// A straight-line program (one MOV followed by RET) must end up as a single basic block
+	// with no neighbours, whose kill set and downward-exposed definitions cover t0.xyzw.
+	// NOTE(review): the test-only Instruction constructor is not visible here; judging by the
+	// per-instruction comments its arguments appear to be (id, opcode, dest register, dest mask,
+	// src register, src mask, ...), with 0xffffffff standing for a non-temp register - confirm.
+	TEST(ControlFlowGraph_Build_Simple_Works)
+	{
+		Instruction inst[] = 
+		{
+			// MOV t0.xyzw, I0.xyzw
+			Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf),
+			Instruction(1, OPCODE_RET)
+		};
+
+		ControlFlowGraph cfg;
+		const BasicBlock &root = cfg.Build(inst);
+
+		// The one and only block spans the whole program
+		CHECK_EQUAL(&inst[0], root.First());
+		CHECK_EQUAL(&inst[1], root.Last());
+
+		CHECK(root.Preceding().empty());
+		CHECK(root.Succeeding().empty());
+
+		CHECK_EQUAL(4, root.VarKill().size());
+
+		// Check that all components from t0 are killed
+		CHECK_EQUAL(1, root.VarKill().count(0));
+		CHECK_EQUAL(1, root.VarKill().count(1));
+		CHECK_EQUAL(1, root.VarKill().count(2));
+		CHECK_EQUAL(1, root.VarKill().count(3));
+
+		// Each component's downward-exposed definition is the MOV and its destination operand
+		CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction);
+		CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand);
+		CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction);
+		CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand);
+		CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction);
+		CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand);
+		CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction);
+		CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand);
+
+	}
+
+	// IF / ELSE / ENDIF must split the program into four blocks: B0 (up to and including IF),
+	// B1 (the if branch), B2 (the else branch) and B3 (everything after ENDIF).
+	// Verifies block ranges, the edges between the blocks and the per-block dataflow sets.
+	TEST(ControlFlowGraph_Build_If_Works)
+	{
+		Instruction inst[] =
+		{
+			// B0
+			// 0: MOV t1.xyzw, i0.xyzw
+			Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf),
+			// 1: MUL t0, t1, t1
+			Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf),
+			// 2: IF t1.y
+			Instruction(2, OPCODE_IF, 1, 2),
+			// B1
+			// 3: MOV o0, t0
+			Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf),
+			// 4: ELSE
+			Instruction(4, OPCODE_ELSE),
+			// B2
+			// 5: MOV o0, t1
+			Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
+			// 6: ENDIF
+			Instruction(6, OPCODE_ENDIF),
+			// B3
+			// 7: NOP
+			Instruction(7, OPCODE_NOP),
+			// 8: RET
+			Instruction(8, OPCODE_RET)
+		};
+
+		ControlFlowGraph cfg;
+		const BasicBlock &root = cfg.Build(inst);
+
+		// B0 ends at the IF
+		CHECK_EQUAL(root.First(), &inst[0]);
+		CHECK_EQUAL(root.Last(), &inst[2]);
+
+		CHECK(root.Preceding().empty());
+
+		const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
+		const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
+		const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]);
+
+		CHECK(b1 != NULL);
+		CHECK(b2 != NULL);
+		CHECK(b3 != NULL);
+
+		CHECK_EQUAL(&inst[3], b1->First());
+		CHECK_EQUAL(&inst[5], b2->First());
+		CHECK_EQUAL(&inst[7], b3->First());
+
+		// Each block ends on the instruction that terminates it (ELSE, ENDIF, RET)
+		CHECK_EQUAL(&inst[4], b1->Last());
+		CHECK_EQUAL(&inst[6], b2->Last());
+		CHECK_EQUAL(&inst[8], b3->Last());
+
+		// B0 branches to B1 and B2 and nowhere else
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[5]));
+		CHECK_EQUAL(2, root.Succeeding().size());
+
+		// Blocks are identified by their first instruction, so "preceded by B0" reads as &inst[0]
+		CHECK_EQUAL(1, b1->Preceding().size());
+		CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
+
+		CHECK_EQUAL(1, b2->Preceding().size());
+		CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
+
+		// B3 is the join point: reached from B1 and B2, but never directly from B0
+		CHECK_EQUAL(2, b3->Preceding().size());
+		CHECK_EQUAL(0, b3->Preceding().count(&inst[0]));
+		CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
+		CHECK_EQUAL(1, b3->Preceding().count(&inst[5]));
+
+		// The if block must have upwards-exposed t0
+		CHECK_EQUAL(1, b1->UEVar().count(0));
+		CHECK_EQUAL(1, b1->UEVar().count(1));
+		CHECK_EQUAL(1, b1->UEVar().count(2));
+		CHECK_EQUAL(1, b1->UEVar().count(3));
+
+		// The else block must have upwards-exposed t1
+		CHECK_EQUAL(1, b2->UEVar().count(4));
+		CHECK_EQUAL(1, b2->UEVar().count(5));
+		CHECK_EQUAL(1, b2->UEVar().count(6));
+		CHECK_EQUAL(1, b2->UEVar().count(7));
+
+		CHECK_EQUAL(8, root.VarKill().size());
+
+		// Check that all components from t0 and t1 are killed
+		CHECK_EQUAL(1, root.VarKill().count(0));
+		CHECK_EQUAL(1, root.VarKill().count(1));
+		CHECK_EQUAL(1, root.VarKill().count(2));
+		CHECK_EQUAL(1, root.VarKill().count(3));
+
+		CHECK_EQUAL(1, root.VarKill().count(4));
+		CHECK_EQUAL(1, root.VarKill().count(5));
+		CHECK_EQUAL(1, root.VarKill().count(6));
+		CHECK_EQUAL(1, root.VarKill().count(7));
+
+		// The expected downwards-exposed definitions:
+		// B0: t0, t1
+		// B1-B3: none
+
+		CHECK_EQUAL(8, root.DEDef().size());
+		CHECK_EQUAL(0, b1->DEDef().size());
+		CHECK_EQUAL(0, b2->DEDef().size());
+		CHECK_EQUAL(0, b3->DEDef().size());
+
+		// The root has no predecessors, so only its own definitions can reach its end
+		CHECK(root.DEDef()==root.Reachable());
+
+		// B1-B3 write no temps, so B0's definitions pass through them unchanged
+		CHECK(root.Reachable()==b1->Reachable());
+		CHECK(root.Reachable()==b2->Reachable());
+		CHECK(root.Reachable()==b3->Reachable());
+
+
+	}
+
+	// A SWITCH with stacked labels, a fall-through case, a BREAK, a BREAKC and a final case that
+	// runs into ENDSWITCH. Verifies the block ranges, every edge of the graph and the
+	// definitions that reach the block after the switch.
+	TEST(ControlFlowGraph_Build_SwitchCase_Works)
+	{
+		Instruction inst[] =
+		{
+			// Start B0
+			// i0: MOV t0.x, I0.x
+			Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
+			// i1: MOV t1.xyz, I0.yzw
+			Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe),
+			// i2: MOV t1.w, t0.x
+			// NOTE(review): the source register argument is 0xffffffff, like the I0 reads above,
+			// so this looks like a read of I0.x rather than t0.x - confirm.
+			Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1),
+			// i3: MOV t2, I0
+			Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf),
+			// i4: SWITCH t0.y
+			// NOTE(review): the arguments (1, 2) match the instruction annotated "IF t1.y" in the
+			// If test, so this presumably switches on t1.y - confirm.
+			Instruction(4, OPCODE_SWITCH, 1, 2),
+			// End B0
+			// i5: CASE
+			Instruction(5, OPCODE_CASE),
+			// i6: DEFAULT
+			Instruction(6, OPCODE_DEFAULT),
+			// Start B1
+			// i7: MOV t1.z, t0.x
+			Instruction(7, OPCODE_MOV, 1, 4, 0, 1),
+			// i8: CASE
+			Instruction(8, OPCODE_CASE),
+			// End B1
+			// Start B2
+			// i9: MOV t1.z, t2.x
+			Instruction(9, OPCODE_MOV, 1, 4, 2, 1),
+			// i10: BREAK
+			Instruction(10, OPCODE_BREAK),
+			// End B2
+			// i11: CASE
+			Instruction(11, OPCODE_CASE),
+			// Start B3
+			// i12: MOV t1.z, t2.y
+			Instruction(12, OPCODE_MOV, 1, 4, 2, 2),
+			// i13: BREAKC t0.x
+			Instruction(13, OPCODE_BREAKC, 0, 1),
+			// End B3
+			// i14: CASE
+			Instruction(14, OPCODE_CASE),
+			// Start B4
+			// i15: MOV t1.z, t2.z
+			Instruction(15, OPCODE_MOV, 1, 4, 2, 4),
+			// i16: ENDSWITCH
+			Instruction(16, OPCODE_ENDSWITCH),
+			// End B4
+			// Start B5
+			// i17: MOV o0, t1
+			Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
+			// i18: RET
+			Instruction(18, OPCODE_RET)
+			// End B5
+		};
+
+		ControlFlowGraph cfg;
+		const BasicBlock &root = cfg.Build(inst);
+
+		// B0 ends at the SWITCH
+		CHECK_EQUAL(&inst[0], root.First());
+		CHECK_EQUAL(&inst[4], root.Last());
+
+		// Blocks are looked up by the first non-label instruction of each case body
+		const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]);
+		const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]);
+		const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]);
+		const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]);
+		const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]);
+
+		CHECK(b1 != NULL);
+		CHECK(b2 != NULL);
+		CHECK(b3 != NULL);
+		CHECK(b4 != NULL);
+		CHECK(b5 != NULL);
+
+		// Check instruction ranges
+		CHECK_EQUAL(&inst[8], b1->Last());
+		CHECK_EQUAL(&inst[10], b2->Last());
+		CHECK_EQUAL(&inst[13], b3->Last());
+		CHECK_EQUAL(&inst[16], b4->Last());
+		CHECK_EQUAL(&inst[18], b5->Last());
+
+		// Nothing before the root, nothing after b5
+		CHECK(root.Preceding().empty());
+		CHECK(b5->Succeeding().empty());
+
+		// Check that all connections are there and no others.
+
+		// B0->B1
+		// B0->B2
+		// B0->B3
+		// B0->B4
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[7]));
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[9]));
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[12]));
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[15]));
+
+		CHECK_EQUAL(4, root.Succeeding().size());
+
+		// B1
+
+		// B1->B2
+		CHECK_EQUAL(1, b1->Succeeding().count(&inst[9]));
+		CHECK_EQUAL(1, b1->Succeeding().size());
+
+		// B0->B1, reverse
+		CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
+		CHECK_EQUAL(1, b1->Preceding().size());
+
+		// B2
+
+		// B2->B5
+		CHECK_EQUAL(1, b2->Succeeding().count(&inst[17]));
+		CHECK_EQUAL(1, b2->Succeeding().size());
+		// Entered by falling through from B1 and by the switch dispatch from B0
+		CHECK_EQUAL(1, b2->Preceding().count(&inst[7]));
+		CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
+		CHECK_EQUAL(2, b2->Preceding().size());
+
+		// B3
+		// B3->B4
+		// B3->B5
+		CHECK_EQUAL(1, b3->Succeeding().count(&inst[15]));
+		CHECK_EQUAL(1, b3->Succeeding().count(&inst[17]));
+		CHECK_EQUAL(2, b3->Succeeding().size());
+		CHECK_EQUAL(1, b3->Preceding().count(&inst[0]));
+		CHECK_EQUAL(1, b3->Preceding().size());
+
+		// B4
+		// Two predecessors: B0 and B3 (see B3->B4 above); only the B0 edge is checked by address
+		CHECK_EQUAL(1, b4->Succeeding().count(&inst[17]));
+		CHECK_EQUAL(1, b4->Succeeding().size());
+		CHECK_EQUAL(1, b4->Preceding().count(&inst[0]));
+		CHECK_EQUAL(2, b4->Preceding().size());
+
+		// B5
+		CHECK_EQUAL(0, b5->Succeeding().size());
+		CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4
+		CHECK_EQUAL(1, b5->Preceding().count(&inst[9]));
+		CHECK_EQUAL(1, b5->Preceding().count(&inst[12]));
+		CHECK_EQUAL(1, b5->Preceding().count(&inst[15]));
+
+
+		// Verify reachable sets
+
+		// 9 tracked components: t0.x, t1.xyzw and t2.xyzw
+		CHECK(root.Reachable() == root.DEDef());
+		CHECK_EQUAL(9, root.Reachable().size());
+
+		// B5 should have these reachables:
+		// t0.x only from b0
+		// t1.xy from b0, i1
+		// t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2)
+		// t1.w from b0, i2
+		// t2.xyzw from b0, i3
+
+		// Cast away const so [] works.
+		// NOTE(review): if ReachableVariables is a std::map, as the use of [] suggests, indexing a
+		// missing key inserts an empty entry - which is why size() is checked before the lookups
+		// below, and why r[1]..r[3] report 0 definitions. Confirm against the typedef.
+		BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable();
+
+		CHECK_EQUAL(9, r.size());
+
+		// t0: only .x was ever written
+		CHECK_EQUAL(1, r[0].size());
+		CHECK_EQUAL(0, r[1].size());
+		CHECK_EQUAL(0, r[2].size());
+		CHECK_EQUAL(0, r[3].size());
+		CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction);
+
+		// t1: a single definition each for .x .y .w, three competing ones for .z
+		CHECK_EQUAL(1, r[4].size());
+		CHECK_EQUAL(1, r[5].size());
+		CHECK_EQUAL(3, r[6].size());
+		CHECK_EQUAL(1, r[7].size());
+
+		const BasicBlock::ReachableDefinitionsPerVariable &d = r[6];
+		BasicBlock::ReachableDefinitionsPerVariable t;
+		t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0]));
+		t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0]));
+		t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0]));
+
+		CHECK(t == d);
+
+		// t2: one definition per component
+		CHECK_EQUAL(1, r[8].size());
+		CHECK_EQUAL(1, r[9].size());
+		CHECK_EQUAL(1, r[10].size());
+		CHECK_EQUAL(1, r[11].size());
+
+
+	}
+
+	// LOOP / BREAKC / ENDLOOP: verifies the back edge from the loop tail to the loop head and
+	// that the reachable sets converge correctly when definitions travel around the loop.
+	TEST(ControlFlowGraph_Build_Loop_Works)
+	{
+		Instruction inst[] =
+		{
+			// Start B0
+			// i0: MOV t0.x, I0.x
+			Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
+			// i1: MOV t1.xy, I0.zw // The .x definition should not make it past the loop, .y should.
+			Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc),
+			// i2: LOOP
+			Instruction(2, OPCODE_LOOP, 1, 2),
+			// End B0 -> B1
+			// Begin B1
+			// i3: MOV t1.x, t0.x
+			Instruction(3, OPCODE_MOV, 1, 1, 0, 1),
+			// i4: BREAKC t0.x
+			Instruction(4, OPCODE_BREAKC, 0, 1),
+			// End B1 -> B2, B3
+			// Begin B2
+			// i5: ADD t0.x, t0.y
+			Instruction(5, OPCODE_ADD, 0, 1, 0, 2),
+			// i6: MOV t1.x, t0.x  // This should never show up as definition
+			Instruction(6, OPCODE_MOV, 1, 1, 0, 1),
+			// i7: ENDLOOP
+			Instruction(7, OPCODE_ENDLOOP),
+			// End B2 -> B1
+			// Start B3
+			// i8: MOV O0.x, t1.x
+			Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1),
+			// i9: RET
+			Instruction(9, OPCODE_RET),
+			// End B3
+		};
+
+		ControlFlowGraph cfg;
+		const BasicBlock &root = cfg.Build(inst);
+
+		// B0 ends at the LOOP
+		CHECK_EQUAL(&inst[0], root.First());
+		CHECK_EQUAL(&inst[2], root.Last());
+
+		const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
+		const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
+		const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]);
+
+		CHECK(b1 != NULL);
+		CHECK(b2 != NULL);
+		CHECK(b3 != NULL);
+
+		// Check instruction ranges
+		CHECK_EQUAL(&inst[4], b1->Last());
+		CHECK_EQUAL(&inst[7], b2->Last());
+		CHECK_EQUAL(&inst[9], b3->Last());
+
+		// Nothing before the root, nothing after b3
+		CHECK(root.Preceding().empty());
+		CHECK(b3->Succeeding().empty());
+
+		// Check that all connections are there and no others.
+
+		// B0->B1
+		CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
+		CHECK_EQUAL(1, root.Succeeding().size());
+
+		// B1
+
+		// B1->B2
+		// B1->B3
+		CHECK_EQUAL(1, b1->Succeeding().count(&inst[5]));
+		CHECK_EQUAL(1, b1->Succeeding().count(&inst[8]));
+		CHECK_EQUAL(2, b1->Succeeding().size());
+
+		// B0->B1, reverse
+		CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
+		// We may also come from B2
+		CHECK_EQUAL(1, b1->Preceding().count(&inst[5]));
+		CHECK_EQUAL(2, b1->Preceding().size());
+
+		// B2
+
+		// B2->B1
+		CHECK_EQUAL(1, b2->Succeeding().count(&inst[3]));
+		CHECK_EQUAL(1, b2->Succeeding().size());
+		CHECK_EQUAL(1, b2->Preceding().count(&inst[3]));
+		CHECK_EQUAL(1, b2->Preceding().size());
+
+		// B3
+		CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
+		CHECK_EQUAL(1, b3->Preceding().size());
+
+		// Verify reachable sets
+
+
+		// Scratch map, rebuilt for every comparison below. Keys are register * 4 + component.
+		BasicBlock::ReachableVariables t;
+
+		// B0 DEDef and Reachable
+		t.clear();
+		t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
+		t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
+		t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
+
+		CHECK(root.DEDef() == t);
+		CHECK(root.Reachable() == root.DEDef());
+
+		// B1 DEDef and Reachable 
+		t.clear();
+		t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0]));
+		CHECK(b1->DEDef() == t);
+
+		t = b1->DEDef();
+		// t0.x from i0, t1.y (but not .x) from i1
+		t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
+		t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
+
+		// t0.x from i5, but nothing from i6
+		t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
+		CHECK(b1->Reachable() == t);
+
+		// B2
+		t.clear();
+		t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
+		t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0]));
+		CHECK(b2->DEDef() == t);
+
+		// B2 redefines t0.x and t1.x itself, so only t1.y gets through from outside
+		t = b2->DEDef();
+		t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
+
+		CHECK(b2->Reachable() == t);
+
+		// B3
+		t.clear();
+		CHECK(b3->DEDef() == t);
+		// t0.x from i0, t1.y from i1
+		t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
+		t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
+		
+		// t1.x from i3
+		t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0]));
+		
+		// t0.x from i5
+		t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
+		
+		CHECK(b3->Reachable() == t);
+	}
+
+}
+
+#endif
+
--- a/src/ControlFlowGraphUtils.cpp
+++ b/src/ControlFlowGraphUtils.cpp
@ -0,0 +1,121 @@
+
+#include "ControlFlowGraphUtils.h"
+
+#include "internal_includes/debug.h"
+#include "internal_includes/Instruction.h"
+#include "internal_includes/Operand.h"
+
+
+
+// Starting at psStart, steps over any run of label-like instructions (CASE, DEFAULT, ENDSWITCH, LOOP)
+// and returns the first instruction that is none of those - psStart itself if it is not a label.
+// When an ENDSWITCH is stepped over and sawEndSwitch is non-NULL, *sawEndSwitch is set to true.
+// The flag is never cleared here, so the caller is responsible for initializing it.
+const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/)
+{
+	const Instruction *inst = psStart;
+	for (;; inst++)
+	{
+		bool isLabel;
+		switch (inst->eOpcode)
+		{
+		case OPCODE_CASE:
+		case OPCODE_DEFAULT:
+		case OPCODE_ENDSWITCH:
+		case OPCODE_LOOP:
+			isLabel = true;
+			break;
+		default:
+			isLabel = false;
+			break;
+		}
+
+		if (!isLabel)
+			return inst;
+
+		// We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it)
+		ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL);
+		if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL)
+			*sawEndSwitch = true;
+	}
+}
+
+// Finds where control goes when the flow-control instruction psStart transfers it elsewhere.
+// The result is always the first real instruction of the target block: any labels at the
+// target (multiple consecutive CASE/DEFAULT, LOOP, ENDSWITCH) are skipped.
+//
+//   IF                 -> after the next same-level ELSE or ENDIF
+//   ELSE               -> after the next same-level ELSE or ENDIF (the same scan as IF)
+//   BREAK / BREAKC     -> after the next same-level ENDLOOP or ENDSWITCH
+//   CONTINUE/C, ENDLOOP-> after the previous same-level LOOP (backwards scan)
+//   SWITCH/CASE/DEFAULT-> after the next same-level CASE, DEFAULT or ENDSWITCH
+//
+// LOOP and ENDSWITCH are nothing but labels, yet they still start a new basic block.
+// CASE labels fall through.
+//
+// Outputs, only meaningful for the SWITCH/CASE/DEFAULT scan:
+//   *sawEndSwitch        set when the label that ended the scan was the ENDSWITCH itself.
+//   *needConnectToParent set when an ENDSWITCH was stepped over while skipping the labels
+//                        that follow the label that ended the scan.
+// For every other opcode sawEndSwitch is simply forwarded to the label skipping.
+// Neither flag is ever cleared here. Returns 0 for an unsupported opcode (after asserting).
+const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/)
+{
+	OPCODE_TYPE op = psStart->eOpcode;
+	ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC
+		|| op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT
+		|| op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC);
+
+	// Nesting level relative to psStart; only constructs found at level 0 belong to us
+	int depth = 0;
+
+	switch (op)
+	{
+	case OPCODE_IF:
+	case OPCODE_ELSE:
+		// Forward scan, skipping over complete nested IF..ENDIF constructs
+		for (const Instruction *inst = psStart + 1; ; inst++)
+		{
+			const OPCODE_TYPE cur = inst->eOpcode;
+			if (depth == 0 && (cur == OPCODE_ELSE || cur == OPCODE_ENDIF))
+				return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
+
+			if (cur == OPCODE_IF)
+				depth++;
+			else if (cur == OPCODE_ENDIF)
+				depth--;
+		}
+
+	case OPCODE_BREAK:
+	case OPCODE_BREAKC:
+		// Forward scan, skipping over complete nested loops and switches
+		for (const Instruction *inst = psStart + 1; ; inst++)
+		{
+			const OPCODE_TYPE cur = inst->eOpcode;
+			if (cur == OPCODE_ENDLOOP || cur == OPCODE_ENDSWITCH)
+			{
+				if (depth == 0)
+					return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
+				depth--;
+			}
+			else if (cur == OPCODE_SWITCH || cur == OPCODE_LOOP)
+			{
+				depth++;
+			}
+		}
+
+	case OPCODE_CONTINUE:
+	case OPCODE_CONTINUEC:
+	case OPCODE_ENDLOOP:
+		// Backward scan: walking upwards an ENDLOOP opens a nested loop and a LOOP closes it
+		for (const Instruction *inst = psStart - 1; ; inst--)
+		{
+			const OPCODE_TYPE cur = inst->eOpcode;
+			if (cur == OPCODE_LOOP)
+			{
+				if (depth == 0)
+					return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
+				depth--;
+			}
+			else if (cur == OPCODE_ENDLOOP)
+			{
+				depth++;
+			}
+		}
+
+	case OPCODE_SWITCH:
+	case OPCODE_CASE:
+	case OPCODE_DEFAULT:
+		// Forward scan, skipping over complete nested switches
+		for (const Instruction *inst = psStart + 1; ; inst++)
+		{
+			const OPCODE_TYPE cur = inst->eOpcode;
+			const bool isSwitchLabel = (cur == OPCODE_CASE || cur == OPCODE_DEFAULT || cur == OPCODE_ENDSWITCH);
+			if (isSwitchLabel && depth == 0)
+			{
+				// Two separate flags let BasicBlock::Build tell apart "the scan ran into the
+				// ENDSWITCH itself" (sawEndSwitch) from "a CASE/DEFAULT label is followed
+				// directly by ENDSWITCH" (reported through needConnectToParent by the label
+				// skipping below), i.e. whether there is a direct route from SWITCH to ENDSWITCH.
+				if (cur == OPCODE_ENDSWITCH && sawEndSwitch != 0)
+					*sawEndSwitch = true;
+
+				return GetNextNonLabelInstruction(inst + 1, needConnectToParent);
+			}
+
+			if (cur == OPCODE_SWITCH)
+				depth++;
+			else if (cur == OPCODE_ENDSWITCH)
+				depth--;
+		}
+
+	default:
+		// Not a flow-control instruction we know how to follow
+		ASSERT(0);
+		break;
+	}
+	return 0;
+}
+
--- a/src/DataTypeAnalysis.cpp
+++ b/src/DataTypeAnalysis.cpp
@ -0,0 +1,769 @@
+
+#include "internal_includes/debug.h"
+#include "internal_includes/tokens.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include "internal_includes/DataTypeAnalysis.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Instruction.h"
+#include <algorithm>
+
+
+// Assigns a data type to one or more components of a temp register inside the flat type table.
+// Every component of the register that already carries a type (anything other than SVT_VOID) is
+// pulled into the update, and the type that finally gets written is the "highest" of the requested
+// type and all of those existing types, as decided by HLSLcc::SelectHigherType.
+// When psMadeProgress is non-NULL it is set to 1 as soon as any table entry actually changes.
+static void SetVectorType(std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress)
+{
+	// Pass 1: widen the mask to every component that is already typed and merge the types together.
+	for (int comp = 0; comp < 4; ++comp)
+	{
+		const SHADER_VARIABLE_TYPE eExisting = aeTempVecType[regBaseIndex + comp];
+		if (eExisting == SVT_VOID)
+			continue;
+
+		componentMask |= (1 << comp);
+		eType = HLSLcc::SelectHigherType(eType, eExisting);
+	}
+
+	// Pass 2: store the merged type into every selected component, noting whether anything changed.
+	for (int comp = 0; comp < 4; ++comp)
+	{
+		const bool isSelected = (componentMask & (1 << comp)) != 0;
+		if (!isSelected || aeTempVecType[regBaseIndex + comp] == eType)
+			continue;
+
+		aeTempVecType[regBaseIndex + comp] = eType;
+		if (psMadeProgress)
+			*psMadeProgress = 1;
+	}
+}
+
+// Maps an operand's minimum-precision annotation to the matching shader variable type.
+// OPERAND_MIN_PRECISION_DEFAULT carries no precision information, so eDefault is returned for it.
+// Any other unrecognised precision value trips an ASSERT and also yields eDefault.
+static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault)
+{
+	switch (prec)
+	{
+	case OPERAND_MIN_PRECISION_DEFAULT:
+		return eDefault;
+	case OPERAND_MIN_PRECISION_SINT_16:
+		return SVT_INT16;
+	case OPERAND_MIN_PRECISION_UINT_16:
+		return SVT_UINT16;
+	case OPERAND_MIN_PRECISION_FLOAT_2_8:
+		return SVT_FLOAT10;
+	case OPERAND_MIN_PRECISION_FLOAT_16:
+		return SVT_FLOAT16;
+	default:
+		break;
+	}
+
+	ASSERT(0); // Unknown precision value; catch this to see what's going on.
+	return eDefault;
+}
+
+// Records eType (adjusted for the operand's min-precision) for the components that a temp
+// operand accesses. Operands that are not temp registers are ignored.
+static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
+{
+	if (psOperand->eType != OPERAND_TYPE_TEMP)
+		return;
+
+	// Each temp register occupies four consecutive slots in the type table.
+	const uint32_t tableBase = psOperand->ui32RegisterNumber * 4;
+	const uint32_t accessMask = psOperand->GetAccessMask();
+	// Adjust type based on operand precision
+	const SHADER_VARIABLE_TYPE ePreciseType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType);
+
+	SetVectorType(aeTempVecType, tableBase, accessMask, ePreciseType, NULL);
+}
+
+// Applies MarkOperandAs with the same type to every operand of the instruction.
+static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
+{
+	const uint32_t operandCount = psInst->ui32NumOperands;
+	for (uint32_t opIdx = 0; opIdx < operandCount; ++opIdx)
+		MarkOperandAs(&psInst->asOperands[opIdx], eType, aeTempVecType);
+}
+
+// Mark scalars from CBs: when a constant buffer operand selects a single component and the
+// variable found at that offset has class SVC_SCALAR, the operand's component count is set to 1.
+// Operands of any other type, and operands that may access more than one component, are left alone.
+// TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again.
+static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand)
+{
+	const ConstantBuffer* psCBuf = NULL;
+	const ShaderVarType* psVarType = NULL;
+	int32_t rebase = 0;
+	bool isArray = false;
+
+	if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER)
+		return;
+
+	// Ignore selection modes that access more than one component
+	switch (psOperand->eSelMode)
+	{
+	case OPERAND_4_COMPONENT_SELECT_1_MODE:
+		break;
+	case OPERAND_4_COMPONENT_SWIZZLE_MODE:
+		if (!psOperand->IsSwizzleReplicated())
+			return;
+		break;
+	case OPERAND_4_COMPONENT_MASK_MODE:
+		return;
+	}
+
+	psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
+	// psCBuf starts out NULL; if the lookup did not fill it in there is no buffer to inspect.
+	if (psCBuf == NULL)
+		return;
+
+	ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
+
+	// Likewise psVarType stays NULL when no variable was reported for the offset, so guard the dereference.
+	if (psVarType != NULL && psVarType->Class == SVC_SCALAR)
+		psOperand->iNumComponents = 1;
+
+}
+
+// Operand visitor that seeds the temp type table with the types that are known up front:
+//  - temps used as sub-operands receive an integer type, but only in components that have no type yet
+//  - temps carrying an explicit min-precision receive the matching type unconditionally
+// Operands that are not temp registers are skipped.
+struct SetPartialDataTypes
+{
+	SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec)
+		: m_TempVec(_aeTempVec)
+	{}
+	SHADER_VARIABLE_TYPE *m_TempVec;
+
+	template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
+	{
+		if (psOperand->eType != OPERAND_TYPE_TEMP)
+			return;
+
+		const bool isSubOperand = (ui32OperandType == FEO_FLAG_SUBOPERAND);
+		if (isSubOperand)
+		{
+			// We really shouldn't ever be getting minprecision float indices here
+			ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8);
+		}
+		else if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
+		{
+			// A regular operand without a precision annotation tells us nothing yet.
+			return;
+		}
+
+		const uint32_t accessMask = psOperand->GetAccessMask();
+		const uint32_t regBase = psOperand->ui32RegisterNumber * 4;
+		const SHADER_VARIABLE_TYPE knownType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, isSubOperand ? SVT_INT_AMBIGUOUS : SVT_VOID);
+		// Only the regular-operand path requires the precision to resolve to a real type.
+		ASSERT(isSubOperand || knownType != SVT_VOID);
+
+		for (uint32_t comp = 0; comp < 4; comp++)
+		{
+			if ((accessMask & (1 << comp)) == 0)
+				continue;
+			// The sub-operand type must not replace a type that has already been recorded.
+			if (isSubOperand && m_TempVec[regBase + comp] != SVT_VOID)
+				continue;
+			m_TempVec[regBase + comp] = knownType;
+		}
+	}
+};
+
+// Operand visitor that copies the final temp datatypes from the type table into the operands.
+// Constant buffer operands are handed to SetCBOperandComponents so that scalars get marked.
+
+struct WritebackDataTypes
+{
+	WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec)
+		: m_Context(_ctx)
+		, m_TempVec(_aeTempVec)
+	{}
+	HLSLCrossCompilerContext *m_Context;
+	SHADER_VARIABLE_TYPE *m_TempVec;
+
+	template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
+	{
+		if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
+		{
+			// A constant buffer operand is never a temp, so there is nothing further to write back.
+			SetCBOperandComponents(m_Context, psOperand);
+			return;
+		}
+
+		if (psOperand->eType != OPERAND_TYPE_TEMP)
+			return;
+
+		const uint32_t regBase = psOperand->ui32RegisterNumber * 4;
+		const uint32_t accessMask = psOperand->GetAccessMask();
+		SHADER_VARIABLE_TYPE eSeen = SVT_VOID;
+
+		for (uint32_t comp = 0; comp < 4; comp++)
+		{
+			if ((accessMask & (1 << comp)) == 0)
+				continue;
+
+			const SHADER_VARIABLE_TYPE eCompType = m_TempVec[regBase + comp];
+
+			// Check that all components have the same type
+			ASSERT(eSeen == SVT_VOID || eSeen == eCompType);
+
+			eSeen = eCompType;
+
+			// Every accessed component must have a type, and it must agree with the operand's precision.
+			ASSERT(eSeen != SVT_VOID);
+			ASSERT(eSeen == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eSeen));
+
+			psOperand->aeDataType[comp] = eSeen;
+		}
+	}
+
+};
+
+
+// Infers the data type of every temp register component used by the given instructions.
+//
+//   psContext      translation context; used to query the data type of non-temp operands
+//   instructions   instructions to analyse; their temp operands get aeDataType filled in
+//   ui32TempCount  number of temp registers; the table holds four entries per register
+//   results        receives the type table, indexed by register * 4 + component
+//
+// The analysis runs in stages:
+//   1. seed the table from explicit min-precision annotations and from sub-operand usage
+//   2. deduce types from the opcodes that consume or produce each temp
+//   3. repeatedly propagate types across MOV/MOVC until nothing changes any more
+//   4. collapse SVT_FORCED_INT / SVT_INT_AMBIGUOUS to SVT_INT and write the types into the operands
+//   5. propagate bool over AND/OR whose sources are both bool
+void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> & instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results)
+{
+	uint32_t i;
+	// data() rather than &instructions[0]: indexing an empty vector is undefined behaviour.
+	Instruction *psFirstInst = instructions.data();
+	Instruction *psInst = psFirstInst;
+	// Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float
+	std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType = results;
+
+	aeTempVecType.clear();
+	aeTempVecType.resize(ui32TempCount * 4, SVT_VOID);
+
+	if (ui32TempCount == 0)
+		return;
+
+	// Go through the instructions, pick up partial datatypes, because we at least know those for a fact.
+	// Also set all suboperands to be integers (they're always used as indices)
+	ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0]));
+
+	//	if (psContext->psShader->ui32MajorVersion <= 3)
+	{
+		// First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table
+		// Only ever do int->float promotion (or int->uint), never the other way around
+		for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++)
+		{
+			int k = 0;
+			if (psInst->ui32NumOperands == 0)
+				continue;
+#ifdef _DEBUG
+			for (k = 0; k < (int)psInst->ui32NumOperands; k++)
+			{
+				if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP)
+				{
+					ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount);
+				}
+			}
+#endif
+
+			switch (psInst->eOpcode)
+			{
+				// All float-only ops
+			case OPCODE_ADD:
+			case OPCODE_DERIV_RTX:
+			case OPCODE_DERIV_RTY:
+			case OPCODE_DIV:
+			case OPCODE_DP2:
+			case OPCODE_DP3:
+			case OPCODE_DP4:
+			case OPCODE_EXP:
+			case OPCODE_FRC:
+			case OPCODE_LOG:
+			case OPCODE_MAD:
+			case OPCODE_MIN:
+			case OPCODE_MAX:
+			case OPCODE_MUL:
+			case OPCODE_ROUND_NE:
+			case OPCODE_ROUND_NI:
+			case OPCODE_ROUND_PI:
+			case OPCODE_ROUND_Z:
+			case OPCODE_RSQ:
+			case OPCODE_SAMPLE:
+			case OPCODE_SAMPLE_C:
+			case OPCODE_SAMPLE_C_LZ:
+			case OPCODE_SAMPLE_L:
+			case OPCODE_SAMPLE_D:
+			case OPCODE_SAMPLE_B:
+			case OPCODE_SQRT:
+			case OPCODE_SINCOS:
+			case OPCODE_LOD:
+			case OPCODE_GATHER4:
+
+			case OPCODE_DERIV_RTX_COARSE:
+			case OPCODE_DERIV_RTX_FINE:
+			case OPCODE_DERIV_RTY_COARSE:
+			case OPCODE_DERIV_RTY_FINE:
+			case OPCODE_GATHER4_C:
+			case OPCODE_GATHER4_PO:
+			case OPCODE_GATHER4_PO_C:
+			case OPCODE_RCP:
+
+				MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
+				break;
+
+				// Comparison ops, need to enable possibility for going boolean
+			case OPCODE_IEQ:
+			case OPCODE_INE:
+				MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
+				break;
+
+			case OPCODE_AND:
+				MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
+				break;
+
+
+			case OPCODE_IF:
+			case OPCODE_BREAKC:
+			case OPCODE_CALLC:
+			case OPCODE_CONTINUEC:
+			case OPCODE_RETC:
+				MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
+				break;
+
+			case OPCODE_ILT:
+			case OPCODE_IGE:
+				MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
+				break;
+
+			case OPCODE_ULT:
+			case OPCODE_UGE:
+				MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
+				break;
+
+				// Integer ops that don't care of signedness
+			case OPCODE_IADD:
+			case OPCODE_INEG:
+			case OPCODE_ISHL:
+			case OPCODE_NOT:
+			case OPCODE_OR:
+			case OPCODE_XOR:
+			case OPCODE_BUFINFO:
+			case OPCODE_COUNTBITS:
+			case OPCODE_FIRSTBIT_HI:
+			case OPCODE_FIRSTBIT_LO:
+			case OPCODE_FIRSTBIT_SHI:
+			case OPCODE_BFI:
+			case OPCODE_BFREV:
+			case OPCODE_ATOMIC_AND:
+			case OPCODE_ATOMIC_OR:
+			case OPCODE_ATOMIC_XOR:
+			case OPCODE_ATOMIC_CMP_STORE:
+			case OPCODE_ATOMIC_IADD:
+			case OPCODE_IMM_ATOMIC_IADD:
+			case OPCODE_IMM_ATOMIC_AND:
+			case OPCODE_IMM_ATOMIC_OR:
+			case OPCODE_IMM_ATOMIC_XOR:
+			case OPCODE_IMM_ATOMIC_EXCH:
+			case OPCODE_IMM_ATOMIC_CMP_EXCH:
+
+
+				MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType);
+				break;
+
+
+				// Integer ops
+			case OPCODE_IMAD:
+			case OPCODE_IMAX:
+			case OPCODE_IMIN:
+			case OPCODE_IMUL:
+			case OPCODE_ISHR:
+			case OPCODE_IBFE:
+
+			case OPCODE_ATOMIC_IMAX:
+			case OPCODE_ATOMIC_IMIN:
+			case OPCODE_IMM_ATOMIC_IMAX:
+			case OPCODE_IMM_ATOMIC_IMIN:
+				MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType);
+				break;
+
+
+				// uint ops
+			case OPCODE_UDIV:
+			case OPCODE_UMUL:
+			case OPCODE_UMAD:
+			case OPCODE_UMAX:
+			case OPCODE_UMIN:
+			case OPCODE_USHR:
+			case OPCODE_UADDC:
+			case OPCODE_USUBB:
+			case OPCODE_ATOMIC_UMAX:
+			case OPCODE_ATOMIC_UMIN:
+			case OPCODE_IMM_ATOMIC_UMAX:
+			case OPCODE_IMM_ATOMIC_UMIN:
+			case OPCODE_IMM_ATOMIC_ALLOC:
+			case OPCODE_IMM_ATOMIC_CONSUME:
+				MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType);
+				break;
+			case OPCODE_UBFE:
+				MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType);
+				break;
+
+				// Need special handling
+			case OPCODE_FTOI:
+			case OPCODE_FTOU:
+				MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
+				break;
+
+			case OPCODE_GE:
+			case OPCODE_LT:
+			case OPCODE_EQ:
+			case OPCODE_NE:
+
+				MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType);
+				break;
+
+			case OPCODE_ITOF:
+			case OPCODE_UTOF:
+				MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType);
+				break;
+
+			case OPCODE_LD:
+			case OPCODE_LD_MS:
+				// TODO: Would need to know the sampler return type
+				MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
+				break;
+
+			case OPCODE_MOVC:
+				// Only the condition (operand 1) is known to be boolean here. Do not fall through:
+				// operand 2 of MOVC is a data source, not a condition.
+				MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
+				break;
+
+			case OPCODE_SWAPC:
+				MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
+				break;
+
+			case OPCODE_RESINFO:
+			{
+				if (psInst->eResInfoReturnType != RESINFO_INSTRUCTION_RETURN_UINT)
+					MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
+				break;
+			}
+
+			case OPCODE_SAMPLE_INFO:
+				// TODO decode the _uint flag
+				MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
+				break;
+
+			case OPCODE_SAMPLE_POS:
+				MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
+				break;
+
+
+			case OPCODE_LD_UAV_TYPED:
+				// translates to gvec4 loadImage(gimage i, ivec p).
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
+				break;
+
+			case OPCODE_STORE_UAV_TYPED:
+				// translates to storeImage(gimage i, ivec p, gvec4 data)
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
+				MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data
+				break;
+
+			case OPCODE_LD_RAW:
+				if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
+					MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
+				else
+					MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
+				break;
+
+			case OPCODE_STORE_RAW:
+				if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
+					MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
+				else
+					MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
+				break;
+
+			case OPCODE_LD_STRUCTURED:
+				MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
+				break;
+
+			case OPCODE_STORE_STRUCTURED:
+				MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
+				MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType);
+				break;
+
+			case OPCODE_F32TOF16:
+			case OPCODE_F16TOF32:
+				// TODO
+				ASSERT(0);
+				break;
+
+
+
+				// No-operands, should never get here anyway
+				/*				case OPCODE_BREAK:
+				case OPCODE_CALL:
+				case OPCODE_CASE:
+				case OPCODE_CONTINUE:
+				case OPCODE_CUT:
+				case OPCODE_DEFAULT:
+				case OPCODE_DISCARD:
+				case OPCODE_ELSE:
+				case OPCODE_EMIT:
+				case OPCODE_EMITTHENCUT:
+				case OPCODE_ENDIF:
+				case OPCODE_ENDLOOP:
+				case OPCODE_ENDSWITCH:
+
+				case OPCODE_LABEL:
+				case OPCODE_LOOP:
+				case OPCODE_CUSTOMDATA:
+				case OPCODE_NOP:
+				case OPCODE_RET:
+				case OPCODE_SWITCH:
+				case OPCODE_DCL_RESOURCE: // DCL* opcodes have
+				case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats.
+				case OPCODE_DCL_SAMPLER:
+				case OPCODE_DCL_INDEX_RANGE:
+				case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+				case OPCODE_DCL_GS_INPUT_PRIMITIVE:
+				case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+				case OPCODE_DCL_INPUT:
+				case OPCODE_DCL_INPUT_SGV:
+				case OPCODE_DCL_INPUT_SIV:
+				case OPCODE_DCL_INPUT_PS:
+				case OPCODE_DCL_INPUT_PS_SGV:
+				case OPCODE_DCL_INPUT_PS_SIV:
+				case OPCODE_DCL_OUTPUT:
+				case OPCODE_DCL_OUTPUT_SGV:
+				case OPCODE_DCL_OUTPUT_SIV:
+				case OPCODE_DCL_TEMPS:
+				case OPCODE_DCL_INDEXABLE_TEMP:
+				case OPCODE_DCL_GLOBAL_FLAGS:
+
+
+				case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader
+				case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader
+				case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader
+				case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader
+
+				case OPCODE_EMIT_STREAM:
+				case OPCODE_CUT_STREAM:
+				case OPCODE_EMITTHENCUT_STREAM:
+				case OPCODE_INTERFACE_CALL:
+
+
+				case OPCODE_DCL_STREAM:
+				case OPCODE_DCL_FUNCTION_BODY:
+				case OPCODE_DCL_FUNCTION_TABLE:
+				case OPCODE_DCL_INTERFACE:
+
+				case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
+				case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
+				case OPCODE_DCL_TESS_DOMAIN:
+				case OPCODE_DCL_TESS_PARTITIONING:
+				case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
+				case OPCODE_DCL_HS_MAX_TESSFACTOR:
+				case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+				case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+
+				case OPCODE_DCL_THREAD_GROUP:
+				case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+				case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
+				case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
+				case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+				case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
+				case OPCODE_DCL_RESOURCE_RAW:
+				case OPCODE_DCL_RESOURCE_STRUCTURED:
+				case OPCODE_SYNC:
+
+				// TODO
+				case OPCODE_DADD:
+				case OPCODE_DMAX:
+				case OPCODE_DMIN:
+				case OPCODE_DMUL:
+				case OPCODE_DEQ:
+				case OPCODE_DGE:
+				case OPCODE_DLT:
+				case OPCODE_DNE:
+				case OPCODE_DMOV:
+				case OPCODE_DMOVC:
+				case OPCODE_DTOF:
+				case OPCODE_FTOD:
+
+				case OPCODE_EVAL_SNAPPED:
+				case OPCODE_EVAL_SAMPLE_INDEX:
+				case OPCODE_EVAL_CENTROID:
+
+				case OPCODE_DCL_GS_INSTANCE_COUNT:
+
+				case OPCODE_ABORT:
+				case OPCODE_DEBUG_BREAK:*/
+
+			default:
+				break;
+			}
+		}
+	}
+
+	{
+		int madeProgress = 0;
+		// Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have
+		do
+		{
+			madeProgress = 0;
+			psInst = psFirstInst;
+			for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++)
+			{
+				if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC)
+				{
+					// Figure out the data type
+					uint32_t k;
+					SHADER_VARIABLE_TYPE dataType = SVT_VOID;
+					int foundImmediate = 0;
+					for (k = 0; k < psInst->ui32NumOperands; k++)
+					{
+						uint32_t mask, j;
+						if (psInst->eOpcode == OPCODE_MOVC && k == 1)
+							continue; // Ignore the condition operand, it's always int
+
+						if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32)
+						{
+							foundImmediate = 1;
+							continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed
+						}
+
+						if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP)
+						{
+							dataType = psInst->asOperands[k].GetDataType(psContext);
+							break;
+						}
+
+						if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE)
+						{
+							// If any modifiers are used in MOV or MOVC, that automatically is treated as float.
+							dataType = SVT_FLOAT;
+							break;
+						}
+
+						mask = psInst->asOperands[k].GetAccessMask();
+						for (j = 0; j < 4; j++)
+						{
+							if (!(mask & (1 << j)))
+								continue;
+							if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID)
+							{
+								dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]);
+							}
+						}
+					}
+
+					if (foundImmediate && dataType == SVT_VOID)
+						dataType = SVT_INT;
+
+					if (dataType != SVT_VOID)
+					{
+						// Found data type, write to all operands
+						// First adjust it to not have precision qualifiers in it
+						switch (dataType)
+						{
+						case SVT_FLOAT10:
+						case SVT_FLOAT16:
+							dataType = SVT_FLOAT;
+							break;
+						case SVT_INT12:
+						case SVT_INT16:
+							dataType = SVT_INT;
+							break;
+						case SVT_UINT16:
+						case SVT_UINT8:
+							dataType = SVT_UINT;
+							break;
+						default:
+							break;
+						}
+						for (k = 0; k < psInst->ui32NumOperands; k++)
+						{
+							uint32_t mask;
+							if (psInst->eOpcode == OPCODE_MOVC && k == 1)
+								continue; // Ignore the condition operand, it's always int
+
+							if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP)
+								continue;
+							// Operands with an explicit min-precision keep the type they were seeded with.
+							if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
+								continue;
+
+							mask = psInst->asOperands[k].GetAccessMask();
+							SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress);
+
+						}
+
+					}
+				}
+			}
+		} while (madeProgress != 0);
+	}
+
+
+	// translate forced_int and int_ambiguous back to int
+	for (i = 0; i < ui32TempCount * 4; i++)
+	{
+		if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS)
+			aeTempVecType[i] = SVT_INT;
+	}
+
+	ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0]));
+
+	// Propagate boolean data types over logical operators
+	bool didProgress = false;
+	do
+	{
+		didProgress = false;
+		std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i)
+		{
+			if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR)
+				&& (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL)
+				&& (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL))
+			{
+				// Check if all uses see only this define
+				bool isStandalone = true;
+				std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u)
+				{
+					if (u.m_Op->m_Defines.size() > 1)
+						isStandalone = false;
+				});
+
+				if (isStandalone)
+				{
+					didProgress = true;
+					// Change data type of this and all uses
+					i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL;
+					uint32_t reg = i.asOperands[0].ui32RegisterNumber;
+					aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL;
+
+					std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u)
+					{
+						u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL;
+					});
+				}
+			}
+		});
+	} while (didProgress);
+
+}
--- a/src/Declaration.cpp
+++ b/src/Declaration.cpp
@ -0,0 +1,2 @@
+
+#include "internal_includes/Declaration.h"
--- a/src/HLSLCrossCompilerContext.cpp
+++ b/src/HLSLCrossCompilerContext.cpp
@ -0,0 +1,253 @@
+
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/DataTypeAnalysis.h"
+#include "internal_includes/UseDefineChains.h"
+#include "internal_includes/Declaration.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/Translator.h"
+#include "internal_includes/ControlFlowGraph.h"
+#include <sstream>
+
+// Runs the temp register analysis for one shader phase: finds the DCL_TEMPS declaration,
+// builds use-define chains and splits temps based on them, then infers the data type of each
+// temp component. If splitting changed the number of temps, the declaration is updated to match.
+// Phases without any temps are left untouched apart from having their counters reset.
+void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
+{
+	psPhase->psTempDeclaration = NULL;
+	psPhase->ui32OrigTemps = 0;
+	psPhase->ui32TotalTemps = 0;
+
+	// Retrieve the temp decl count from the first DCL_TEMPS declaration
+	const size_t declCount = psPhase->psDecl.size();
+	for (uint32_t declIdx = 0; declIdx < declCount; ++declIdx)
+	{
+		if (psPhase->psDecl[declIdx].eOpcode != OPCODE_DCL_TEMPS)
+			continue;
+
+		psPhase->psTempDeclaration = &psPhase->psDecl[declIdx];
+		psPhase->ui32TotalTemps = psPhase->psTempDeclaration->value.ui32NumTemps;
+		break;
+	}
+
+	if (psPhase->ui32TotalTemps == 0)
+		return;
+
+	// Remember the count as declared, so a change caused by splitting can be detected below.
+	psPhase->ui32OrigTemps = psPhase->ui32TotalTemps;
+
+	// The split table is a table containing the index of the original register this register was split out from, or 0xffffffff
+	// Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count
+	psPhase->pui32SplitInfo.clear();
+	psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff);
+
+	// Build use-define chains and split temps based on those.
+	{
+		DefineUseChains defUse;
+		UseDefineChains useDef;
+
+		BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, defUse, useDef, psPhase->GetCFG());
+
+		CalculateStandaloneDefinitions(defUse, psPhase->ui32TotalTemps);
+
+		// Only do sampler precision downgrade on pixel shaders.
+		const bool isPixelShader = (psShader->eShaderType == PIXEL_SHADER);
+		if (isPixelShader)
+			UpdateSamplerPrecisions(psShader->sInfo, defUse, psPhase->ui32TotalTemps);
+
+		UDSplitTemps(&psPhase->ui32TotalTemps, defUse, useDef, psPhase->pui32SplitInfo);
+
+		WriteBackUsesAndDefines(defUse);
+	}
+
+	HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes);
+
+	// Splitting may have changed the temp count; keep the declaration in sync.
+	if (psPhase->psTempDeclaration != NULL && psPhase->ui32OrigTemps != psPhase->ui32TotalTemps)
+	{
+		psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps;
+	}
+}
+
+// Resets the parts of the cross-stage dependency data that the current shader type is
+// responsible for: pixel shaders clear the cross dependency data, hull shaders reset the
+// tessellation partitioning and output primitive. Other shader types leave it untouched.
+void HLSLCrossCompilerContext::ClearDependencyData()
+{
+
+	switch (psShader->eShaderType)
+	{
+	case PIXEL_SHADER:
+	{
+		psDependencies->ClearCrossDependencyData();
+		// Each shader type resets only its own data; do not fall through into the hull shader case.
+		break;
+	}
+	case HULL_SHADER:
+	{
+		psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
+		psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+// Appends one four-space indentation step per current indent level to the active output string.
+void HLSLCrossCompilerContext::AddIndentation()
+{
+	bstring target = *currentGLSLString;
+	for (int remaining = indent; remaining > 0; --remaining)
+		bcatcstr(target, "    ");
+}
+
+
+// Returns the name under which the input referenced by psOperand is declared.
+//
+//   piRebase          optional out: component rebase of the matching signature, or 0 for a redirected input
+//   iIgnoreRedirect   when 0, registers flagged 0xfe in the phase's redirect tables resolve to the
+//                     per-phase "phaseN_InputS_R" variable, and Metal system values get inputPrefix
+//   puiIgnoreSwizzle  passed through to the translator's system value lookup
+//
+// System values are named by the translator; everything else is named from the signature as
+// inputPrefix + ["patch"] + semantic name + semantic index.
+std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const
+{
+	const int regSpace = psOperand->GetRegisterSpace(this);
+
+	if (iIgnoreRedirect == 0)
+	{
+		// Register space 0 consults the input table, register space 1 the patch constant table.
+		bool needsRedirect = false;
+		if (regSpace == 0)
+			needsRedirect = (psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe);
+		else if (regSpace == 1)
+			needsRedirect = (psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe);
+
+		if (needsRedirect)
+		{
+			std::ostringstream redirectName;
+			redirectName << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber;
+			if (piRebase)
+				*piRebase = 0;
+			return redirectName.str();
+		}
+	}
+
+	const ShaderInfo::InOutSignature* psSignature = NULL;
+	if (regSpace == 0)
+		psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psSignature, true);
+	else
+		psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psSignature, true);
+
+	if (psSignature && piRebase)
+		*piRebase = psSignature->iRebase;
+
+	std::string systemValueName;
+	bool skipPrefix = false;
+	const bool isSystemValue = psTranslator->TranslateSystemValue(psOperand, psSignature, systemValueName, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix);
+	if (isSystemValue)
+	{
+		const bool wantsPrefix = (psShader->eTargetLanguage == LANG_METAL) && (iIgnoreRedirect == 0) && !skipPrefix;
+		if (wantsPrefix)
+			return inputPrefix + systemValueName;
+		return systemValueName;
+	}
+
+	// Not a system value: the name is derived from the signature, which must exist here.
+	ASSERT(psSignature != NULL);
+	std::ostringstream declaredName;
+	declaredName << inputPrefix << (regSpace == 1 ? "patch" : "") << psSignature->semanticName << psSignature->ui32SemanticIndex;
+	return declaredName.str();
+}
+
+
+// Returns the name under which the output referenced by psOperand is declared.
+//
+//   piStream          not used by this function
+//   puiIgnoreSwizzle  passed through to the translator's system value lookup
+//   piRebase          optional out: component rebase of the matching signature, or 0 when the
+//                     output is routed through a per-phase variable
+//   iIgnoreRedirect   when 0, registers flagged 0xfe in the phase's redirect tables resolve to the
+//                     per-phase "phaseN_OutputS_R" variable, and Metal system values get outputPrefix
+//
+// Outputs whose signature is indexed in the current phase go through the per-phase output variable.
+// System values are named by the translator; everything else is named from the signature as
+// outputPrefix + ["patch"] + semantic name + semantic index.
+std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand,
+	int* piStream,
+	uint32_t *puiIgnoreSwizzle,
+	int *piRebase,
+	int iIgnoreRedirect) const
+{
+	const int regSpace = psOperand->GetRegisterSpace(this);
+
+	if (iIgnoreRedirect == 0)
+	{
+		// Register space 0 consults the output table, register space 1 the patch constant table.
+		bool needsRedirect = false;
+		if (regSpace == 0)
+			needsRedirect = (psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe);
+		else if (regSpace == 1)
+			needsRedirect = (psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe);
+
+		if (needsRedirect)
+		{
+			std::ostringstream redirectName;
+			redirectName << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber;
+			if (piRebase)
+				*piRebase = 0;
+			return redirectName.str();
+		}
+	}
+
+	const ShaderInfo::InOutSignature* psSignature = NULL;
+	if (regSpace == 0)
+		psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psSignature, true);
+	else
+		psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psSignature, true);
+
+	if (psSignature && piRebase)
+		*piRebase = psSignature->iRebase;
+
+	if (psSignature && (psSignature->isIndexed.find(currentPhase) != psSignature->isIndexed.end()))
+	{
+		// Need to route through temp output variable
+		std::ostringstream indexedName;
+		indexedName << "phase" << currentPhase << "_Output" << regSpace << "_" << psSignature->indexStart.find(currentPhase)->second;
+		// Without an index sub-operand the register number itself is emitted as the array index.
+		if (!psOperand->m_SubOperands[0].get())
+		{
+			indexedName << "[" << psOperand->ui32RegisterNumber << "]";
+		}
+		if (piRebase)
+			*piRebase = 0;
+		return indexedName.str();
+	}
+
+	std::string systemValueName;
+	const bool isSystemValue = psTranslator->TranslateSystemValue(psOperand, psSignature, systemValueName, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false);
+	if (isSystemValue)
+	{
+		const bool wantsPrefix = (psShader->eTargetLanguage == LANG_METAL) && (iIgnoreRedirect == 0);
+		if (wantsPrefix)
+			return outputPrefix + systemValueName;
+		return systemValueName;
+	}
+
+	// Not a system value: the name is derived from the signature, which must exist here.
+	ASSERT(psSignature != NULL);
+
+	std::ostringstream declaredName;
+	declaredName << outputPrefix << (regSpace == 1 ? "patch" : "") << psSignature->semanticName << psSignature->ui32SemanticIndex;
+	return declaredName.str();
+}
+
+// Tells whether the output written by psOperand still needs a declaration, and records it as
+// declared when it does.
+//
+//   psOperand  the output operand being written
+//   count      number of consecutive slots to mark as declared, starting at the operand's slot
+//
+// Returns true when a declaration has to be emitted. In pixel shaders the depth-greater-equal and
+// depth-less-equal outputs always need declaring, and plain depth output only does so for Metal.
+bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count)
+{
+	char declMask = (char)psOperand->ui32CompMask;
+	const int regSpace = psOperand->GetRegisterSpace(this);
+	// Each vertex output stream gets its own window of 1024 slots in the declared-mask table.
+	const uint32_t firstSlot = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams
+	ASSERT(psShader->ui32CurrentVertexOutputStream < 4);
+
+	// First check for various builtins, mostly depth-output ones.
+	if (psShader->eShaderType == PIXEL_SHADER)
+	{
+		switch (psOperand->eType)
+		{
+		case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+		case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+			return true;
+		case OPERAND_TYPE_OUTPUT_DEPTH:
+			// GL doesn't need declaration, Metal does.
+			return psShader->eTargetLanguage == LANG_METAL;
+		default:
+			break;
+		}
+	}
+
+	// Nothing to do when every component being written has already been declared.
+	if ((declMask & ~psShader->acOutputDeclared[regSpace][firstSlot]) == 0)
+		return false;
+
+	if (psOperand->eSpecialName == NAME_UNDEFINED)
+	{
+		// Need to fetch the actual comp mask
+		const ShaderInfo::InOutSignature* psSignature = NULL;
+		if (regSpace == 0)
+		{
+			psShader->sInfo.GetOutputSignatureFromRegister(
+				psOperand->ui32RegisterNumber,
+				psOperand->ui32CompMask,
+				psShader->ui32CurrentVertexOutputStream,
+				&psSignature);
+		}
+		else
+		{
+			psShader->sInfo.GetPatchConstantSignatureFromRegister(
+				psOperand->ui32RegisterNumber,
+				psOperand->ui32CompMask,
+				&psSignature);
+		}
+
+		declMask = (char)psSignature->ui32Mask;
+	}
+
+	// Mark the components as declared in every slot covered by this output.
+	for (int slot = 0; slot < count; slot++)
+		psShader->acOutputDeclared[regSpace][firstSlot + slot] |= declMask;
+
+	return true;
+}
--- a/src/HLSLcc.cpp
+++ b/src/HLSLcc.cpp
@ -0,0 +1,212 @@
+
+#include "hlslcc.h"
+
+#include <memory>
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/toGLSL.h"
+#include "internal_includes/toMetal.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/decode.h"
+
+
+// OpenGL shader-stage enum values, each defined here only if it has not been defined already.
+// TranslateHLSLFromMem stores one of these in GLSLShader::shaderType.
+#ifndef GL_VERTEX_SHADER_ARB
+#define GL_VERTEX_SHADER_ARB              0x8B31
+#endif
+#ifndef GL_FRAGMENT_SHADER_ARB
+#define GL_FRAGMENT_SHADER_ARB            0x8B30
+#endif
+#ifndef GL_GEOMETRY_SHADER
+#define GL_GEOMETRY_SHADER 0x8DD9
+#endif
+#ifndef GL_TESS_EVALUATION_SHADER
+#define GL_TESS_EVALUATION_SHADER 0x8E87
+#endif
+#ifndef GL_TESS_CONTROL_SHADER
+#define GL_TESS_CONTROL_SHADER 0x8E88
+#endif
+#ifndef GL_COMPUTE_SHADER
+#define GL_COMPUTE_SHADER 0x91B9
+#endif
+
+
+// Translates a DirectX bytecode blob held in memory into GLSL or Metal source.
+//
+// shader:              the bytecode blob; it is handed to DecodeDXBC as a stream of 32-bit tokens.
+// flags:               HLSLCC_FLAG_* bits. HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS is cleared for
+//                      shaders whose major version is 3 or lower.
+// language:            LANG_METAL selects the Metal backend, anything else the GLSL backend
+//                      (which may adjust the value through ToGLSL::SetLanguage).
+// extensions:          forwarded to the GLSL backend only.
+// dependencies:        stored in the translation context.
+// samplerPrecisions:   added to the decoded shader's reflection info before translating.
+// reflectionCallbacks: handed to the translation context.
+// result:              receives the source code, reflection info, texture samplers,
+//                      GL shader type and output language.
+//
+// Returns 1 on success and 0 on failure.
+HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
+	unsigned int flags,
+	GLLang language,
+	const GlExtensions *extensions,
+	GLSLCrossDependencyData* dependencies,
+	HLSLccSamplerPrecisionInfo& samplerPrecisions,
+	HLSLccReflection& reflectionCallbacks,
+	GLSLShader* result)
+{
+	int GLSLShaderType = GL_FRAGMENT_SHADER_ARB;
+	int success = 0;
+
+	uint32_t* tokens = (uint32_t*)shader;
+
+	// unique_ptr gives the same scoped ownership as std::auto_ptr, which is deprecated in
+	// C++11 and no longer exists in C++17.
+	std::unique_ptr<Shader> psShader(DecodeDXBC(tokens, flags));
+
+	if (psShader.get())
+	{
+		HLSLCrossCompilerContext sContext(reflectionCallbacks);
+
+		// Releases the bstrings owned by the context and by every shader phase.
+		// Shared by the failure paths and the success path below.
+		auto releaseTranslationBuffers = [&sContext, &psShader]()
+		{
+			bdestroy(sContext.glsl);
+			for (size_t phase = 0; phase < psShader->asPhases.size(); ++phase)
+			{
+				bdestroy(psShader->asPhases[phase].postShaderCode);
+				bdestroy(psShader->asPhases[phase].earlyMain);
+			}
+		};
+
+		// Add shader precisions from the list
+		psShader->sInfo.AddSamplerPrecisions(samplerPrecisions);
+
+		if (psShader->ui32MajorVersion <= 3)
+		{
+			flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
+		}
+
+		sContext.psShader = psShader.get();
+		sContext.flags = flags;
+		sContext.psDependencies = dependencies;
+
+		for (size_t phase = 0; phase < psShader->asPhases.size(); ++phase)
+		{
+			psShader->asPhases[phase].hasPostShaderCode = 0;
+		}
+
+		if (language == LANG_METAL)
+		{
+			// Tessellation or geometry shaders are not supported
+			if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER || psShader->eShaderType == GEOMETRY_SHADER)
+			{
+				result->sourceCode = "";
+				return 0;
+			}
+			ToMetal translator(&sContext);
+			if (!translator.Translate())
+			{
+				releaseTranslationBuffers();
+				return 0;
+			}
+		}
+		else
+		{
+			ToGLSL translator(&sContext);
+			language = translator.SetLanguage(language);
+			translator.SetExtensions(extensions);
+			if (!translator.Translate())
+			{
+				releaseTranslationBuffers();
+				return 0;
+			}
+		}
+
+		// Map the DX shader stage onto the GL enum; pixel shaders (and anything unknown)
+		// keep the GL_FRAGMENT_SHADER_ARB default.
+		switch (psShader->eShaderType)
+		{
+		case VERTEX_SHADER:
+			GLSLShaderType = GL_VERTEX_SHADER_ARB;
+			break;
+		case GEOMETRY_SHADER:
+			GLSLShaderType = GL_GEOMETRY_SHADER;
+			break;
+		case DOMAIN_SHADER:
+			GLSLShaderType = GL_TESS_EVALUATION_SHADER;
+			break;
+		case HULL_SHADER:
+			GLSLShaderType = GL_TESS_CONTROL_SHADER;
+			break;
+		case COMPUTE_SHADER:
+			GLSLShaderType = GL_COMPUTE_SHADER;
+			break;
+		default:
+			break;
+		}
+
+		// bstr2cstr returns NULL on failure; never assign a null pointer to the result string.
+		char* glslcstr = bstr2cstr(sContext.glsl, '\0');
+		result->sourceCode = glslcstr ? glslcstr : "";
+		bcstrfree(glslcstr);
+
+		releaseTranslationBuffers();
+
+		result->reflection = psShader->sInfo;
+
+		result->textureSamplers = psShader->textureSamplers;
+
+		success = 1;
+	}
+
+	/* Fill in the result struct */
+
+	result->shaderType = GLSLShaderType;
+	result->GLSLLanguage = language;
+
+	return success;
+}
+
+// Reads a DirectX bytecode file from disk and translates it with TranslateHLSLFromMem.
+//
+// filename: path of the bytecode file, opened in binary mode.
+// All remaining parameters are forwarded unchanged to TranslateHLSLFromMem.
+//
+// Returns 0 when the file cannot be opened or its size cannot be determined,
+// otherwise whatever TranslateHLSLFromMem returns.
+HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
+	unsigned int flags,
+	GLLang language,
+	const GlExtensions *extensions,
+	GLSLCrossDependencyData* dependencies,
+	HLSLccSamplerPrecisionInfo& samplerPrecisions,
+	HLSLccReflection& reflectionCallbacks,
+	GLSLShader* result)
+{
+	FILE* shaderFile = fopen(filename, "rb");
+	if (!shaderFile)
+	{
+		return 0;
+	}
+
+	// Determine the file size; both fseek and ftell can fail (ftell reports -1).
+	long length = -1;
+	if (fseek(shaderFile, 0, SEEK_END) == 0)
+		length = ftell(shaderFile);
+	if (length < 0 || fseek(shaderFile, 0, SEEK_SET) != 0)
+	{
+		fclose(shaderFile);
+		return 0;
+	}
+
+	// The buffer must really contain length + 1 elements: reserve() only allocates capacity,
+	// and indexing a vector beyond size() is undefined behaviour.
+	std::vector<char> shader((size_t)length + 1, '\0');
+
+	const size_t readLength = fread(&shader[0], 1, (size_t)length, shaderFile);
+	fclose(shaderFile);
+
+	// Terminate right after the bytes that were actually read.
+	shader[readLength] = '\0';
+
+	return TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result);
+}
+
--- a/src/HLSLccToolkit.cpp
+++ b/src/HLSLccToolkit.cpp
@ -0,0 +1,482 @@
+
+#include "internal_includes/HLSLccToolkit.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/toGLSLOperand.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Shader.h"
+#include <sstream>
+#include <cmath>
+
+namespace HLSLcc
+{
+	// Returns the number of bits set in a.
+	uint32_t GetNumberBitsSet(uint32_t a)
+	{
+		// Each iteration clears the lowest set bit, so the loop body runs once per set bit.
+		// Unlike a fixed multiply/modulo trick this is valid for the whole 32-bit range.
+		uint32_t bitCount = 0;
+		for (; a != 0; a &= a - 1)
+			++bitCount;
+		return bitCount;
+	}
+
+	// Maps a shader variable type to the TO_FLAG_* value used when translating operands.
+	// Types without a dedicated flag map to TO_FLAG_NONE.
+	uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType)
+	{
+		switch (eType)
+		{
+		case SVT_FLOAT16:
+			return TO_FLAG_FORCE_HALF;
+		case SVT_UINT:
+		case SVT_UINT16:
+			return TO_FLAG_UNSIGNED_INTEGER;
+		case SVT_INT:
+		case SVT_INT16:
+		case SVT_INT12:
+			return TO_FLAG_INTEGER;
+		case SVT_BOOL:
+			return TO_FLAG_BOOL;
+		default:
+			return TO_FLAG_NONE;
+		}
+	}
+
+	// Picks the shader variable type requested by a set of TO_* flags.
+	// The flags are tested in priority order: half, signed int, unsigned int, bool.
+	// When none of them is set the result is SVT_FLOAT.
+	SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags)
+	{
+		SHADER_VARIABLE_TYPE selected = SVT_FLOAT;
+
+		if ((typeflags & TO_FLAG_FORCE_HALF) != 0)
+			selected = SVT_FLOAT16;
+		else if ((typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT)) != 0)
+			selected = SVT_INT;
+		else if ((typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT)) != 0)
+			selected = SVT_UINT;
+		else if ((typeflags & TO_FLAG_BOOL) != 0)
+			selected = SVT_BOOL;
+
+		return selected;
+	}
+
+	// Returns the GLSL type/constructor name for eType with the given component count (1..4).
+	// When useGLSLPrecision is false the reduced-precision types use the plain names
+	// without a mediump/lowp qualifier. Unsupported types assert and yield " ".
+	const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
+		const int components, bool useGLSLPrecision)
+	{
+		// Index 0 is a placeholder so that the component count can be used as index directly.
+		static const char * const uintNames[] = { " ", "uint", "uvec2", "uvec3", "uvec4" };
+		static const char * const mediumpUintNames[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" };
+		static const char * const intNames[] = { " ", "int", "ivec2", "ivec3", "ivec4" };
+		static const char * const mediumpIntNames[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" };
+		static const char * const lowpIntNames[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" };
+		static const char * const floatNames[] = { " ", "float", "vec2", "vec3", "vec4" };
+		static const char * const mediumpFloatNames[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" };
+		static const char * const lowpFloatNames[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" };
+		static const char * const boolNames[] = { " ", "bool", "bvec2", "bvec3", "bvec4" };
+
+		ASSERT(components >= 1 && components <= 4);
+
+		// First choose the name table, then index it once at the end.
+		const char * const * names = 0;
+		switch (eType)
+		{
+		case SVT_UINT:
+			names = uintNames;
+			break;
+		case SVT_UINT16:
+			names = useGLSLPrecision ? mediumpUintNames : uintNames;
+			break;
+		case SVT_INT:
+			names = intNames;
+			break;
+		case SVT_INT16:
+			names = useGLSLPrecision ? mediumpIntNames : intNames;
+			break;
+		case SVT_INT12:
+			names = useGLSLPrecision ? lowpIntNames : intNames;
+			break;
+		case SVT_FLOAT:
+			names = floatNames;
+			break;
+		case SVT_FLOAT16:
+			names = useGLSLPrecision ? mediumpFloatNames : floatNames;
+			break;
+		case SVT_FLOAT10:
+			names = useGLSLPrecision ? lowpFloatNames : floatNames;
+			break;
+		case SVT_BOOL:
+			names = boolNames;
+			break;
+		default:
+			ASSERT(0);
+			return " ";
+		}
+
+		return names[components];
+	}
+
+	// Returns the Metal type/constructor name for eType with the given component count (1..4).
+	// 16- and 12-bit ints share the short types, 16- and 10-bit floats share the half types.
+	// Unsupported types assert and yield " ".
+	const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
+		const int components)
+	{
+		// Index 0 is a placeholder so that the component count can be used as index directly.
+		static const char * const uintNames[] = { " ", "uint", "uint2", "uint3", "uint4" };
+		static const char * const ushortNames[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" };
+		static const char * const intNames[] = { " ", "int", "int2", "int3", "int4" };
+		static const char * const shortNames[] = { " ", "short", "short2", "short3", "short4" };
+		static const char * const floatNames[] = { " ", "float", "float2", "float3", "float4" };
+		static const char * const halfNames[] = { " ", "half", "half2", "half3", "half4" };
+		static const char * const boolNames[] = { " ", "bool", "bool2", "bool3", "bool4" };
+
+		ASSERT(components >= 1 && components <= 4);
+
+		// First choose the name table, then index it once at the end.
+		const char * const * names = 0;
+		switch (eType)
+		{
+		case SVT_UINT:
+			names = uintNames;
+			break;
+		case SVT_UINT16:
+			names = ushortNames;
+			break;
+		case SVT_INT:
+			names = intNames;
+			break;
+		case SVT_INT16:
+		case SVT_INT12:
+			names = shortNames;
+			break;
+		case SVT_FLOAT:
+			names = floatNames;
+			break;
+		case SVT_FLOAT16:
+		case SVT_FLOAT10:
+			names = halfNames;
+			break;
+		case SVT_BOOL:
+			names = boolNames;
+			break;
+		default:
+			ASSERT(0);
+			return " ";
+		}
+
+		return names[components];
+	}
+
+	// Returns the type/constructor name for eType in the context's target language.
+	// useGLSLPrecision only matters for GLSL targets; the Metal path ignores it.
+	const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/)
+	{
+		const bool targetsMetal = (psContext->psShader->eTargetLanguage == LANG_METAL);
+
+		return targetsMetal
+			? GetConstructorForTypeMetal(eType, components)
+			: GetConstructorForTypeGLSL(eType, components, useGLSLPrecision);
+	}
+
+	// Builds the matrix type name "<base><columns>x<rows>" for the context's target language,
+	// e.g. "float4x4" for Metal or "mediump mat4x4" for GLSL.
+	// Only float base types are supported; anything else asserts and yields an empty string.
+	std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows)
+	{
+		const bool targetsMetal = (psContext->psShader->eTargetLanguage == LANG_METAL);
+
+		// Pick the language-specific prefix first; the dimension suffix is common to both.
+		const char *baseName = 0;
+		switch (eBaseType)
+		{
+		case SVT_FLOAT:
+			baseName = targetsMetal ? "float" : "mat";
+			break;
+		case SVT_FLOAT16:
+			baseName = targetsMetal ? "half" : "mediump mat";
+			break;
+		case SVT_FLOAT10:
+			baseName = targetsMetal ? "half" : "lowp mat";
+			break;
+		default:
+			ASSERT(0);
+			return std::string();
+		}
+
+		std::ostringstream typeName;
+		typeName << baseName << columns << "x" << rows;
+		return typeName.str();
+	}
+
+	// Appends a swizzle selecting the first `count` components to dest:
+	// ".x", ".xy" or ".xyz" for 1..3. Nothing is appended for 0 and for exactly 4
+	// (a full vector needs no swizzle); counts above 4 append ".xyzw".
+	void AddSwizzleUsingElementCount(bstring dest, uint32_t count)
+	{
+		static const char * const componentNames[] = { "x", "y", "z", "w" };
+
+		if (count == 4 || count == 0)
+			return;
+
+		const uint32_t emitted = (count < 4) ? count : 4;
+
+		bcatcstr(dest, ".");
+		for (uint32_t comp = 0; comp < emitted; ++comp)
+			bcatcstr(dest, componentNames[comp]);
+	}
+
+	// Returns how many components a write mask selects.
+	int WriteMaskToComponentCount(uint32_t writeMask)
+	{
+		// In HLSL bytecode an all-zero write mask also means "every component".
+		return (writeMask != 0) ? (int)GetNumberBitsSet(writeMask) : 4;
+	}
+
+	// Builds a mask with the lowest `count` bits set:
+	// 1 -> 0x1, 2 -> 0x3, 3 -> 0x7 and 4 -> 0xF.
+	uint32_t BuildComponentMaskFromElementCount(int count)
+	{
+		const int firstBitAboveMask = 1 << count;
+		return firstBitAboveMask - 1;
+	}
+
+	// Returns true when a value of type src can be assigned to dest directly.
+	// That is the case for equal types and for types that differ only in precision
+	// (float/float10/float16, int/int12/int16, uint/uint16).
+	bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src)
+	{
+		if (dest == src)
+			return true;
+
+		// Groups the precision variants of one base type; 0 means "not part of any group".
+		const auto familyOf = [](SHADER_VARIABLE_TYPE type) -> int
+		{
+			switch (type)
+			{
+			case SVT_FLOAT:
+			case SVT_FLOAT10:
+			case SVT_FLOAT16:
+				return 1;
+			case SVT_INT:
+			case SVT_INT12:
+			case SVT_INT16:
+				return 2;
+			case SVT_UINT:
+			case SVT_UINT16:
+				return 3;
+			default:
+				return 0;
+			}
+		};
+
+		const int destFamily = familyOf(dest);
+		return destFamily != 0 && destFamily == familyOf(src);
+	}
+
+	// Maps a resource return type to the matching TO_FLAG_* value.
+	// Only signed and unsigned integer returns have a flag; everything else is TO_FLAG_NONE.
+	uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType)
+	{
+		switch (eType)
+		{
+		case RETURN_TYPE_SINT:
+			return TO_FLAG_INTEGER;
+		case RETURN_TYPE_UINT:
+			return TO_FLAG_UNSIGNED_INTEGER;
+		default:
+			return TO_FLAG_NONE;
+		}
+	}
+
+	// Combines a resource return type with a reflected precision into a shader variable type.
+	// Signed and unsigned integer returns map to the int/uint families, every other return
+	// type to the float family. Within a family, LOWP and MEDIUMP select the reduced-precision
+	// variants and any other precision the full-precision one.
+	SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec)
+	{
+		const bool isLowp = (ePrec == REFLECT_RESOURCE_PRECISION_LOWP);
+		const bool isMediump = (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP);
+
+		switch (eType)
+		{
+		case RETURN_TYPE_SINT:
+			return isLowp ? SVT_INT12 : (isMediump ? SVT_INT16 : SVT_INT);
+		case RETURN_TYPE_UINT:
+			return isLowp ? SVT_UINT8 : (isMediump ? SVT_UINT16 : SVT_UINT);
+		default:
+			return isLowp ? SVT_FLOAT10 : (isMediump ? SVT_FLOAT16 : SVT_FLOAT);
+		}
+	}
+
+
+	// Returns the auto-expand flag for a vector of elemCount elements by shifting
+	// TO_AUTO_EXPAND_TO_VEC2 left once per element above two.
+	// NOTE(review): elemCount below 2 makes the shift amount wrap around - callers
+	// presumably only pass 2..4; confirm.
+	uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount)
+	{
+		const uint32_t stepsAboveVec2 = elemCount - 2;
+		return TO_AUTO_EXPAND_TO_VEC2 << stepsAboveVec2;
+	}
+
+	// Returns true if eOpCode (an OPCODE_TYPE value passed as int) is one of the opcodes
+	// treated as commutative here: DADD, IADD, ADD, MUL, IMUL, OR and AND.
+	bool IsOperationCommutative(int eOpCode)
+	{
+		static const OPCODE_TYPE commutativeOpcodes[] =
+		{
+			OPCODE_DADD,
+			OPCODE_IADD,
+			OPCODE_ADD,
+			OPCODE_MUL,
+			OPCODE_IMUL,
+			OPCODE_OR,
+			OPCODE_AND
+		};
+		const size_t opcodeCount = sizeof(commutativeOpcodes) / sizeof(commutativeOpcodes[0]);
+
+		const OPCODE_TYPE opcode = (OPCODE_TYPE)eOpCode;
+		for (size_t idx = 0; idx < opcodeCount; ++idx)
+		{
+			if (commutativeOpcodes[idx] == opcode)
+				return true;
+		}
+		return false;
+	}
+
+	// Returns true if both operands refer to the same temp register in the same way:
+	// same modifier, component count, register number, selection mode and - depending on
+	// the selection mode - same component mask, selected component or swizzle.
+	// Returns false for null pointers and for operands that are not temp registers.
+	bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB)
+	{
+		if (!psA || !psB)
+			return false;
+
+		if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP)
+			return false;
+
+		if (psA->eModifier != psB->eModifier)
+			return false;
+
+		if (psA->iNumComponents != psB->iNumComponents)
+			return false;
+
+		if (psA->ui32RegisterNumber != psB->ui32RegisterNumber)
+			return false;
+
+		if (psA->eSelMode != psB->eSelMode)
+			return false;
+
+		if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask)
+			return false;
+
+		if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0])
+			return false;
+
+		// The operands differ when the four swizzle entries are NOT all equal.
+		if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE &&
+			!std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0]))
+			return false;
+
+		return true;
+	}
+
+	// Returns true if psInst is an IADD of the form "tN = tN + 1" or "tN = 1 + tN":
+	// the destination and one source are the same temp register number and the other
+	// source is a 32-bit immediate whose first value is the integer 1.
+	bool IsAddOneInstruction(const Instruction *psInst)
+	{
+		if (psInst->eOpcode != OPCODE_IADD)
+			return false;
+
+		const Operand &dest = psInst->asOperands[0];
+		if (dest.eType != OPERAND_TYPE_TEMP)
+			return false;
+
+		// Whichever of the two sources is not the temp has to be the immediate.
+		const bool tempComesFirst = (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP);
+		const Operand *tempSrc = tempComesFirst ? &psInst->asOperands[1] : &psInst->asOperands[2];
+		const Operand *immediateSrc = tempComesFirst ? &psInst->asOperands[2] : &psInst->asOperands[1];
+
+		if (immediateSrc->eType != OPERAND_TYPE_IMMEDIATE32)
+			return false;
+		if (tempSrc->eType != OPERAND_TYPE_TEMP)
+			return false;
+		if (tempSrc->ui32RegisterNumber != dest.ui32RegisterNumber)
+			return false;
+
+		// The immediate is stored in a float array; read its bits as an integer.
+		return *(int *)&immediateSrc->afImmediates[0] == 1;
+	}
+
+
+	// Returns the dimension count associated with a RESOURCE_DIMENSION value (passed as int):
+	// 1 for 1D textures, 2 for 2D / 1D-array / cube textures, 3 for 3D / 2D-array /
+	// cube-array textures. Any other value asserts and yields 0.
+	int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim)
+	{
+		int dimensions = 0;
+
+		switch ((RESOURCE_DIMENSION)eResDim)
+		{
+			case RESOURCE_DIMENSION_TEXTURE1D:
+				dimensions = 1;
+				break;
+			case RESOURCE_DIMENSION_TEXTURE2D:
+			case RESOURCE_DIMENSION_TEXTURE1DARRAY:
+			case RESOURCE_DIMENSION_TEXTURECUBE:
+				dimensions = 2;
+				break;
+			case RESOURCE_DIMENSION_TEXTURE3D:
+			case RESOURCE_DIMENSION_TEXTURE2DARRAY:
+			case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
+				dimensions = 3;
+				break;
+			default:
+				ASSERT(0);
+				break;
+		}
+
+		return dimensions;
+	}
+
+	// Returns the "more important" type of a and b.
+	// The types in the priority list below win in list order (reduced-precision types first,
+	// and float before uint before int within a precision). If neither a nor b is in the
+	// list, the one with the larger enum value is returned.
+	SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b)
+	{
+		static const SHADER_VARIABLE_TYPE priorityOrder[] =
+		{
+			SVT_FLOAT16,
+			SVT_FLOAT10,
+			SVT_UINT16,
+			SVT_UINT8,
+			SVT_INT16,
+			SVT_INT12,
+			SVT_FORCED_INT,
+			SVT_FLOAT,
+			SVT_UINT,
+			SVT_INT,
+			SVT_INT_AMBIGUOUS
+		};
+		const size_t priorityCount = sizeof(priorityOrder) / sizeof(priorityOrder[0]);
+
+		for (size_t idx = 0; idx < priorityCount; ++idx)
+		{
+			const SHADER_VARIABLE_TYPE candidate = priorityOrder[idx];
+			if (a == candidate || b == candidate)
+				return candidate;
+		}
+
+		// After these just rely on ordering.
+		return a > b ? a : b;
+	}
+
+	// Returns true if a direct constructor can convert src->dest.
+	// That holds when both types are integer-like (int, uint, bool and the 12/16-bit integer
+	// variants) or when both are floating point (float, double, float16, float10).
+	bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
+	{
+		const auto isIntegerLike = [](SHADER_VARIABLE_TYPE type) -> bool
+		{
+			switch (type)
+			{
+			case SVT_INT:
+			case SVT_UINT:
+			case SVT_BOOL:
+			case SVT_INT12:
+			case SVT_INT16:
+			case SVT_UINT16:
+				return true;
+			default:
+				return false;
+			}
+		};
+
+		const auto isFloatingPoint = [](SHADER_VARIABLE_TYPE type) -> bool
+		{
+			switch (type)
+			{
+			case SVT_FLOAT:
+			case SVT_DOUBLE:
+			case SVT_FLOAT16:
+			case SVT_FLOAT10:
+				return true;
+			default:
+				return false;
+			}
+		};
+
+		// uint<->int<->bool conversions possible
+		if (isIntegerLike(src) && isIntegerLike(dest))
+			return true;
+
+		// float<->double possible
+		return isFloatingPoint(src) && isFloatingPoint(dest);
+	}
+
+	// fpcheck(x) evaluates to true when x is NaN or infinite.
+	// MSVC builds use _isnan/_finite, all other compilers std::isnan/std::isinf.
+#ifdef _MSC_VER
+#define fpcheck(x) (_isnan(x) || !_finite(x))
+#else
+#define fpcheck(x) (std::isnan(x) || std::isinf(x))
+#endif
+
+	// Appends f to b formatted with "%.9g".
+	// If the formatted text has neither a decimal point nor an exponent, and f is a finite
+	// number, ".0" is appended as well.
+	void PrintFloat(bstring b, float f)
+	{
+		bstring formatted = bformat("%.9g", f);
+		const bool hasExponent = bstrchrp(formatted, 'e', 0) >= 0;
+		const bool hasDecimalPoint = bstrchrp(formatted, '.', 0) >= 0;
+
+		bconcat(b, formatted);
+		bdestroy(formatted);
+
+		if (hasExponent || hasDecimalPoint || fpcheck(f))
+			return;
+
+		bcatcstr(b, ".0");
+	}
+};
+
--- a/src/HLSLccTypes.natvis
+++ b/src/HLSLccTypes.natvis
@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="utf-8"?>
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+  <Type Name="Instruction">
+  	<DisplayString>{{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}}</DisplayString>
+  </Type>
+  <Type Name="Operand">
+    <DisplayString>{{ type={eType}, reg={ui32RegisterNumber} }}</DisplayString>
+  </Type>
+
+</AutoVisualizer>
--- a/src/Instruction.cpp
+++ b/src/Instruction.cpp
@ -0,0 +1,351 @@
+
+#include "internal_includes/Instruction.h"
+#include "internal_includes/debug.h"
+#include "include/ShaderInfo.h"
+
+// Returns the single source operand whose swizzle has to follow a rebased destination
+// write mask, or NULL if all source operands carry swizzles (in which case the caller
+// tweaks every source operand).
+//
+// psInst: the instruction to classify by opcode.
+//
+// Opcodes that are not listed hit an ASSERT and return NULL.
+static Operand *GetSrcSwizzleOperand(Instruction *psInst)
+{
+	switch (psInst->eOpcode)
+	{
+		// Not expected here. ChangeOperandTempRegister returns early for DP2/DP3/DP4,
+		// SAMPLE_C and SAMPLE_C_LZ before it calls this function.
+	case OPCODE_DP2:
+	case OPCODE_DP3:
+	case OPCODE_DP4:
+	case OPCODE_NOP:
+	case OPCODE_SWAPC:
+	case OPCODE_SAMPLE_C:
+	case OPCODE_SAMPLE_C_LZ:
+		ASSERT(0);
+		return NULL;
+
+		// Normal arithmetics, all srcs have swizzles
+	case OPCODE_ADD:
+	case OPCODE_AND:
+	case OPCODE_DERIV_RTX:
+	case OPCODE_DERIV_RTX_COARSE:
+	case OPCODE_DERIV_RTX_FINE:
+	case OPCODE_DERIV_RTY:
+	case OPCODE_DERIV_RTY_COARSE:
+	case OPCODE_DERIV_RTY_FINE:
+	case OPCODE_DIV:
+	case OPCODE_EQ:
+	case OPCODE_EXP:
+	case OPCODE_FRC:
+	case OPCODE_FTOI:
+	case OPCODE_FTOU:
+	case OPCODE_GE:
+	case OPCODE_IADD:
+	case OPCODE_IEQ:
+	case OPCODE_IGE:
+	case OPCODE_ILT:
+	case OPCODE_IMAD:
+	case OPCODE_IMAX:
+	case OPCODE_IMIN:
+	case OPCODE_IMUL:
+	case OPCODE_INE:
+	case OPCODE_INEG:
+	case OPCODE_ITOF:
+	case OPCODE_LOG:
+	case OPCODE_LT:
+	case OPCODE_MAD:
+	case OPCODE_MAX:
+	case OPCODE_MIN:
+	case OPCODE_MOV:
+	case OPCODE_MUL:
+	case OPCODE_NE:
+	case OPCODE_NOT:
+	case OPCODE_OR:
+	case OPCODE_ROUND_NE:
+	case OPCODE_ROUND_NI:
+	case OPCODE_ROUND_PI:
+	case OPCODE_ROUND_Z:
+	case OPCODE_RSQ:
+	case OPCODE_SINCOS:
+	case OPCODE_SQRT:
+	case OPCODE_UDIV:
+	case OPCODE_UGE:
+	case OPCODE_ULT:
+	case OPCODE_UMAD:
+	case OPCODE_UMAX:
+	case OPCODE_UMIN:
+	case OPCODE_UMUL:
+	case OPCODE_UTOF:
+	case OPCODE_XOR:
+
+	case OPCODE_BFI:
+	case OPCODE_BFREV:
+	case OPCODE_COUNTBITS:
+	case OPCODE_DADD:
+	case OPCODE_DDIV:
+	case OPCODE_DEQ:
+	case OPCODE_DFMA:
+	case OPCODE_DGE:
+	case OPCODE_DLT:
+	case OPCODE_DMAX:
+	case OPCODE_DMIN:
+	case OPCODE_DMUL:
+	case OPCODE_DMOV:
+	case OPCODE_DNE:
+	case OPCODE_DRCP:
+	case OPCODE_DTOF:
+	case OPCODE_F16TOF32:
+	case OPCODE_F32TOF16:
+	case OPCODE_FIRSTBIT_HI:
+	case OPCODE_FIRSTBIT_LO:
+	case OPCODE_FIRSTBIT_SHI:
+	case OPCODE_FTOD:
+	case OPCODE_IBFE:
+	case OPCODE_RCP:
+	case OPCODE_UADDC:
+	case OPCODE_UBFE:
+	case OPCODE_USUBB:
+	case OPCODE_MOVC:
+	case OPCODE_DMOVC:
+		return NULL;
+
+		// Special cases: only one specific operand needs its swizzle tweaked.
+		// For these opcodes it is operand 2.
+	case OPCODE_GATHER4:
+	case OPCODE_GATHER4_C:
+	case OPCODE_LD:
+	case OPCODE_LD_MS:
+	case OPCODE_LOD:
+	case OPCODE_LD_UAV_TYPED:
+	case OPCODE_LD_RAW:
+	case OPCODE_SAMPLE:
+	case OPCODE_SAMPLE_B:
+	case OPCODE_SAMPLE_L:
+	case OPCODE_SAMPLE_D:
+	case OPCODE_RESINFO:
+		return &psInst->asOperands[2];
+
+		// For these opcodes it is operand 3.
+	case OPCODE_GATHER4_PO:
+	case OPCODE_GATHER4_PO_C:
+	case OPCODE_LD_STRUCTURED:
+		return &psInst->asOperands[3];
+
+		// For the shifts it is operand 1.
+	case OPCODE_ISHL:
+	case OPCODE_ISHR:
+	case OPCODE_USHR:
+		return &psInst->asOperands[1];
+
+		// Unknown opcode: flag it in debug builds, behave like "tweak all sources" otherwise.
+	default:
+		ASSERT(0);
+		return NULL;
+
+
+	}
+
+}
+
+// Adjusts one source operand so that it still feeds the right values after the destination
+// write mask has been shifted down by `rebase` components.
+//
+// - Single-component selects are left untouched.
+// - Swizzles are shifted down by `rebase` entries.
+// - Everything else (mask mode, expected to be empty or full) is either an immediate,
+//   whose values are shifted down, or is converted into an explicit swizzle.
+static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase)
+{
+	if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
+	{
+		// Nothing to do
+		return;
+	}
+
+	if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
+	{
+		for (uint32_t src = rebase; src < 4; src++)
+			psOperand->aui32Swizzle[src - rebase] = psOperand->aui32Swizzle[src];
+		return;
+	}
+
+	// Mask mode, and any selection mode not handled above.
+	ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL);
+
+	// Special case for immediates, they do not have swizzles
+	if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32)
+	{
+		if (psOperand->iNumComponents > 1)
+			std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]);
+		return;
+	}
+	if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
+	{
+		if (psOperand->iNumComponents > 1)
+			std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]);
+		return;
+	}
+
+	// Need to change this to swizzle
+	psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE;
+	psOperand->ui32Swizzle = 0;
+
+	uint32_t slot = 0;
+	for (; slot < 4 - rebase; slot++)
+		psOperand->aui32Swizzle[slot] = slot + rebase;
+	// Pad the remaining slots with the first actual input.
+	for (; slot < 4; slot++)
+		psOperand->aui32Swizzle[slot] = rebase;
+}
+
+// Renames temp register oldReg to newReg in psOperand and, if rebase is non-zero, shifts the
+// accessed components down by `rebase`.
+//
+// psOperand: operand of this instruction (or a sub-operand of one) to update.
+// compMask:  the components of oldReg that are being moved to newReg; operands touching any
+//            other component are left alone.
+// flags:     UD_CHANGE_SUBOPERANDS to recurse into sub-operands, UD_CHANGE_MAIN_OPERAND to
+//            update psOperand itself.
+// rebase:    number of components the moved range is shifted down by.
+//
+// When the rebased operand is a destination of this instruction, the source operands are
+// adjusted as well so that they keep feeding the same components.
+void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase)
+{
+	if (flags & UD_CHANGE_SUBOPERANDS)
+	{
+		for (uint32_t sub = 0; sub < MAX_SUB_OPERANDS; sub++)
+		{
+			Operand *psSubOperand = psOperand->m_SubOperands[sub].get();
+			if (psSubOperand)
+				ChangeOperandTempRegister(psSubOperand, oldReg, newReg, compMask, UD_CHANGE_ALL, rebase);
+		}
+	}
+
+	// Only the requested temp register of the main operand is of interest from here on.
+	const bool changeMainOperand = (flags & UD_CHANGE_MAIN_OPERAND) != 0;
+	if (!changeMainOperand ||
+		psOperand->eType != OPERAND_TYPE_TEMP ||
+		psOperand->ui32RegisterNumber != oldReg)
+	{
+		return;
+	}
+
+	const uint32_t accessMask = psOperand->GetAccessMask();
+	// If this operation touches other components than the one(s) we're splitting, skip it
+	if ((accessMask & (~compMask)) != 0)
+	{
+		// Verify that we've not messed up in reachability analysis.
+		// This would mean that we've encountered an instruction that accesses
+		// a component in multi-component mode and we're supposed to treat it as single-use only.
+		// Now that we track operands we can bring this back
+		ASSERT((accessMask & compMask) == 0);
+		return;
+	}
+
+#if 0
+	printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask);
+#endif
+	psOperand->ui32RegisterNumber = newReg;
+
+	if (rebase == 0)
+		return;
+
+	// Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
+	switch (psOperand->eSelMode)
+	{
+	case OPERAND_4_COMPONENT_MASK_MODE:
+	{
+		// An empty mask stands for all four components.
+		const uint32_t oldMask = (psOperand->ui32CompMask != 0) ? psOperand->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL;
+
+		// Check that we're not losing any information
+		ASSERT(((oldMask >> rebase) << rebase) == oldMask);
+		psOperand->ui32CompMask = (oldMask >> rebase);
+		break;
+	}
+	case OPERAND_4_COMPONENT_SELECT_1_MODE:
+		ASSERT(psOperand->aui32Swizzle[0] >= rebase);
+		psOperand->aui32Swizzle[0] -= rebase;
+		break;
+	case OPERAND_4_COMPONENT_SWIZZLE_MODE:
+		for (uint32_t comp = 0; comp < 4; comp++)
+		{
+			// Note that this rebase is different from the one done for source operands
+			ASSERT(psOperand->aui32Swizzle[comp] >= rebase);
+			psOperand->aui32Swizzle[comp] -= rebase;
+		}
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
+
+	// Tweak operand datatypes
+	std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]);
+
+	// If this operand is a destination, we'll need to tweak sources as well
+	bool isDestination = false;
+	for (uint32_t dst = 0; dst < ui32FirstSrc && !isDestination; dst++)
+		isDestination = (psOperand == &asOperands[dst]);
+
+	if (!isDestination)
+		return;
+
+	// Nasty corner case of 2 destinations, not supported if both targets are written
+	ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL));
+
+	// If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction
+	switch (eOpcode)
+	{
+		// The opcodes that do not need tweaking:
+	case OPCODE_DP2:
+	case OPCODE_DP3:
+	case OPCODE_DP4:
+	case OPCODE_BUFINFO:
+	case OPCODE_SAMPLE_C:
+	case OPCODE_SAMPLE_C_LZ:
+		return;
+	default:
+		break;
+	}
+
+	// Null means tweak all source operands
+	Operand *psSwizzleOperand = GetSrcSwizzleOperand(this);
+	if (psSwizzleOperand)
+	{
+		DoSrcOperandRebase(psSwizzleOperand, rebase);
+		return;
+	}
+
+	for (uint32_t src = ui32FirstSrc; src < ui32NumOperands; src++)
+	{
+		DoSrcOperandRebase(&asOperands[src], rebase);
+	}
+}
+
+
+// Returns true if this is a SAMPLE* instruction whose sampler resolves to a precision other
+// than OPERAND_MIN_PRECISION_DEFAULT.
+//
+// info:  reflection data used to look up the resource bound at the sampler's register; the
+//        sampler group is searched first, then the texture group.
+// pType: optional; receives the resolved precision when the function returns true.
+bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const
+{
+	const bool isSampleInstruction =
+		eOpcode == OPCODE_SAMPLE ||
+		eOpcode == OPCODE_SAMPLE_B ||
+		eOpcode == OPCODE_SAMPLE_L ||
+		eOpcode == OPCODE_SAMPLE_D ||
+		eOpcode == OPCODE_SAMPLE_C ||
+		eOpcode == OPCODE_SAMPLE_C_LZ;
+	if (!isSampleInstruction)
+		return false;
+
+	// Operand 3 is expected to be the sampler.
+	const Operand *samplerOperand = &asOperands[3];
+	ASSERT(samplerOperand->eType == OPERAND_TYPE_SAMPLER);
+
+	const ResourceBinding *binding = NULL;
+	info.GetResourceFromBindingPoint(RGROUP_SAMPLER, samplerOperand->ui32RegisterNumber, &binding);
+	if (binding == NULL)
+	{
+		/* Try to look from texture group */
+		info.GetResourceFromBindingPoint(RGROUP_TEXTURE, samplerOperand->ui32RegisterNumber, &binding);
+	}
+
+	// Without a binding the precision counts as unknown.
+	const OPERAND_MIN_PRECISION precision =
+		Operand::ResourcePrecisionToOperandPrecision(binding ? binding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN);
+
+	if (precision == OPERAND_MIN_PRECISION_DEFAULT)
+		return false;
+
+	if (pType)
+		*pType = precision;
+
+	return true;
+}
+
+
--- a/src/LoopTransform.cpp
+++ b/src/LoopTransform.cpp
@ -0,0 +1,363 @@
+
+#include "src/internal_includes/LoopTransform.h"
+#include "src/internal_includes/Shader.h"
+#include "src/internal_includes/debug.h"
+#include <algorithm>
+#include <vector>
+#include <list>
+
+namespace HLSLcc
+{
+
+	// Bookkeeping for one LOOP/ENDLOOP pair found while scanning a shader phase.
+	// The same type doubles as a placeholder stack entry for SWITCH/ENDSWITCH (m_IsSwitch == true).
+	struct LoopInfo
+	{
+		// Starts out empty: no start/end instruction, no exit points, not a switch.
+		LoopInfo()
+			: m_StartLoop(nullptr)
+			, m_EndLoop(nullptr)
+			, m_ExitPoints()
+			, m_IsSwitch(false)
+		{
+		}
+
+		// The OPCODE_LOOP instruction that opens the loop
+		Instruction *m_StartLoop;
+		// The OPCODE_ENDLOOP instruction that matches m_StartLoop
+		Instruction *m_EndLoop;
+		// BREAK/BREAKC instructions recorded at this loop's own nesting depth
+		std::vector<Instruction *> m_ExitPoints;
+		// True if this entry stands for a switch-case rather than a LOOP/ENDLOOP pair. Used as a helper when parsing.
+		bool m_IsSwitch;
+	};
+	
+	typedef std::list<LoopInfo> Loops;
+
+	// Build a LoopInfo list of all the LOOP/ENDLOOP pairs in this shader phase.
+	//
+	// phase: the phase whose instructions (phase.psInst) are scanned front to back.
+	// res:   cleared, then receives one LoopInfo per OPCODE_LOOP in the order the LOOPs are encountered.
+	//
+	// SWITCH/ENDSWITCH are tracked on the same nesting stack (through throwaway LoopInfo entries that never
+	// reach res) so that a BREAK/BREAKC belonging to a switch is not recorded as a loop exit point.
+	void BuildLoopInfo(ShaderPhase &phase, Loops &res)
+	{
+		res.clear();
+
+		// Nothing to scan. Bail out before touching psInst[0], which is not valid on an empty container.
+		if (phase.psInst.empty())
+			return;
+
+		Instruction *i = &phase.psInst[0];
+		// One past the last instruction, obtained by pointer arithmetic rather than by dereferencing end()
+		Instruction * const endInst = i + phase.psInst.size();
+
+		// A stack of loopinfo elements (stored in res or in dummyLIForSwitches); the innermost construct is at the front
+		std::list<LoopInfo *> loopStack;
+
+		// Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here.
+		// Both this and res are std::lists, so the element addresses pushed to loopStack stay valid as more are added.
+		std::list<LoopInfo> dummyLIForSwitches;
+
+		for (; i != endInst; ++i)
+		{
+			switch (i->eOpcode)
+			{
+			case OPCODE_LOOP:
+			{
+				res.push_back(LoopInfo());
+				LoopInfo *currLoopInfo = &res.back();
+				currLoopInfo->m_StartLoop = i;
+				loopStack.push_front(currLoopInfo);
+				break;
+			}
+			case OPCODE_ENDLOOP:
+			{
+				ASSERT(!loopStack.empty());
+				LoopInfo *li = loopStack.front();
+				loopStack.pop_front();
+				li->m_EndLoop = i;
+				break;
+			}
+			case OPCODE_SWITCH:
+			{
+				// Create a dummy entry into the stack
+				dummyLIForSwitches.push_back(LoopInfo());
+				LoopInfo *li = &dummyLIForSwitches.back();
+				li->m_IsSwitch = true;
+				loopStack.push_front(li);
+				break;
+			}
+			case OPCODE_ENDSWITCH:
+			{
+				ASSERT(!loopStack.empty());
+				LoopInfo *li = loopStack.front();
+				loopStack.pop_front();
+				ASSERT(li->m_IsSwitch);
+				break;
+			}
+			case OPCODE_BREAK:
+			case OPCODE_BREAKC:
+			{
+				// Get the current loopstack head
+				ASSERT(!loopStack.empty());
+				LoopInfo *li = loopStack.front();
+				// Ignore breaks from switch-cases
+				if (!li->m_IsSwitch)
+				{
+					li->m_ExitPoints.push_back(i);
+				}
+				break;
+			}
+			default:
+				break;
+			}
+		}
+	}
+
+	// Returns true if the given instruction is an integer comparison (IGE, ILT, IEQ, INE, UGE or ULT) that
+	// - writes a single component of a temp register, and
+	// - reads at least one temp among its two sources, and
+	// - does not compare a temp register against that very same register.
+	static bool IsScalarTempComparisonInstruction(const Instruction *i)
+	{
+		const bool isIntComparison =
+			i->eOpcode == OPCODE_IGE ||
+			i->eOpcode == OPCODE_ILT ||
+			i->eOpcode == OPCODE_IEQ ||
+			i->eOpcode == OPCODE_INE ||
+			i->eOpcode == OPCODE_UGE ||
+			i->eOpcode == OPCODE_ULT;
+		if (!isIntComparison)
+			return false;
+
+		const Operand &dest = i->asOperands[0];
+		const Operand &lhs = i->asOperands[1];
+		const Operand &rhs = i->asOperands[2];
+
+		if (dest.eType != OPERAND_TYPE_TEMP)
+			return false;
+
+		const bool lhsIsTemp = (lhs.eType == OPERAND_TYPE_TEMP);
+		const bool rhsIsTemp = (rhs.eType == OPERAND_TYPE_TEMP);
+
+		// Reject comparisons between two components of the same temp (e.g. temp.x vs temp.y)
+		if (lhsIsTemp && rhsIsTemp && lhs.ui32RegisterNumber == rhs.ui32RegisterNumber)
+			return false;
+
+		// At least one of the sources has to be a temp
+		if (!lhsIsTemp && !rhsIsTemp)
+			return false;
+
+		// The result must be a scalar
+		return dest.GetNumSwizzleElements() == 1;
+	}
+
+	// Returns true iff both instructions perform an identical operation. For the purposes of loop transformation
+	// only operations of the form tX = tX <op> imm32 are considered, which means:
+	// - same opcode, and a temp destination in 'a',
+	// - every operand pair agrees on type and access mask and selects exactly one component,
+	// - every temp operand names the destination register (in both instructions),
+	// - every imm32 operand carries bitwise-equal immediates.
+	static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b)
+	{
+		if (a->eOpcode != b->eOpcode)
+			return false;
+		ASSERT(a->ui32NumOperands == b->ui32NumOperands);
+
+		if (a->asOperands[0].eType != OPERAND_TYPE_TEMP)
+			return false;
+		const uint32_t destRegister = a->asOperands[0].ui32RegisterNumber;
+
+		for (uint32_t opIdx = 0; opIdx < a->ui32NumOperands; opIdx++)
+		{
+			const Operand &lhs = a->asOperands[opIdx];
+			const Operand &rhs = b->asOperands[opIdx];
+
+			if (lhs.eType != rhs.eType)
+				return false;
+
+			if (lhs.GetAccessMask() != rhs.GetAccessMask())
+				return false;
+
+			// Scalars only
+			if (lhs.GetNumSwizzleElements() != 1)
+				return false;
+
+			switch (lhs.eType)
+			{
+			case OPERAND_TYPE_TEMP:
+				// Both sides must refer to the one register that is also being written
+				if (lhs.ui32RegisterNumber != rhs.ui32RegisterNumber || lhs.ui32RegisterNumber != destRegister)
+					return false;
+				break;
+			case OPERAND_TYPE_IMMEDIATE32:
+				if (memcmp(lhs.afImmediates, rhs.afImmediates, 4 * sizeof(float)) != 0)
+					return false;
+				break;
+			default:
+				// Other operand types only need to pass the generic checks above
+				break;
+			}
+		}
+		return true;
+	}
+
+	// Attempt to transform a single loop into a for-statement.
+	//
+	// phase: the phase that owns the loop; a new temp register number is allocated from it
+	//        (m_NextFreeTempRegister) when the initializer can be moved into the for header.
+	// li:    the loop to examine. m_StartLoop and m_EndLoop must be valid and at least three instructions apart.
+	//
+	// On success the LOOP instruction gets its m_LoopInductors[0..3] filled in (initializer or null, comparison,
+	// breakc, increment) and those instructions are flagged with m_SkipTranslation. On any failed precondition
+	// the function returns without modifying anything.
+	static void AttemptLoopTransform(ShaderPhase &phase, LoopInfo &li)
+	{
+		// In order to transform a loop into a for, the following has to hold:
+		// - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC.
+		// - The loop must end with an OPCODE_IADD where the dest operand is the same temp as one of the sources in the comparison instruction above
+		// Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement.
+		// Also, the loop induction variable must be standalone (as in, never used as part of a larger vector)
+
+		Instruction *cmpInst = li.m_StartLoop + 1;
+
+		if (!IsScalarTempComparisonInstruction(cmpInst))
+			return;
+
+		// The breakc must consume the register written by the comparison
+		Instruction *breakInst = li.m_StartLoop + 2;
+		if (breakInst->eOpcode != OPCODE_BREAKC)
+			return;
+		if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
+			return;
+		if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber)
+			return;
+
+		// Check that the comparison result isn't used anywhere else
+		if (cmpInst->m_Uses.size() != 1)
+			return;
+
+		ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst);
+
+		// Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable
+		uint32_t inductionVarIdx = 0;
+
+		Instruction *lastInst = li.m_EndLoop - 1;
+		if (lastInst->eOpcode != OPCODE_IADD)
+			return;
+		if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
+			return;
+
+		if (lastInst->asOperands[0].GetNumSwizzleElements() != 1)
+			return;
+
+		uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber;
+		// Verify that the induction variable actually matches.
+		if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar)
+			inductionVarIdx = 1;
+		else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar)
+			inductionVarIdx = 2;
+		else
+			return;
+
+		// Verify that we also read from the induction variable in the last instruction
+		if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) ||
+			(lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar)))
+			return;
+
+		// Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops,
+		// but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex")
+		// This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing.
+		// So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop.
+		for (Instruction *itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
+		{
+			switch (itr->eOpcode)
+			{
+			case OPCODE_LD_RAW:
+			case OPCODE_LD_STRUCTURED:
+			case OPCODE_LD_UAV_TYPED:
+			case OPCODE_STORE_RAW:
+			case OPCODE_STORE_STRUCTURED:
+			case OPCODE_STORE_UAV_TYPED:
+				return; // Nope, can't do a for, not even a partial one.
+			default:
+				break;
+			}
+		}
+
+		// One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst.
+		// Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called.
+		// Of course, if all those instructions are identical, then it's fine.
+		// Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well.
+
+		Instruction *initializer = NULL;
+		std::vector<const Operand::Define *> definitionsOutsideRange;
+		std::vector<const Operand::Define *> definitionsInsideRange;
+		for (const Operand::Define &def : cmpInst->asOperands[inductionVarIdx].m_Defines)
+		{
+			if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop)
+				definitionsOutsideRange.push_back(&def);
+			else
+				definitionsInsideRange.push_back(&def);
+		}
+
+		// No definition from inside the loop reaches the comparison at all (the checks above only matched
+		// register numbers), so lastInst does not drive the loop condition. Leave the loop untouched; this also
+		// keeps the comparison loop below from indexing into an empty vector.
+		if (definitionsInsideRange.empty())
+			return;
+
+		// If there are several in-loop definitions, all of them must be identical to the first one
+		for (size_t defIdx = 1; defIdx < definitionsInsideRange.size(); defIdx++)
+		{
+			if (!AreInstructionsIdentical(definitionsInsideRange[defIdx]->m_Inst, definitionsInsideRange[0]->m_Inst))
+				return;
+		}
+
+		ASSERT(definitionsOutsideRange.size() > 0);
+		if (definitionsOutsideRange.size() == 1)
+			initializer = definitionsOutsideRange[0]->m_Inst;
+
+		// Initializer must only write to one component
+		if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
+			initializer = 0;
+
+		// Check that the initializer is only used within the range so we can move it to for statement
+		if (initializer)
+		{
+			bool hasUsesOutsideRange = false;
+			for (const Instruction::Use &u : initializer->m_Uses)
+			{
+				if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
+					hasUsesOutsideRange = true;
+			}
+			// Has outside uses? we cannot pull that up to the for statement
+			if (hasUsesOutsideRange)
+				initializer = 0;
+		}
+
+		// Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either
+		if (initializer)
+		{
+			bool cannotDoInitializer = false;
+			for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++)
+			{
+				const Instruction::Use &u = *itr;
+				if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
+				{
+					cannotDoInitializer = true;
+					break;
+				}
+				// Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var)
+				if (u.m_Op->GetAccessMask() != 1)
+				{
+					cannotDoInitializer = true;
+					break;
+				}
+			}
+			// Has outside uses (or vector uses)? we cannot pull that up to the for statement
+			if (cannotDoInitializer)
+				initializer = 0;
+		}
+
+
+		if (initializer)
+		{
+			// We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that.
+			uint32_t newRegister = phase.m_NextFreeTempRegister++;
+			li.m_StartLoop->m_InductorRegister = newRegister;
+			for (const Instruction::Use &u : initializer->m_Uses)
+			{
+				u.m_Op->m_ForLoopInductorName = newRegister;
+			}
+			// Also tweak the induction variable source operand of cmpInst and lastInst
+			if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
+				cmpInst->asOperands[1].m_ForLoopInductorName = newRegister;
+			else
+				cmpInst->asOperands[2].m_ForLoopInductorName = newRegister;
+
+			if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
+				lastInst->asOperands[1].m_ForLoopInductorName = newRegister;
+			else
+				lastInst->asOperands[2].m_ForLoopInductorName = newRegister;
+
+			// ...and the destinations of the increment and of the initializer itself
+			lastInst->asOperands[0].m_ForLoopInductorName = newRegister;
+			initializer->asOperands[0].m_ForLoopInductorName = newRegister;
+		}
+
+		// This loop can be transformed to for-loop. Do the necessary magicks.
+		// Slot 0 stays null when the initializer could not be moved into the for header.
+		li.m_StartLoop->m_LoopInductors[0] = initializer;
+		li.m_StartLoop->m_LoopInductors[1] = cmpInst;
+		li.m_StartLoop->m_LoopInductors[2] = breakInst;
+		li.m_StartLoop->m_LoopInductors[3] = lastInst;
+
+		// Flag the instructions now referenced from the LOOP's m_LoopInductors slots
+		if (initializer)
+			initializer->m_SkipTranslation = true;
+		cmpInst->m_SkipTranslation = true;
+		breakInst->m_SkipTranslation = true;
+		lastInst->m_SkipTranslation = true;
+
+	}
+
+	// Finds every LOOP/ENDLOOP pair in the phase and tries to turn each one into a for-statement
+	// (see AttemptLoopTransform). Loops that do not match the expected pattern are left as they are.
+	void DoLoopTransform(ShaderPhase &phase)
+	{
+		Loops loops;
+		BuildLoopInfo(phase, loops);
+
+		for (LoopInfo &li : loops)
+		{
+			// Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point
+			// Also that there's at least 2 instructions in loop body
+			ASSERT(li.m_StartLoop != 0);
+			ASSERT(li.m_EndLoop != 0);
+			ASSERT(li.m_EndLoop > li.m_StartLoop + 2);
+			ASSERT(!li.m_IsSwitch);
+			ASSERT(!li.m_ExitPoints.empty());
+
+			// AttemptLoopTransform reads m_StartLoop + 1, m_StartLoop + 2 and m_EndLoop - 1 unconditionally.
+			// Should ASSERT be compiled out (presumably the case in release builds - confirm against debug.h),
+			// an unmatched LOOP would reach that pointer arithmetic with a null m_EndLoop, so skip such loops.
+			const bool hasValidRange = li.m_StartLoop != 0 && li.m_EndLoop != 0 && li.m_EndLoop > li.m_StartLoop + 2;
+			if (!hasValidRange)
+				continue;
+
+			AttemptLoopTransform(phase, li);
+		}
+	}
+};
--- a/src/Operand.cpp
+++ b/src/Operand.cpp
@ -0,0 +1,586 @@
+
+#include "internal_includes/Operand.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Instruction.h"
+
+// Returns a bitmask with bit N set for every component index N this operand refers to:
+// - swizzle mode: the union of all four swizzle entries,
+// - select-1 mode: only the first swizzle entry,
+// - mask mode (and any other selection mode): the component mask, where an empty mask counts as "all".
+uint32_t Operand::GetAccessMask() const
+{
+	uint32_t mask = 0;
+	// TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now.
+	if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
+	{
+		for (int comp = 0; comp < 4; comp++)
+			mask |= 1 << (aui32Swizzle[comp]);
+	}
+	else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
+	{
+		mask = 1 << (aui32Swizzle[0]);
+	}
+	else
+	{
+		// Mask mode; unknown selection modes are treated the same way
+		mask = ui32CompMask;
+		if (mask == 0)
+			mask = OPERAND_4_COMPONENT_MASK_ALL;
+	}
+
+	ASSERT(mask != 0);
+	return mask;
+}
+
+// Returns the 1-based index of the highest component this operand touches (1 = x ... 4 = w).
+// Anything that is not a write-mask-enabled 4-component operand, as well as an empty or full
+// component mask, yields 4.
+int Operand::GetMaxComponent() const
+{
+	if (!iWriteMaskEnabled || iNumComponents != 4)
+		return 4;
+
+	switch (eSelMode)
+	{
+	case OPERAND_4_COMPONENT_MASK_MODE:
+	{
+		const uint32_t fullMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W;
+		if (ui32CompMask == 0 || ui32CompMask == fullMask)
+			break;
+
+		// Partial mask: report the highest component that is set
+		if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W)
+			return 4;
+		if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)
+			return 3;
+		if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)
+			return 2;
+		if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X)
+			return 1;
+		break;
+	}
+	case OPERAND_4_COMPONENT_SWIZZLE_MODE:
+	{
+		if (ui32Swizzle == NO_SWIZZLE)
+			return 4;
+
+		// Highest component index referenced by any of the four swizzle slots
+		uint32_t highest = 0;
+		for (int slot = 0; slot < 4; slot++)
+		{
+			if (aui32Swizzle[slot] > highest)
+				highest = aui32Swizzle[slot];
+		}
+		return (int)highest + 1;
+	}
+	case OPERAND_4_COMPONENT_SELECT_1_MODE:
+		return 1;
+	default:
+		break;
+	}
+
+	return 4;
+}
+
+// Returns true if the operand's swizzle repeats one single component in all four slots,
+// e.g. .xxxx or .wwww. Only write-mask-enabled 4-component operands in swizzle mode qualify.
+bool Operand::IsSwizzleReplicated() const
+{
+	if (!iWriteMaskEnabled || iNumComponents != 4)
+		return false;
+
+	if (eSelMode != OPERAND_4_COMPONENT_SWIZZLE_MODE)
+		return false;
+
+	switch (ui32Swizzle)
+	{
+	case WWWW_SWIZZLE:
+	case ZZZZ_SWIZZLE:
+	case YYYY_SWIZZLE:
+	case XXXX_SWIZZLE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+
+// Get the number of elements returned by operand, taking additional component mask into account.
+//
+// _ui32CompMask: extra component mask that is ANDed with the operand's own selection; only components
+//                present in both are counted. Defaults to all four components.
+//
+// NOTE: although the method is const, for the three-component thread id operand types it overwrites
+// iNumComponents with 3 on this object (through a cast that drops const).
+uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const
+{
+	uint32_t count = 0;
+
+	// Step 1: operand types whose element count is decided by the type alone
+	switch (eType)
+	{
+	case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
+		return 1; // TODO: does mask make any sense here?
+	case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
+	case OPERAND_TYPE_INPUT_THREAD_ID:
+	case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
+		// Adjust component count and break to more processing
+		((Operand *)this)->iNumComponents = 3;
+		break;
+	case OPERAND_TYPE_IMMEDIATE32:
+	case OPERAND_TYPE_IMMEDIATE64:
+	case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+	case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+	case OPERAND_TYPE_OUTPUT_DEPTH:
+	{
+		// These ignore the selection mode entirely; only iNumComponents and the caller's mask matter.
+		// Translate numComponents into bitmask
+		// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
+		uint32_t compMask = (1 << iNumComponents) - 1;
+
+		compMask &= _ui32CompMask;
+		// Calculate bits left in compMask
+		return HLSLcc::GetNumberBitsSet(compMask);
+	}
+	default:
+	{
+			   break;
+	}
+	}
+
+	// Step 2: count according to the selection mode (skipped for single-component operands)
+	if (iWriteMaskEnabled &&
+		iNumComponents != 1)
+	{
+		//Component Mask
+		if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
+		{
+			// An empty component mask is treated as "all components"
+			uint32_t compMask = ui32CompMask;
+			if (compMask == 0)
+				compMask = OPERAND_4_COMPONENT_MASK_ALL;
+			compMask &= _ui32CompMask;
+
+			if (compMask == OPERAND_4_COMPONENT_MASK_ALL)
+				return 4;
+
+			if (compMask & OPERAND_4_COMPONENT_MASK_X)
+			{
+				count++;
+			}
+			if (compMask & OPERAND_4_COMPONENT_MASK_Y)
+			{
+				count++;
+			}
+			if (compMask & OPERAND_4_COMPONENT_MASK_Z)
+			{
+				count++;
+			}
+			if (compMask & OPERAND_4_COMPONENT_MASK_W)
+			{
+				count++;
+			}
+		}
+		else
+			//Component Swizzle
+		if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
+		{
+			// The swizzle values themselves are not consulted here: every one of the four
+			// swizzle slots enabled in the caller's mask counts as one element.
+			uint32_t i;
+			for (i = 0; i < 4; ++i)
+			{
+				if ((_ui32CompMask & (1 << i)) == 0)
+					continue;
+
+				count++;
+			}
+		}
+		else
+		if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
+		{
+			// Counts 1 only if the single selected component is also enabled in the caller's mask
+			if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X))
+			{
+				count++;
+			}
+			else
+			if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y))
+			{
+				count++;
+			}
+			else
+			if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z))
+			{
+				count++;
+			}
+			else
+			if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W))
+			{
+				count++;
+			}
+		}
+
+		//Component Select 1
+	}
+
+	// Step 3: nothing was counted above (write mask disabled, single component operand, or no
+	// overlap with the caller's mask): fall back to iNumComponents limited by the caller's mask
+	if (!count)
+	{
+		// Translate numComponents into bitmask
+		// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
+		uint32_t compMask = (1 << iNumComponents) - 1;
+
+		compMask &= _ui32CompMask;
+		// Calculate bits left in compMask
+		return HLSLcc::GetNumberBitsSet(compMask);
+	}
+
+	return count;
+}
+
+// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch.
+// Per-patch is only ever reported for hull and domain shaders; within those, the hull shader control
+// point phase, domain shader outputs and control point operands are still per-vertex.
+int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const
+{
+	const bool isHull = (eShaderType == HULL_SHADER);
+	const bool isDomain = (eShaderType == DOMAIN_SHADER);
+	const bool isControlPointOperand =
+		(eType == OPERAND_TYPE_INPUT_CONTROL_POINT) || (eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT);
+
+	const bool isPerVertex =
+		(!isHull && !isDomain) ||
+		(isHull && eShaderPhaseType == HS_CTRL_POINT_PHASE) ||
+		(isDomain && eType == OPERAND_TYPE_OUTPUT) ||
+		isControlPointOperand;
+
+	return isPerVertex ? 0 : 1;
+}
+
+// Convenience overload: takes the shader type and the type of the phase currently being processed
+// (psContext->currentPhase) from the context.
+int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const
+{
+	const auto &currentPhase = psContext->psShader->asPhases[psContext->currentPhase];
+	return GetRegisterSpace(psContext->psShader->eShaderType, currentPhase.ePhase);
+}
+
+// Returns the shader variable type this operand evaluates to.
+//
+// psContext:                    translation context; used to reach the shader's reflection info
+//                               (signatures, constant buffers), target language and current phase.
+// ePreferredTypeForImmediates:  returned as-is for 32-bit immediates, which carry no type of their own.
+//
+// Resolution order: an explicit min-precision qualifier on the operand wins; otherwise the result
+// depends on the operand type (per-component temp types, input/output signatures, constant buffer
+// reflection, fixed types for the special registers). Anything unrecognized yields SVT_FLOAT.
+SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const
+{
+	// The min precision qualifier overrides all of the stuff below
+	switch (eMinPrecision)
+	{
+	case OPERAND_MIN_PRECISION_FLOAT_16:
+		return SVT_FLOAT16;
+	case OPERAND_MIN_PRECISION_FLOAT_2_8:
+		return SVT_FLOAT10;
+	case OPERAND_MIN_PRECISION_SINT_16:
+		return SVT_INT16;
+	case OPERAND_MIN_PRECISION_UINT_16:
+		return SVT_UINT16;
+	default:
+		break;
+	}
+
+	switch (eType)
+	{
+	case OPERAND_TYPE_TEMP:
+	{
+		// Initialized so that a mask selecting none of the four components cannot return garbage;
+		// SVT_FLOAT matches the fallback used at the end of this function.
+		SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT;
+		int i = 0;
+
+		if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
+		{
+			return aeDataType[aui32Swizzle[0]];
+		}
+		if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
+		{
+			if (ui32Swizzle == (NO_SWIZZLE))
+			{
+				return aeDataType[0];
+			}
+
+			// The type of the first swizzled component stands for the whole operand
+			return aeDataType[aui32Swizzle[0]];
+		}
+
+		if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
+		{
+			uint32_t mask = ui32CompMask;
+			if (!mask)
+			{
+				mask = OPERAND_4_COMPONENT_MASK_ALL;
+			}
+			// Take the type of the lowest component present in the mask
+			for (; i < 4; ++i)
+			{
+				if (mask & (1 << i))
+				{
+					eCurrentType = aeDataType[i];
+					break;
+				}
+			}
+
+#ifdef _DEBUG
+			//Check if all elements have the same basic type.
+			for (; i < 4; ++i)
+			{
+				if (mask & (1 << i))
+				{
+					if (eCurrentType != aeDataType[i])
+					{
+						ASSERT(0);
+					}
+				}
+			}
+#endif
+			return eCurrentType;
+		}
+
+		ASSERT(0);
+
+		break;
+	}
+	case OPERAND_TYPE_OUTPUT:
+	{
+		const uint32_t ui32Register = ui32RegisterNumber;
+		int regSpace = GetRegisterSpace(psContext);
+		const ShaderInfo::InOutSignature* psOut = NULL;
+
+		// Per-vertex outputs come from the output signature, per-patch ones from the patch constant signature
+		if (regSpace == 0)
+			psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream,
+			&psOut);
+		else
+			psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut);
+
+		ASSERT(psOut != NULL);
+		if (psOut->eMinPrec != MIN_PRECISION_DEFAULT)
+		{
+			switch (psOut->eMinPrec)
+			{
+			default:
+				ASSERT(0);
+				break;
+			case MIN_PRECISION_FLOAT_16:
+				return SVT_FLOAT16;
+			case MIN_PRECISION_FLOAT_2_8:
+				if (psContext->psShader->eTargetLanguage == LANG_METAL)
+					return SVT_FLOAT16;
+				else
+					return SVT_FLOAT10;
+			case MIN_PRECISION_SINT_16:
+				return SVT_INT16;
+			case MIN_PRECISION_UINT_16:
+				return SVT_UINT16;
+			}
+		}
+		if (psOut->eComponentType == INOUT_COMPONENT_UINT32)
+		{
+			return SVT_UINT;
+		}
+		else if (psOut->eComponentType == INOUT_COMPONENT_SINT32)
+		{
+			return SVT_INT;
+		}
+		return SVT_FLOAT;
+	}
+	case OPERAND_TYPE_INPUT:
+	{
+		// The register number is taken from the last index dimension
+		const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1];
+		int regSpace = GetRegisterSpace(psContext);
+		const ShaderInfo::InOutSignature* psIn = NULL;
+
+		if (regSpace == 0)
+		{
+			if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0)
+				return SVT_FLOAT; // All combined inputs are stored as floats
+			psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(),
+				&psIn);
+		}
+		else
+		{
+			if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0)
+				return SVT_FLOAT; // All combined inputs are stored as floats
+			psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn);
+		}
+
+		ASSERT(psIn != NULL);
+
+		// First go by the special name recorded on the operand itself...
+		switch (eSpecialName)
+		{
+			//UINT in DX, INT in GL.
+		case NAME_PRIMITIVE_ID:
+		case NAME_VERTEX_ID:
+		case NAME_INSTANCE_ID:
+		case NAME_RENDER_TARGET_ARRAY_INDEX:
+		case NAME_VIEWPORT_ARRAY_INDEX:
+		case NAME_SAMPLE_INDEX:
+
+			return SVT_INT;
+
+		case NAME_IS_FRONT_FACE:
+			return SVT_UINT;
+
+		case NAME_POSITION:
+		case NAME_CLIP_DISTANCE:
+			return SVT_FLOAT;
+
+		default:
+			// Not decided by the operand's special name; consult the signature below
+			break;
+		}
+
+		// ...then by the system value type recorded in the signature
+		if (psIn->eSystemValueType == NAME_IS_FRONT_FACE)
+			return SVT_UINT;
+
+		//UINT in DX, INT in GL.
+		if (psIn->eSystemValueType == NAME_INSTANCE_ID ||
+			psIn->eSystemValueType == NAME_PRIMITIVE_ID ||
+			psIn->eSystemValueType == NAME_VERTEX_ID ||
+			psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX ||
+			psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX ||
+			psIn->eSystemValueType == NAME_SAMPLE_INDEX
+			)
+		{
+			return SVT_INT;
+		}
+
+		if (psIn->eMinPrec != MIN_PRECISION_DEFAULT)
+		{
+			switch (psIn->eMinPrec)
+			{
+			default:
+				ASSERT(0);
+				break;
+			case MIN_PRECISION_FLOAT_16:
+				return SVT_FLOAT16;
+			case MIN_PRECISION_FLOAT_2_8:
+				if (psContext->psShader->eTargetLanguage == LANG_METAL)
+					return SVT_FLOAT16;
+				else
+					return SVT_FLOAT10;
+			case MIN_PRECISION_SINT_16:
+				return SVT_INT16;
+			case MIN_PRECISION_UINT_16:
+				return SVT_UINT16;
+			}
+		}
+
+		if (psIn->eComponentType == INOUT_COMPONENT_UINT32)
+		{
+			return SVT_UINT;
+		}
+		else if (psIn->eComponentType == INOUT_COMPONENT_SINT32)
+		{
+			return SVT_INT;
+		}
+		return SVT_FLOAT;
+	}
+	case OPERAND_TYPE_CONSTANT_BUFFER:
+	{
+		const ConstantBuffer* psCBuf = NULL;
+		const ShaderVarType* psVarType = NULL;
+		int32_t rebase = -1;
+		bool isArray;
+		int foundVar;
+		psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
+		if (psCBuf)
+		{
+			foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
+			if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing?
+			{
+				return psVarType->Type;
+			}
+			// Variable not found, or suboperand 1 is set: ends up at the SVT_FLOAT fallback below
+		}
+		else
+		{
+			// Todo: this isn't correct yet.
+			return SVT_FLOAT;
+		}
+		break;
+	}
+	case OPERAND_TYPE_IMMEDIATE32:
+	{
+		return ePreferredTypeForImmediates;
+	}
+
+	case OPERAND_TYPE_IMMEDIATE64:
+	{
+		return SVT_DOUBLE;
+	}
+
+	case OPERAND_TYPE_INPUT_THREAD_ID:
+	case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
+	case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
+	case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
+	{
+		return SVT_UINT;
+	}
+	case OPERAND_TYPE_SPECIAL_ADDRESS:
+	case OPERAND_TYPE_SPECIAL_LOOPCOUNTER:
+	case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
+	case OPERAND_TYPE_INPUT_PRIMITIVEID:
+	{
+		return SVT_INT;
+	}
+	case OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
+	{
+		return SVT_UINT;
+	}
+	case OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
+	{
+		return SVT_INT;
+	}
+	case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
+	{
+		return SVT_INT;
+	}
+	case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats
+	case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // So are const arrays currently
+	default:
+	{
+		return SVT_FLOAT;
+	}
+	}
+
+	return SVT_FLOAT;
+}
+
+// Maps a reflected resource precision to the matching operand min-precision:
+// mediump -> FLOAT_16, highp -> DEFAULT, and everything else (lowp, unknown, unrecognized values) -> FLOAT_2_8.
+OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec)
+{
+	if (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP)
+		return OPERAND_MIN_PRECISION_FLOAT_16;
+
+	if (ePrec == REFLECT_RESOURCE_PRECISION_HIGHP)
+		return OPERAND_MIN_PRECISION_DEFAULT;
+
+	// REFLECT_RESOURCE_PRECISION_LOWP, REFLECT_RESOURCE_PRECISION_UNKNOWN and anything unrecognized
+	return OPERAND_MIN_PRECISION_FLOAT_2_8;
+}
+
+// Returns the number of elements of the input this operand refers to.
+// A handful of special operand types have a fixed size (1 or 3); for everything else the count is
+// the number of bits set in the mask of the matching input (or patch constant) signature entry.
+int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const
+{
+	const int regSpace = GetRegisterSpace(psContext);
+
+	// Fixed-size scalars
+	if (eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED ||
+		eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID ||
+		eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID)
+	{
+		return 1;
+	}
+
+	// Fixed-size 3-vectors
+	if (eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
+		eType == OPERAND_TYPE_INPUT_THREAD_ID ||
+		eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
+		eType == OPERAND_TYPE_INPUT_DOMAIN_POINT)
+	{
+		return 3;
+	}
+
+	// Everything else: look the register up in the signature that belongs to its register space
+	const ShaderInfo::InOutSignature *signature = NULL;
+	auto &shaderInfo = psContext->psShader->sInfo;
+	if (regSpace == 0)
+		shaderInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &signature);
+	else
+		shaderInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &signature);
+
+	ASSERT(signature != NULL);
+
+	// TODO: Are there ever any cases where the mask has 'holes'?
+	return HLSLcc::GetNumberBitsSet(signature->ui32Mask);
+}
--- a/src/Shader.cpp
+++ b/src/Shader.cpp
--- a/src/ShaderInfo.cpp
+++ b/src/ShaderInfo.cpp
@ -0,0 +1,387 @@
+
+#include "ShaderInfo.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/tokens.h"
+#include "Operand.h"
+#include <stdlib.h>
+#include <sstream>
+
+// Looks up the texture-group resource bound at regNo and returns its data type.
+// The binding is expected to exist; this is only checked through ASSERT.
+SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo)
+{
+	const ResourceBinding* psTexture = NULL;
+	const int bindingFound = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psTexture);
+	ASSERT(bindingFound != 0);
+	return psTexture->GetDataType();
+}
+
+// Resolves a (resource group, bind point) pair to its constant buffer through aui32ResourceMap
+// and stores a pointer to that buffer in *ppsConstBuf.
+void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const
+{
+	ASSERT(ui32MajorVersion > 3);
+	const auto bufferIndex = aui32ResourceMap[eGroup][ui32BindPoint];
+	*ppsConstBuf = &psConstantBuffers[bufferIndex];
+}
+
+// Searches the resource bindings for one that belongs to eGroup and whose bind range
+// [ui32BindPoint, ui32BindPoint + ui32BindCount) contains the requested bind point.
+// On success stores the binding in *ppsOutBinding and returns 1; otherwise returns 0 and
+// leaves *ppsOutBinding untouched.
+int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const
+{
+	// Index the container directly instead of taking &psResourceBindings[0], which is an
+	// out-of-range subscript when there are no bindings at all.
+	const size_t ui32NumBindings = psResourceBindings.size();
+
+	for (size_t i = 0; i < ui32NumBindings; ++i)
+	{
+		const ResourceBinding& sBinding = psResourceBindings[i];
+
+		if (ResourceTypeToResourceGroup(sBinding.eType) != eGroup)
+			continue;
+
+		// Compare the distance from the range start against the count so that the test
+		// cannot wrap around when ui32BindPoint + ui32BindCount exceeds 32 bits.
+		if (ui32BindPoint >= sBinding.ui32BindPoint &&
+			(ui32BindPoint - sBinding.ui32BindPoint) < sBinding.ui32BindCount)
+		{
+			*ppsOutBinding = &sBinding;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+// Finds the variable of the "this pointer" constant buffer whose byte range
+// [ui32StartOffset, ui32StartOffset + ui32Size) contains ui32Offset.
+// Returns 1 and stores the variable in *ppsShaderVar when found, 0 otherwise.
+int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const
+{
+	const size_t varCount = psThisPointerConstBuffer->asVars.size();
+
+	for (size_t varIdx = 0; varIdx < varCount; ++varIdx)
+	{
+		auto &sVar = psThisPointerConstBuffer->asVars[varIdx];
+
+		const bool startsAtOrBefore = ui32Offset >= sVar.ui32StartOffset;
+		const bool endsAfter = ui32Offset < (sVar.ui32StartOffset + sVar.ui32Size);
+		if (startsAtOrBefore && endsAfter)
+		{
+			*ppsShaderVar = &sVar;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+// Finds the input signature that lives in ui32Register and whose component mask covers every
+// bit of ui32Mask. Returns 1 and stores the signature in *ppsOut when found; otherwise
+// returns 0 (asserting unless allowNull is set).
+int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
+{
+	const size_t sigCount = psInputSignatures.size();
+
+	for (size_t sigIdx = 0; sigIdx < sigCount; ++sigIdx)
+	{
+		if (ui32Register != psInputSignatures[sigIdx].ui32Register)
+			continue;
+
+		// Requested components that this signature does not provide.
+		if (((~psInputSignatures[sigIdx].ui32Mask) & ui32Mask) != 0)
+			continue;
+
+		*ppsOut = &psInputSignatures[sigIdx];
+		return 1;
+	}
+
+	ASSERT(allowNull);
+	return 0;
+}
+
+// Finds the patch constant signature for ui32Register.
+// The first choice is a signature whose mask covers every bit of ui32Mask. If there is none and
+// allowNull is set, returns 0. Otherwise falls back to the last signature that uses the
+// register, regardless of mask. Returns 1 and stores the signature in *ppsOut on success;
+// asserts and returns 0 when the register is not used by any signature.
+int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
+{
+	const size_t ui32NumVars = psPatchConstantSignatures.size();
+
+	for (size_t i = 0; i < ui32NumVars; ++i)
+	{
+		if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0))
+		{
+			*ppsOut = &psPatchConstantSignatures[i];
+			return 1;
+		}
+	}
+
+	if (allowNull)
+		return 0;
+
+	// There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks.
+	// In those situations just take the last signature that uses that register (it's typically the "highest" one)
+	//
+	// Count down with "i-- > 0": comparing a size_t index against 0xffffffff never terminates
+	// the loop where size_t is 64 bits wide, because the wrapped index is 0xffffffffffffffff.
+	for (size_t i = ui32NumVars; i-- > 0;)
+	{
+		if (ui32Register == psPatchConstantSignatures[i].ui32Register)
+		{
+			*ppsOut = &psPatchConstantSignatures[i];
+			return 1;
+		}
+	}
+
+	ASSERT(0);
+	return 0;
+}
+
+// Finds the output signature that lives in ui32Register on stream ui32Stream and shares at
+// least one component with ui32CompMask. Returns 1 and stores the signature in *ppsOut when
+// found; otherwise returns 0 (asserting unless allowNull is set).
+int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register,
+	const uint32_t ui32CompMask,
+	const uint32_t ui32Stream,
+	const InOutSignature** ppsOut,
+	bool allowNull /* = false */) const
+{
+	const size_t sigCount = psOutputSignatures.size();
+	ASSERT(ui32CompMask != 0);
+
+	for (size_t sigIdx = 0; sigIdx < sigCount; ++sigIdx)
+	{
+		if (ui32Register != psOutputSignatures[sigIdx].ui32Register)
+			continue;
+
+		// The masks have to overlap in at least one component.
+		if (!(ui32CompMask & psOutputSignatures[sigIdx].ui32Mask))
+			continue;
+
+		if (ui32Stream != psOutputSignatures[sigIdx].ui32Stream)
+			continue;
+
+		*ppsOut = &psOutputSignatures[sigIdx];
+		return 1;
+	}
+
+	ASSERT(allowNull);
+	return 0;
+}
+
+// Finds the output signature with the given system value type and semantic index.
+// Returns 1 and stores the signature in *ppsOut when found; asserts and returns 0 otherwise.
+int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const
+{
+	const size_t sigCount = psOutputSignatures.size();
+
+	for (size_t sigIdx = 0; sigIdx < sigCount; ++sigIdx)
+	{
+		const bool sameSystemValue = (eSystemValueType == psOutputSignatures[sigIdx].eSystemValueType);
+		const bool sameSemanticIndex = (ui32SemanticIndex == psOutputSignatures[sigIdx].ui32SemanticIndex);
+		if (!sameSystemValue || !sameSemanticIndex)
+			continue;
+
+		*ppsOut = &psOutputSignatures[sigIdx];
+		return 1;
+	}
+
+	ASSERT(0);
+	return 0;
+}
+
+// Computes the size in bytes of a single element of a constant buffer variable type.
+// - Structs: offset of the last member plus that member's size.
+// - Matrices when matrixAsVectors is set: 16 bytes per row (SVC_MATRIX_ROWS) or per column
+//   (SVC_MATRIX_COLUMNS).
+// - Everything else: Columns * Rows * 4.
+static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors)
+{
+	switch (psType->Class)
+	{
+	case SVC_STRUCT:
+		// Struct size is derived from where its last member ends
+		return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors);
+
+	case SVC_MATRIX_ROWS:
+		// Matrices represented as vec4 arrays take a full vec4 per row
+		if (matrixAsVectors)
+			return psType->Rows * 16;
+		break;
+
+	case SVC_MATRIX_COLUMNS:
+		// ... or a full vec4 per column
+		if (matrixAsVectors)
+			return psType->Columns * 16;
+		break;
+
+	default:
+		break;
+	}
+
+	// Regular matrices, vectors and scalars
+	return psType->Columns * psType->Rows * 4;
+}
+
+// Checks whether the byte offset offsetToFind falls inside psType (placed at
+// parentOffset + psType->Offset) and, if so, returns the innermost type that contains it.
+//
+//   psType       - type to test; struct members are searched recursively.
+//   parentOffset - byte offset of the enclosing variable/struct.
+//   offsetToFind - byte offset being looked up.
+//   isArray      - set to true when the match is a matrix or an array of scalars/vectors,
+//                  false for any other match. Not written when there is no match.
+//   arrayIndices - optional; receives the array indices encountered on the way to the match.
+//   pi32Rebase   - pi32Rebase[0] receives thisOffset % 16 when the match is a non-array vector.
+//   flags        - HLSLCC_FLAG_TRANSLATE_MATRICES selects the vec4-array size rule for matrices.
+//
+// Returns NULL when offsetToFind is outside of psType.
+static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
+	uint32_t parentOffset,
+	uint32_t offsetToFind,
+	bool* isArray,
+	std::vector<uint32_t>* arrayIndices,
+	int32_t* pi32Rebase,
+	uint32_t flags)
+{
+	uint32_t thisOffset = parentOffset + psType->Offset;
+	uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);
+	// Element size rounded up to the next multiple of 16 bytes (one vec4)
+	uint32_t paddedSize = thisSize;
+	if (thisSize % 16 > 0) 
+		paddedSize += (16 - (thisSize % 16));
+	uint32_t arraySize = thisSize;
+
+	// Array elements are padded to align on vec4 size, except for the last one
+	if (psType->Elements)
+		arraySize = (paddedSize * (psType->Elements - 1)) + thisSize;
+
+	if ((offsetToFind >= thisOffset) &&
+		offsetToFind < (thisOffset + arraySize))
+	{
+		*isArray = false;
+		if (psType->Class == SVC_STRUCT)
+		{
+			// Elements are laid out with a stride of paddedSize (see arraySize above), so the
+			// element index has to be derived from the padded size, not the raw struct size.
+			if (psType->Elements > 1 && arrayIndices != NULL)
+				arrayIndices->push_back((offsetToFind - thisOffset) / paddedSize);
+
+			// Need to bring offset back to element zero in case of array of structs
+			uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize;
+			uint32_t m = 0;
+
+			for (m = 0; m < psType->MemberCount; ++m)
+			{
+				const ShaderVarType* psMember = &psType->Members[m];
+
+				const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags);
+				if (foundType != NULL)
+					return foundType;
+			}
+			// No member claimed the offset: fall through and report the struct itself.
+		}
+		// Check for array of scalars or vectors (both take up 16 bytes per element).
+		// Matrices are also treated as arrays of vectors.
+		else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) || 
+			((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1))
+		{
+			*isArray = true;
+			if (arrayIndices != NULL)
+				arrayIndices->push_back((offsetToFind - thisOffset) / 16);
+		}
+		else if (psType->Class == SVC_VECTOR)
+		{
+			//Check for vector starting at a non-vec4 offset.
+
+			// cbuffer $Globals
+			// {
+			//
+			//   float angle;                       // Offset:    0 Size:     4
+			//   float2 angle2;                     // Offset:    4 Size:     8
+			//
+			// }
+
+			//cb0[0].x = angle
+			//cb0[0].yzyy = angle2.xyxx
+
+			//Rebase angle2 so that .y maps to .x, .z maps to .y
+
+			pi32Rebase[0] = thisOffset % 16;
+		}
+
+		return psType;
+	}
+	return NULL;
+}
+
+// Finds the constant buffer variable addressed by a vec4 register index plus the first
+// swizzle component. Returns 1 when a variable was found (and stored in ppsShaderVar[0]),
+// 0 otherwise.
+int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
+	const uint32_t(&pui32Swizzle)[4],
+	const ConstantBuffer* psCBuf,
+	const ShaderVarType** ppsShaderVar, // Output the found var
+	bool* isArray, // Output bool that tells if the found var is an array
+	std::vector<uint32_t>* arrayIndices, // Output vector of array indices in order from root parent to the found var
+	int32_t* pi32Rebase, // Output swizzle rebase
+	uint32_t flags)
+{
+	// Each vec4 register spans 16 bytes
+	uint32_t ui32ByteOffset = ui32Vec4Offset * 16;
+
+	//Swizzle can point to another variable. In the example below
+	//cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined
+	//into vectors. psCBuf->ui32NumVars will be 3.
+
+	// cbuffer cbUIUpdates
+	// {
+	//   float g_fLifeSpan;                 // Offset:    0 Size:     4
+	//   float g_fLifeSpanVar;              // Offset:    4 Size:     4 [unused]
+	//   float g_fRadiusMin;                // Offset:    8 Size:     4 [unused]
+	//   float g_fRadiusMax;                // Offset:   12 Size:     4 [unused]
+	//   float g_fGrowTime;                 // Offset:   16 Size:     4 [unused]
+	//   float g_fStepSize;                 // Offset:   20 Size:     4
+	//   float g_fTurnRate;                 // Offset:   24 Size:     4
+	//   float g_fTurnSpeed;                // Offset:   28 Size:     4 [unused]
+	//   float g_fLeafRate;                 // Offset:   32 Size:     4
+	//   float g_fShrinkTime;               // Offset:   36 Size:     4 [unused]
+	//   uint g_uMaxFaces;                  // Offset:   40 Size:     4
+	// }
+	switch (pui32Swizzle[0])
+	{
+	case OPERAND_4_COMPONENT_Y:
+		ui32ByteOffset += 4;
+		break;
+	case OPERAND_4_COMPONENT_Z:
+		ui32ByteOffset += 8;
+		break;
+	case OPERAND_4_COMPONENT_W:
+		ui32ByteOffset += 12;
+		break;
+	default:
+		// Any other first component leaves the offset at the start of the register
+		break;
+	}
+
+	const size_t varCount = psCBuf->asVars.size();
+
+	for (size_t varIdx = 0; varIdx < varCount; ++varIdx)
+	{
+		// The output is overwritten on every attempt, so it ends up NULL after a failed search
+		ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[varIdx].sType, psCBuf->asVars[varIdx].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags);
+
+		if (ppsShaderVar[0] != NULL)
+			return 1;
+	}
+	return 0;
+}
+
+// Builds the variable's fullName with array indices filled in: the n-th '[' found in the name is
+// followed by indices[n], as long as there are indices left. Brackets beyond the supplied
+// indices are copied through unchanged, so the var's own indexing (if it is an array) is not inserted.
+std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices)
+{
+	std::ostringstream patched;
+	size_t copiedUpTo = 0;
+	uint32_t bracketNo = 0;
+
+	for (;;)
+	{
+		const size_t bracket = psShaderVar->fullName.find('[', copiedUpTo);
+		if (bracket == std::string::npos)
+			break;
+
+		// Copy everything up to and including the bracket, then append the index for it
+		const size_t afterBracket = bracket + 1;
+		patched << psShaderVar->fullName.substr(copiedUpTo, afterBracket - copiedUpTo);
+		if (bracketNo < indices.size())
+			patched << indices[bracketNo];
+
+		copiedUpTo = afterBracket;
+		++bracketNo;
+	}
+
+	// Remainder of the name after the last bracket
+	patched << psShaderVar->fullName.substr(copiedUpTo);
+
+	return patched.str();
+}
+
+// Maps a resource type to the resource group it is bound through.
+// Texture buffers assert (their group is not settled yet) and are reported as textures;
+// unknown types assert and are reported as constant buffers.
+ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType)
+{
+	switch (eType)
+	{
+	case RTYPE_UAV_RWTYPED:
+	case RTYPE_UAV_RWSTRUCTURED:
+	case RTYPE_UAV_RWBYTEADDRESS:
+	case RTYPE_UAV_APPEND_STRUCTURED:
+	case RTYPE_UAV_CONSUME_STRUCTURED:
+	case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER:
+		return RGROUP_UAV;
+
+	case RTYPE_TEXTURE:
+	case RTYPE_BYTEADDRESS:
+	case RTYPE_STRUCTURED:
+		return RGROUP_TEXTURE;
+
+	case RTYPE_SAMPLER:
+		return RGROUP_SAMPLER;
+
+	case RTYPE_CBUFFER:
+		return RGROUP_CBUFFER;
+
+	case RTYPE_TBUFFER:
+		ASSERT(0); // Need to find out which group this belongs to
+		return RGROUP_TEXTURE;
+
+	default:
+		break;
+	}
+
+	// Unknown resource type
+	ASSERT(0);
+	return RGROUP_CBUFFER;
+}
+
+// Copies precisions from info onto the sampler and texture resource bindings, matching by name.
+// A binding whose name starts with "sampler" and has no exact match is retried with that
+// prefix removed. Bindings without a match keep their current precision.
+void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info)
+{
+	if (info.empty())
+		return;
+
+	const size_t bindingCount = psResourceBindings.size();
+	for (size_t bindingIdx = 0; bindingIdx < bindingCount; ++bindingIdx)
+	{
+		ResourceBinding &binding = psResourceBindings[bindingIdx];
+
+		const bool isSamplerOrTexture = (binding.eType == RTYPE_SAMPLER || binding.eType == RTYPE_TEXTURE);
+		if (!isSamplerOrTexture)
+			continue;
+
+		// Exact name first
+		HLSLccSamplerPrecisionInfo::iterator match = info.find(binding.name);
+
+		// DX11 style sampler case: retry without the "sampler" prefix
+		if (match == info.end() && binding.name.compare(0, 7, "sampler") == 0)
+			match = info.find(binding.name.substr(7, binding.name.size() - 7));
+
+		if (match == info.end())
+			continue;
+
+		binding.ePrecision = match->second;
+	}
+}
--- a/src/UseDefineChains.cpp
+++ b/src/UseDefineChains.cpp
@ -0,0 +1,887 @@
+
+#include "internal_includes/UseDefineChains.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/Instruction.h"
+
+#include "internal_includes/ControlFlowGraph.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include <algorithm>
+
+using HLSLcc::ForEachOperand;
+
+#define DEBUG_UDCHAINS 0
+
+#if DEBUG_UDCHAINS
+// Debug mode
+// Debug-only check of the define-use chain of one temp component (idx = register * 4 + component):
+// every definition must really write that component, and every usage it lists must exist in
+// the use-define chain of the same component and point back to the definition.
+static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
+{
+	const uint32_t tempReg = idx / 4;
+	const uint32_t offs = idx - (tempReg * 4);
+	const uint32_t accessMask = 1 << offs;
+
+	for (DefineUseChain::iterator du = psDUChains[idx].begin(); du != psDUChains[idx].end(); du++)
+	{
+		ASSERT(du->index == idx % 4);
+
+		// Check that the definition actually writes to idx
+		int found = 0;
+		for (uint32_t i = 0; i < du->psInst->ui32FirstSrc; i++)
+		{
+			if (du->psInst->asOperands[i].eType != OPERAND_TYPE_TEMP)
+				continue;
+			if (du->psInst->asOperands[i].ui32RegisterNumber != tempReg)
+				continue;
+
+			uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]);
+			if (writeMask & accessMask)
+			{
+				ASSERT(writeMask == du->writeMask);
+				found = 1;
+				break;
+			}
+		}
+		ASSERT(found);
+
+		// Check that each usage of each definition also is found in the use-define chain
+		for (UsageSet::iterator ul = du->usages.begin(); ul != du->usages.end(); ul++)
+		{
+			// Search for the usage in the chain
+			UseDefineChain::iterator use = psUDChains[idx].begin();
+			while (use != psUDChains[idx].end() && &*use != *ul)
+				use++;
+			ASSERT(use != psUDChains[idx].end());
+			ASSERT(&*use == *ul);
+
+			// Check that the mapping back is also found
+			ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end());
+		}
+	}
+}
+
+// Debug-only check of the use-define chain of one temp component (idx = register * 4 + component):
+// every usage must refer to that register/component, and every definition it lists must exist
+// in the define-use chain of the same component and point back to the usage.
+static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
+{
+	for (UseDefineChain::iterator ud = psUDChains[idx].begin(); ud != psUDChains[idx].end(); ud++)
+	{
+		ASSERT(ud->psOp->ui32RegisterNumber == idx / 4);
+		ASSERT(ud->index == idx % 4);
+
+		// Check that each definition of each usage also is found in the define-use chain
+		for (DefineSet::iterator dl = ud->defines.begin(); dl != ud->defines.end(); dl++)
+		{
+			// Search for the definition in the chain
+			DefineUseChain::iterator def = psDUChains[idx].begin();
+			while (def != psDUChains[idx].end() && &*def != *dl)
+				def++;
+			ASSERT(def != psDUChains[idx].end());
+			ASSERT(&*def == *dl);
+
+			// Check that the mapping back is also found
+			ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end());
+		}
+	}
+}
+
+// Debug-only: runs both chain consistency checks for every component of the first tempRegs temps.
+static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
+{
+	const uint32_t componentCount = tempRegs * 4;
+	for (uint32_t component = 0; component < componentCount; ++component)
+	{
+		UDCheckConsistencyDUChain(component, psDUChains, psUDChains, activeDefinitions);
+		UDCheckConsistencyUDChain(component, psDUChains, psUDChains, activeDefinitions);
+	}
+}
+
+#define printf_console printf
+
+#endif
+
+using namespace HLSLcc::ControlFlow;
+using std::for_each;
+
+// Returns the chain entry that describes def (same instruction and operand), creating it at the
+// front of psDUChain when it does not exist yet. A new entry records the component index, the
+// operand's access mask as its write mask, and itself as the sibling for that component.
+static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index)
+{
+	// Reuse an existing entry if there is one
+	for (auto existing = psDUChain.begin(); existing != psDUChain.end(); ++existing)
+	{
+		if (existing->psInst == def.m_Instruction && existing->psOp == def.m_Operand)
+			return &(*existing);
+	}
+
+	// Nothing found, add a fresh entry at the head of the chain
+	psDUChain.push_front(DefineUseChainEntry());
+	DefineUseChainEntry *created = &(*psDUChain.begin());
+
+	created->psInst = (Instruction *)def.m_Instruction;
+	created->psOp = (Operand *)def.m_Operand;
+	created->index = index;
+	created->writeMask = def.m_Operand->GetAccessMask();
+	created->psSiblings[index] = created;
+
+	return created;
+}
+
+
+
+// Do flow control analysis on the instructions and build the define-use and use-define chains
+//
+//   instructions - the shader instructions (not accessed directly here; the blocks of cfg are walked instead).
+//   ui32NumTemps - number of temp registers; chains are created for ui32NumTemps * 4 components.
+//   psDUChain    - output, cleared first: per component, the definitions and the usages they reach.
+//   psUDChain    - output, cleared first: per component, the usages and the definitions they see.
+//   cfg          - control flow graph providing the basic blocks and their reachable definitions.
+//
+// Chains are keyed by (temp register * 4 + component).
+void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg)
+{
+	// Note: no pointers to the first/last instruction are taken here. They were unused, and
+	// subscripting an empty instruction vector to form them is out of range.
+
+	ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp
+
+	psDUChain.clear();
+	psUDChain.clear();
+
+	for (uint32_t i = 0; i < ui32NumTemps * 4; i++)
+	{
+		psUDChain.insert(std::make_pair(i, UseDefineChain()));
+		psDUChain.insert(std::make_pair(i, DefineUseChain()));
+	}
+
+	const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks();
+
+	// Loop through each block, first calculate the union of all the reachables of all preceding blocks
+	// and then build on that as we go along the basic block instructions
+	for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr<BasicBlock> &bptr)
+	{
+		const BasicBlock &b = *bptr.get();
+		BasicBlock::ReachableVariables rvars;
+		for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock)
+		{
+			const BasicBlock &pred = *cfg.GetBasicBlockForInstruction(precBlock);
+			BasicBlock::RVarUnion(rvars, pred.Reachable());
+		});
+
+		// Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions
+		for (const Instruction *inst = b.First(); inst <= b.Last(); inst++)
+		{
+			// Process sources first
+			ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
+				[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
+			{
+				if (psOperand->eType != OPERAND_TYPE_TEMP)
+					return;
+
+				uint32_t tempReg = psOperand->ui32RegisterNumber;
+				uint32_t accessMask = psOperand->GetAccessMask();
+
+				// Go through each component
+				for (int k = 0; k < 4; k++)
+				{
+					if (!(accessMask & (1 << k)))
+						continue;
+
+					uint32_t regIdx = tempReg * 4 + k;
+
+					// Add an use for all visible definitions
+					psUDChain[regIdx].push_front(UseDefineChainEntry());
+					UseDefineChainEntry &ue = *psUDChain[regIdx].begin();
+					ue.psInst = (Instruction *)psInst;
+					ue.psOp = (Operand *)psOperand;
+					ue.accessMask = accessMask;
+					ue.index = k;
+					ue.psSiblings[k] = &ue;
+					// ue.siblings will be filled out later.
+
+					// Link the use with every definition that reaches this point, in both directions
+					BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx];
+					for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def)
+					{
+						DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k);
+						ue.defines.insert(duentry);
+						duentry->usages.insert(&ue);
+					});
+				}
+				return;
+			});
+
+			// Then the destination operands
+			ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND,
+				[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
+			{
+				if (psOperand->eType != OPERAND_TYPE_TEMP)
+					return;
+
+				uint32_t tempReg = psOperand->ui32RegisterNumber;
+				uint32_t accessMask = psOperand->GetAccessMask();
+
+				// Go through each component
+				for (int k = 0; k < 4; k++)
+				{
+					if (!(accessMask & (1 << k)))
+						continue;
+
+					uint32_t regIdx = tempReg * 4 + k;
+
+					// Overwrite whatever's in rvars; they are killed by this
+					rvars[regIdx].clear();
+					rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand));
+
+					// Make sure the definition gets created even though it doesn't have any uses at all
+					// (happens when sampling a texture but not all channels are used etc).
+					GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k);
+
+				}
+				return;
+			});
+		}
+	});
+
+	// Connect the siblings for all uses and definitions
+	for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair<const uint32_t, UseDefineChain> &udpair)
+	{
+		UseDefineChain &ud = udpair.second;
+		// Clear out the bottom 2 bits to get the actual base reg
+		uint32_t baseReg = udpair.first & ~(3);
+
+		for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue)
+		{
+			ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber);
+
+			// Go through each component
+			for (int k = 0; k < 4; k++)
+			{
+				// Skip components that we don't access, or the one that's our own
+				if (!(ue.accessMask & (1 << k)) || ue.index == k)
+					continue;
+
+				// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
+				UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; });
+				ASSERT(siblItr != psUDChain[baseReg + k].end());
+				UseDefineChainEntry &sibling = *siblItr;
+				ue.psSiblings[k] = &sibling;
+			}
+		});
+	});
+
+	// Same for definitions
+	for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair<const uint32_t, DefineUseChain> &dupair)
+	{
+		DefineUseChain &du = dupair.second;
+		// Clear out the bottom 2 bits to get the actual base reg
+		uint32_t baseReg = dupair.first & ~(3);
+
+		for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de)
+		{
+			ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber);
+
+			// Go through each component
+			for (int k = 0; k < 4; k++)
+			{
+				// Skip components that we don't access, or the one that's our own
+				if (!(de.writeMask & (1 << k)) || de.index == k)
+					continue;
+
+				// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
+				DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; });
+				ASSERT(siblItr != psDUChain[baseReg + k].end());
+				DefineUseChainEntry &sibling = *siblItr;
+				de.psSiblings[k] = &sibling;
+			}
+		});
+	});
+
+#if DEBUG_UDCHAINS
+	UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions);
+#endif
+}
+
+
+typedef std::vector<DefineUseChainEntry *> SplitDefinitions;
+
+// Split out a define to use a new temp register
+//
+// Moves every definition in defs (all of which must write the same temp register) and all of
+// their usages to a freshly allocated temp register, and moves the matching chain entries
+// into the chains of the new register.
+//
+//   defs            - definitions to move; must not be empty.
+//   psNumTemps      - in/out temp register count; the new register is *psNumTemps and the count is incremented.
+//   psDUChains      - define-use chains, keyed by register * 4 + component.
+//   psUDChains      - use-define chains, keyed the same way.
+//   pui32SplitTable - per-register record for split temps: (count << 24) | (rebase << 16) | original temp,
+//                     0xffffffff for registers that were not created by a split.
+//
+// The components written by defs are shifted down ("rebased") so that the lowest written
+// component becomes component 0 of the new register.
+static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
+{
+	// Validate before touching defs[0]
+	ASSERT(defs.size() > 0);
+
+	uint32_t newReg = *psNumTemps;
+	uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber;
+	uint32_t accessMask = defs[0]->writeMask;
+	uint32_t i, u32def;
+	uint32_t rebase, count;
+	uint32_t splitTableValue;
+
+	for (i = 1; i < defs.size(); i++)
+	{
+		ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg);
+		accessMask |= defs[i]->writeMask;
+	}
+
+
+	(*psNumTemps)++;
+
+
+#if DEBUG_UDCHAINS
+	UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions());
+#endif
+	ASSERT(accessMask != 0 && accessMask <= 0xf);
+	// Calculate rebase value and component count
+	// rebase = number of unused low components, count = span from the lowest to the highest written component
+	rebase = 0;
+	count = 0;
+	i = accessMask;
+	while ((i & 1) == 0)
+	{
+		rebase++;
+		i = i >> 1;
+	}
+	while (i != 0)
+	{
+		count++;
+		i = i >> 1;
+	}
+
+	// Make sure there's enough room in the split table
+	// Doubling alone is not sufficient when the table is empty or much smaller than newReg,
+	// so always grow to at least newReg + 1 entries.
+	if (pui32SplitTable.size() <= newReg)
+	{
+		size_t newSize = std::max<size_t>(pui32SplitTable.size() * 2, (size_t)newReg + 1);
+		pui32SplitTable.resize(newSize, 0xffffffff);
+	}
+
+	// Set the original temp of the new register
+	{
+		// Follow the table back until a register that was not itself created by a split
+		uint32_t origTemp = oldReg;
+		while (pui32SplitTable[origTemp] != 0xffffffff)
+			origTemp = pui32SplitTable[origTemp] & 0xffff;
+
+		ASSERT(rebase < 4);
+		ASSERT(count <= 4);
+		splitTableValue = (count << 24) | (rebase << 16) | origTemp;
+
+		pui32SplitTable[newReg] = splitTableValue;
+	}
+
+	// Insert the new temps to the map
+	for (i = newReg * 4; i < newReg * 4 + 4; i++)
+	{
+		psUDChains.insert(std::make_pair(i, UseDefineChain()));
+		psDUChains.insert(std::make_pair(i, DefineUseChain()));
+	}
+
+	for (u32def = 0; u32def < defs.size(); u32def++)
+	{
+		DefineUseChainEntry *defineToSplit = defs[u32def];
+		uint32_t oldIdx = defineToSplit->index;
+#if DEBUG_UDCHAINS
+		printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count);
+#endif
+
+		// We may have moved the opcodes already because of multiple defines pointing to the same op
+		if (defineToSplit->psOp->ui32RegisterNumber != newReg)
+		{
+			ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg);
+			// Update the declaration operand
+			// Don't change possible suboperands as they are sources
+			defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
+		}
+
+		defineToSplit->writeMask >>= rebase;
+		defineToSplit->index -= rebase;
+		// Change the temp register number for all usages
+		UsageSet::iterator ul = defineToSplit->usages.begin();
+		while (ul != defineToSplit->usages.end())
+		{
+			// Already updated by one of the siblings? Skip.
+			if ((*ul)->psOp->ui32RegisterNumber != newReg)
+			{
+				ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg);
+				(*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
+			}
+
+			// Update the UD chain
+			{
+				// The usage is only still in the old chain if no earlier definition moved it already
+				UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin();
+				while (udLoc != psUDChains[oldReg * 4 + oldIdx].end())
+				{
+					if (&*udLoc == *ul)
+					{
+						// Move to new list
+						psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc);
+
+						if (rebase > 0)
+						{
+							(*ul)->accessMask >>= rebase;
+							(*ul)->index -= rebase;
+							// Shift the sibling slots down by rebase; sized by the array's own element type
+							memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof((*ul)->psSiblings[0]));
+						}
+						break;
+					}
+					udLoc++;
+				}
+			}
+
+			ul++;
+		}
+
+		// Move the define out of the old chain (if its still there)
+		{
+			// Find the define in the old chain
+			DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin();
+			while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit))
+			{
+				duLoc++;
+			}
+			ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end());
+			{
+				// Move directly to new chain
+				psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc);
+				if (rebase != 0)
+				{
+					// Shift the sibling slots down by rebase; sized by the array's own element type
+					memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(defineToSplit->psSiblings[0]));
+				}
+			}
+
+		}
+
+	}
+
+#if DEBUG_UDCHAINS
+	UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions());
+#endif
+}
+
+// Appends newDef's non-null sibling entries (which include newDef itself once its own sibling
+// slot is set) to defs, skipping any entry that is already in the list.
+static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef)
+{
+	for (uint32_t component = 0; component < 4; ++component)
+	{
+		DefineUseChainEntry *candidate = newDef->psSiblings[component];
+		if (!candidate)
+			continue;
+
+		// Only add entries that are not in the list yet
+		const bool alreadyListed = std::find(defs.begin(), defs.end(), candidate) != defs.end();
+		if (!alreadyListed)
+			defs.push_back(candidate);
+	}
+}
+
+// Returns true when def is already part of defs.
+static bool IsInSplitDefinitions(const SplitDefinitions &defs, const DefineUseChainEntry *def)
+{
+	return std::find(defs.begin(), defs.end(), def) != defs.end();
+}
+
+// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place
+//
+// The set can be split when:
+//  - all definitions write the same temp register,
+//  - the register has at least one definition that is not part of the set, and
+//  - every usage of the definitions only sees definitions that are part of the set.
+// When a usage sees a definition outside of the set, that definition (and its siblings) is
+// added to defs and the whole check is restarted, so defs may grow during the call.
+static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
+{
+	// Initial checks: all definitions must:
+	// Access the same register
+	// Have at least one definition in any of the 4 register slots that isn't included
+	if (defs.empty())
+		return 0;
+
+	const uint32_t reg = defs[0]->psOp->ui32RegisterNumber;
+	uint32_t combinedMask = defs[0]->writeMask;
+	for (size_t i = 1; i < defs.size(); i++)
+	{
+		if (reg != defs[i]->psOp->ui32RegisterNumber)
+			return 0;
+
+		combinedMask |= defs[i]->writeMask;
+	}
+
+	bool hasLeftoverDefinitions = false;
+	for (uint32_t i = 0; i < 4 && !hasLeftoverDefinitions; i++)
+	{
+		DefineUseChain::iterator du = psDUChains[reg * 4 + i].begin();
+		while (du != psDUChains[reg * 4 + i].end())
+		{
+			if (!IsInSplitDefinitions(defs, &*du))
+			{
+				hasLeftoverDefinitions = true;
+				break;
+			}
+			du++;
+		}
+	}
+	// We'd be splitting the entire register and all its definitions, no point in that.
+	if (!hasLeftoverDefinitions)
+		return 0;
+
+	// Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array.
+	for (size_t u32def = 0; u32def < defs.size(); u32def++)
+	{
+		DefineUseChainEntry *def = defs[u32def];
+
+		for (UsageSet::iterator ul = def->usages.begin(); ul != def->usages.end(); ul++)
+		{
+			uint32_t j;
+
+			// Check that we only read a subset of the combined writemask
+			if (((*ul)->accessMask & (~combinedMask)) != 0)
+			{
+				// Do an additional attempt, pick up all the sibling definitions as well
+				for (j = 0; j < 4; j++)
+				{
+					if (((*ul)->accessMask & (1 << j)) == 0)
+						continue;
+
+					// A component that is read without a recorded sibling or without any
+					// definition cannot be followed; give up instead of dereferencing it.
+					if ((*ul)->psSiblings[j] == NULL || (*ul)->psSiblings[j]->defines.empty())
+						return 0;
+
+					AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin());
+				}
+				return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
+			}
+
+			// It must have at least one declaration
+			ASSERT(!(*ul)->defines.empty());
+
+			// Check that all siblings for the usage use one of the definitions
+			for (j = 0; j < 4; j++)
+			{
+				if (((*ul)->accessMask & (1 << j)) == 0)
+					continue;
+
+				ASSERT((*ul)->psSiblings[j] != NULL);
+				ASSERT(!(*ul)->psSiblings[j]->defines.empty());
+
+				// Without any definition for this component there is nothing to split on
+				if ((*ul)->psSiblings[j] == NULL || (*ul)->psSiblings[j]->defines.empty())
+					return 0;
+
+				// Check that all definitions for this usage are found from the definitions table
+				DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin();
+				while (dl != (*ul)->psSiblings[j]->defines.end())
+				{
+					if (!IsInSplitDefinitions(defs, *dl))
+					{
+						// Add this define and all its siblings to the table and try again
+						AddDefineToList(defs, *dl);
+						return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
+					}
+
+					dl++;
+				}
+			}
+
+			// This'll do, check next usage
+		}
+	}
+
+	// Every usage only sees definitions from the set: do the split
+	UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
+	return 1;
+}
+
+// Do temp splitting based on use-define chains
+//
+// Walks the definitions of every component of the temps that exist on entry and tries to move
+// each definition (together with its siblings) into a new temp register. *psNumTemps grows by
+// one for every split that takes place.
+void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
+{
+	// Algorithm overview:
+	// Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable),
+	// split it out.
+
+	// Registers created by splitting are not analyzed again, they're unsplittable by definition
+	const uint32_t componentsAtStart = (*psNumTemps) * 4;
+
+	for (uint32_t component = 0; component < componentsAtStart; ++component)
+	{
+		DefineUseChain::iterator cursor = psDUChains[component].begin();
+
+		// Chains without definitions are skipped by the loop condition
+		while (cursor != psDUChains[component].end())
+		{
+			SplitDefinitions candidates;
+			AddDefineToList(candidates, &*cursor);
+
+			// Step past the definition first: a split may move it out of this chain
+			++cursor;
+
+			const int didSplit = AttemptSplitDefinitions(candidates, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
+
+			// After a split the position is no longer trustworthy, restart from the head of the chain
+			if (didSplit)
+				cursor = psDUChains[component].begin();
+		}
+	}
+}
+
+// Decides whether the definition 'du' may be lowered to partial precision.
+//
+// Returns 1 only when all of the following hold:
+//  - the defining operand is still at OPERAND_MIN_PRECISION_DEFAULT,
+//  - the defining instruction uses one of the arithmetic opcodes listed below, and
+//  - every non-immediate operand from ui32FirstSrc onwards already has a non-default precision.
+// On success *pType (when pType is non-NULL) is set to OPERAND_MIN_PRECISION_FLOAT_16.
+// Returns 0 in every other case, leaving *pType untouched.
+static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType)
+{
+	if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
+		return 0;
+
+	Instruction *const defInst = du->psInst;
+	switch (defInst->eOpcode)
+	{
+	case OPCODE_ADD:
+	case OPCODE_MUL:
+	case OPCODE_MOV:
+	case OPCODE_MAD:
+	case OPCODE_DIV:
+	case OPCODE_LOG:
+	case OPCODE_EXP:
+	case OPCODE_MAX:
+	case OPCODE_MIN:
+	case OPCODE_DP2:
+	case OPCODE_DP2ADD:
+	case OPCODE_DP3:
+	case OPCODE_DP4:
+	case OPCODE_RSQ:
+	case OPCODE_SQRT:
+		break;
+	default:
+		// Any other opcode is never downgraded here.
+		return 0;
+	}
+
+	// Immediates are ignored; any other source still at default precision blocks the downgrade.
+	for (uint32_t srcIdx = defInst->ui32FirstSrc; srcIdx < defInst->ui32NumOperands; ++srcIdx)
+	{
+		const Operand &src = defInst->asOperands[srcIdx];
+		if (src.eType == OPERAND_TYPE_IMMEDIATE32)
+			continue;
+		if (src.eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
+			return 0;
+	}
+
+	// Don't go lower than mediump
+	if (pType)
+		*pType = OPERAND_MIN_PRECISION_FLOAT_16;
+
+	return 1;
+}
+
+// Returns true when every usage of the definition 'du' is an operand still at
+// OPERAND_MIN_PRECISION_DEFAULT that belongs to an instruction with one of the arithmetic
+// opcodes listed below. The first usage failing either test makes the result false.
+// A definition without usages yields true.
+static bool HasOnlyFloatUsages(DefineUseChain::iterator du)
+{
+	for (const auto &usage : du->usages)
+	{
+		if (usage->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
+			return false;
+
+		switch (usage->psInst->eOpcode)
+		{
+		case OPCODE_ADD:
+		case OPCODE_MUL:
+		case OPCODE_MOV:
+		case OPCODE_MAD:
+		case OPCODE_DIV:
+		case OPCODE_LOG:
+		case OPCODE_EXP:
+		case OPCODE_MAX:
+		case OPCODE_MIN:
+		case OPCODE_DP2:
+		case OPCODE_DP2ADD:
+		case OPCODE_DP3:
+		case OPCODE_DP4:
+		case OPCODE_RSQ:
+		case OPCODE_SQRT:
+			continue; // Accepted, move on to the next usage
+		default:
+			return false;
+		}
+	}
+	return true;
+}
+
+// Propagates partial precision onto temp definitions.
+//
+// A definition is lowered when its instruction either reports itself as a partial precision
+// sampler instruction or passes CanDowngradeDefinitionPrecision(), its first operand is a temp
+// still at default precision, the definition is standalone, and HasOnlyFloatUsages() accepts it.
+// The new precision is written to the defining operand and to the operand of every usage of all
+// its siblings. The sweep over the first ui32NumTemps * 4 chain slots is repeated until a whole
+// pass changes nothing.
+void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps)
+{
+	bool changedThisPass;
+	do
+	{
+		changedThisPass = false;
+		for (uint32_t slot = 0; slot < ui32NumTemps * 4; ++slot)
+		{
+			for (DefineUseChain::iterator du = psDUChains[slot].begin(); du != psDUChains[slot].end(); ++du)
+			{
+				OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
+
+				// Both checks are handed sType so they can report the precision to apply.
+				const bool lowerable = du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
+					|| CanDowngradeDefinitionPrecision(du, &sType);
+				if (!lowerable)
+					continue;
+
+				const Operand &dest = du->psInst->asOperands[0];
+				if (dest.eType != OPERAND_TYPE_TEMP || dest.eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
+					continue;
+
+				if (!du->isStandalone || !HasOnlyFloatUsages(du))
+					continue;
+
+				// All conditions met: apply the new precision to the definition itself.
+				ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP);
+				ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT);
+				du->psOp->eMinPrecision = sType;
+
+				// Then to every usage of every sibling of the definition.
+				for (uint32_t sibl = 0; sibl < 4; ++sibl)
+				{
+					if (!du->psSiblings[sibl])
+						continue;
+
+					for (UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
+						ul != du->psSiblings[sibl]->usages.end(); ++ul)
+					{
+						ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT ||
+							(*ul)->psOp->eMinPrecision == sType);
+						// The same operand may be written more than once; that is harmless.
+						(*ul)->psOp->eMinPrecision = sType;
+					}
+				}
+				changedThisPass = true;
+			}
+		}
+	} while (changedThisPass);
+
+}
+
+// Flags definitions as standalone.
+//
+// A definition qualifies when, for every usage of every one of its siblings, each sibling k of
+// that usage sees at most one definition and that definition is the definition's own sibling k.
+// When it qualifies, isStandalone is set to 1 on all of its siblings. Definitions that are
+// already flagged are skipped. Only the first ui32NumTemps * 4 chain slots are visited.
+void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps)
+{
+	for (uint32_t slot = 0; slot < ui32NumTemps * 4; ++slot)
+	{
+		for (DefineUseChain::iterator du = psDUChains[slot].begin(); du != psDUChains[slot].end(); ++du)
+		{
+			if (du->isStandalone)
+				continue;
+
+			// Stays true until some usage is found that sees a different (or an extra) definition.
+			bool standalone = true;
+			for (uint32_t sibl = 0; standalone && sibl < 4; ++sibl)
+			{
+				if (!du->psSiblings[sibl])
+					continue;
+
+				for (UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
+					standalone && ul != du->psSiblings[sibl]->usages.end(); ++ul)
+				{
+					ASSERT(!(*ul)->defines.empty());
+
+					// Every sibling of this usage must see exactly the matching sibling of 'du'.
+					for (uint32_t k = 0; k < 4; ++k)
+					{
+						if (!(*ul)->psSiblings[k])
+							continue;
+
+						if ((*ul)->psSiblings[k]->defines.size() > 1
+							|| *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k])
+						{
+							standalone = false;
+							break;
+						}
+					}
+				}
+			}
+
+			if (!standalone)
+				continue;
+
+			// Mark the whole sibling group.
+			for (uint32_t sibl = 0; sibl < 4; ++sibl)
+			{
+				if (du->psSiblings[sibl])
+					du->psSiblings[sibl]->isStandalone = 1;
+			}
+		}
+	}
+}
+
+// Copies the analysis results back onto the instructions and operands themselves.
+// For every (definition, usage) pair found in psDUChains, one Instruction::Use is appended to the
+// defining instruction's m_Uses and one Operand::Define is appended to the using operand's m_Defines.
+void WriteBackUsesAndDefines(DefineUseChains &psDUChains)
+{
+	for (const DefineUseChains::value_type &slot : psDUChains)
+	{
+		const DefineUseChain &duChain = slot.second;
+		for (const DefineUseChain::value_type &du : duChain)
+		{
+			for (const UseDefineChainEntry *usage : du.usages)
+			{
+				// Record the usage on the defining instruction...
+				du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp));
+				// ...and the definition on the operand that uses it.
+				usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp));
+			}
+		}
+	}
+}
--- a/src/cbstring/bsafe.c
+++ b/src/cbstring/bsafe.c
@ -0,0 +1,85 @@
+/*
+ * This source file is part of the bstring string library.  This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause 
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation 
+ * for details on usage and license.
+ */
+
+/*
+ * bsafe.c
+ *
+ * This is an optional module that can be used to help enforce a safety
+ * standard based on pervasive usage of bstrlib.  This file is not necessarily
+ * portable, however, it has been tested to work correctly with Intel's C/C++
+ * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "bsafe.h"
+
+/* When nonzero, the replacement functions below call exit (-1) after printing their error. */
+static int bsafeShouldExit = 1;
+
+/*
+ * NOTE(review): everything from here to the matching #endif at the end of the
+ * file is compiled out by "#if 0". None of the replacement functions below are
+ * built, which also leaves bsafeShouldExit without any reader in this file.
+ *
+ * Each disabled function has the same shape: it self-assigns its parameters,
+ * prints a "bsafe error" message naming the bstrlib alternative to stderr,
+ * exits when bsafeShouldExit is set, and otherwise returns NULL.
+ */
+#if 0
+char * strcpy (char *dst, const char *src);
+char * strcat (char *dst, const char *src);
+
+char * strcpy (char *dst, const char *src) {
+	dst = dst;
+	src = src;
+	fprintf (stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+
+char * strcat (char *dst, const char *src) {
+	dst = dst;
+	src = src;
+	fprintf (stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+
+#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
+char * (gets) (char * buf) {
+	buf = buf;
+	fprintf (stderr, "bsafe error: gets() is not safe, use bgets.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+#endif
+
+char * (strncpy) (char *dst, const char *src, size_t n) {
+	dst = dst;
+	src = src;
+	n = n;
+	fprintf (stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+
+char * (strncat) (char *dst, const char *src, size_t n) {
+	dst = dst;
+	src = src;
+	n = n;
+	fprintf (stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+
+char * (strtok) (char *s1, const char *s2) {
+	s1 = s1;
+	s2 = s2;
+	fprintf (stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+
+char * (strdup) (const char *s) {
+	s = s;
+	fprintf (stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n");
+	if (bsafeShouldExit) exit (-1);
+	return NULL;
+}
+
+#endif
--- a/src/cbstring/bsafe.h
+++ b/src/cbstring/bsafe.h
@ -0,0 +1,43 @@
+/*
+ * This source file is part of the bstring string library.  This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause 
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation 
+ * for details on usage and license.
+ */
+
+/*
+ * bsafe.h
+ *
+ * This is an optional module that can be used to help enforce a safety
+ * standard based on pervasive usage of bstrlib.  This file is not necessarily
+ * portable, however, it has been tested to work correctly with Intel's C/C++
+ * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
+ */
+
+#ifndef BSTRLIB_BSAFE_INCLUDE
+#define BSTRLIB_BSAFE_INCLUDE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
+/* This is caught in the linker, so it's not necessary for gcc. */
+extern char * (gets) (char * buf);
+#endif
+
+extern char * (strncpy) (char *dst, const char *src, size_t n);
+extern char * (strncat) (char *dst, const char *src, size_t n);
+extern char * (strtok) (char *s1, const char *s2);
+extern char * (strdup) (const char *s);
+
+/*
+ * Drop any existing strcpy/strcat macros and redirect calls written as
+ * strcpy(a,b) / strcat(a,b) to bsafe_strcpy / bsafe_strcat.
+ * NOTE(review): this header declares neither bsafe_strcpy nor bsafe_strcat;
+ * confirm they are defined somewhere before relying on these macros.
+ */
+#undef strcpy
+#undef strcat
+#define strcpy(a,b) bsafe_strcpy(a,b) 
+#define strcat(a,b) bsafe_strcat(a,b) 
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/cbstring/bstraux.c
+++ b/src/cbstring/bstraux.c
--- a/src/cbstring/bstraux.h
+++ b/src/cbstring/bstraux.h
@ -0,0 +1,112 @@
+/*
+ * This source file is part of the bstring string library.  This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause 
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation 
+ * for details on usage and license.
+ */
+
+/*
+ * bstraux.h
+ *
+ * This file is not a necessary part of the core bstring library itself, but
+ * is just an auxiliary module which includes miscellaneous or trivial
+ * functions.
+ */
+
+#ifndef BSTRAUX_INCLUDE
+#define BSTRAUX_INCLUDE
+
+#include <time.h>
+#include "bstrlib.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Safety mechanisms */
+/* Declares a bstring variable named b initialized to NULL; the expansion already ends in ';'. */
+#define bstrDeclare(b)               bstring (b) = NULL; 
+/* Destroys b and resets it to NULL, but only when b is non-NULL and 0 <= slen <= mlen; otherwise b is left untouched. */
+#define bstrFree(b)                  {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }}
+
+/* Backward compatibility with previous versions of Bstrlib */
+#define bAssign(a,b)                 ((bassign)((a), (b)))
+#define bSubs(b,pos,len,a,c)         ((breplace)((b),(pos),(len),(a),(unsigned char)(c)))
+#define bStrchr(b,c)                 ((bstrchr)((b), (c)))
+#define bStrchrFast(b,c)             ((bstrchr)((b), (c)))
+#define bCatCstr(b,s)                ((bcatcstr)((b), (s)))
+#define bCatBlk(b,s,len)             ((bcatblk)((b),(s),(len)))
+#define bCatStatic(b,s)              bCatBlk ((b), ("" s ""), sizeof (s) - 1)
+#define bTrunc(b,n)                  ((btrunc)((b), (n)))
+#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos)))
+#define bUppercase(b)                ((btoupper)(b))
+#define bLowercase(b)                ((btolower)(b))
+#define bCaselessCmp(a,b)            ((bstricmp)((a), (b)))
+#define bCaselessNCmp(a,b,n)         ((bstrnicmp)((a), (b), (n)))
+#define bBase64Decode(b)             (bBase64DecodeEx ((b), NULL))
+#define bUuDecode(b)                 (bUuDecodeEx ((b), NULL))
+
+/* Unusual functions */
+extern struct bStream * bsFromBstr (const_bstring b);
+extern bstring bTail (bstring b, int n);
+extern bstring bHead (bstring b, int n);
+extern int bSetCstrChar (bstring a, int pos, char c);
+extern int bSetChar (bstring b, int pos, char c);
+extern int bFill (bstring a, char c, int len);
+extern int bReplicate (bstring b, int n);
+extern int bReverse (bstring b);
+extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill);
+extern bstring bStrfTime (const char * fmt, const struct tm * timeptr);
+#define bAscTime(t) (bStrfTime ("%c\n", (t)))
+#define bCTime(t)   ((t) ? bAscTime (localtime (t)) : NULL)
+
+/* Spacing formatting */
+extern int bJustifyLeft (bstring b, int space);
+extern int bJustifyRight (bstring b, int width, int space);
+extern int bJustifyMargin (bstring b, int width, int space);
+extern int bJustifyCenter (bstring b, int width, int space);
+
+/* Esoteric standards specific functions */
+extern char * bStr2NetStr (const_bstring b);
+extern bstring bNetStr2Bstr (const char * buf);
+extern bstring bBase64Encode (const_bstring b);
+extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError);
+extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines);
+extern bstring bUuDecodeEx (const_bstring src, int * badlines);
+extern bstring bUuEncode (const_bstring src);
+extern bstring bYEncode (const_bstring src);
+extern bstring bYDecode (const_bstring src);
+
+/* Writable stream */
+typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm);
+
+struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm);
+int bwsWriteBstr (struct bwriteStream * stream, const_bstring b);
+int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len);
+int bwsWriteFlush (struct bwriteStream * stream);
+int bwsIsEOF (const struct bwriteStream * stream);
+int bwsBuffLength (struct bwriteStream * stream, int sz);
+void * bwsClose (struct bwriteStream * stream);
+
+/* Security functions */
+/*
+ * Zeroes the first mlen bytes of b's data buffer and then destroys b.
+ * Nothing happens (b is not destroyed either) when b is NULL, has mlen <= 0,
+ * or has a NULL data pointer. b is evaluated exactly once.
+ */
+#define bSecureDestroy(b) {	                                            \
+bstring bstr__tmp = (b);	                                            \
+	if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) {          \
+	    (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen);   \
+	    bdestroy (bstr__tmp);                                           \
+	}                                                                   \
+}
+/*
+ * Write protects t, which must be a struct tagbstring (not a pointer to one).
+ * When t is currently writable (mlen >= 0), the bytes between slen and mlen are
+ * zeroed first and mlen is then set to -1. An already protected t is left as is.
+ */
+#define bSecureWriteProtect(t) {	                                              \
+	if ((t).mlen >= 0) {                                                          \
+	    if ((t).mlen > (t).slen) {                                                \
+	        (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \
+	    }                                                                         \
+	    (t).mlen = -1;                                                            \
+	}                                                                             \
+}
+extern bstring bSecureInput (int maxlen, int termchar, 
+                             bNgetc vgetchar, void * vgcCtx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/cbstring/bstrlib.c
+++ b/src/cbstring/bstrlib.c
--- a/src/cbstring/bstrlib.h
+++ b/src/cbstring/bstrlib.h
@ -0,0 +1,304 @@
+/*
+ * This source file is part of the bstring string library.  This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause 
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation 
+ * for details on usage and license.
+ */
+
+/*
+ * bstrlib.h
+ *
+ * This file is the header file for the core module for implementing the 
+ * bstring functions.
+ */
+
+#ifndef BSTRLIB_INCLUDE
+#define BSTRLIB_INCLUDE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+
+#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
+# if defined (__TURBOC__) && !defined (__BORLANDC__)
+#  define BSTRLIB_NOVSNP
+# endif
+#endif
+
+#define BSTR_ERR (-1)
+#define BSTR_OK (0)
+#define BSTR_BS_BUFF_LENGTH_GET (0)
+
+typedef struct tagbstring * bstring;
+typedef const struct tagbstring * const_bstring;
+
+/* Copy functions */
+#define cstr2bstr bfromcstr
+extern bstring bfromcstr (const char * str);
+extern bstring bfromcstralloc (int mlen, const char * str);
+extern bstring blk2bstr (const void * blk, int len);
+extern char * bstr2cstr (const_bstring s, char z);
+extern int bcstrfree (char * s);
+extern bstring bstrcpy (const_bstring b1);
+extern int bassign (bstring a, const_bstring b);
+extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
+extern int bassigncstr (bstring a, const char * str);
+extern int bassignblk (bstring a, const void * s, int len);
+
+/* Destroy function */
+extern int bdestroy (bstring b);
+
+/* Space allocation hinting functions */
+extern int balloc (bstring s, int len);
+extern int ballocmin (bstring b, int len);
+
+/* Substring extraction */
+extern bstring bmidstr (const_bstring b, int left, int len);
+
+/* Various standard manipulations */
+extern int bconcat (bstring b0, const_bstring b1);
+extern int bconchar (bstring b0, char c);
+extern int bcatcstr (bstring b, const char * s);
+extern int bcatblk (bstring b, const void * s, int len);
+extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
+extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
+extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
+extern int bdelete (bstring s1, int pos, int len);
+extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
+extern int btrunc (bstring b, int n);
+
+/* Scan/search functions */
+extern int bstricmp (const_bstring b0, const_bstring b1);
+extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
+extern int biseqcaseless (const_bstring b0, const_bstring b1);
+extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
+extern int biseq (const_bstring b0, const_bstring b1);
+extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
+extern int biseqcstr (const_bstring b, const char * s);
+extern int biseqcstrcaseless (const_bstring b, const char * s);
+extern int bstrcmp (const_bstring b0, const_bstring b1);
+extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
+extern int binstr (const_bstring s1, int pos, const_bstring s2);
+extern int binstrr (const_bstring s1, int pos, const_bstring s2);
+extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
+extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
+extern int bstrchrp (const_bstring b, int c, int pos);
+extern int bstrrchrp (const_bstring b, int c, int pos);
+#define bstrchr(b,c) bstrchrp ((b), (c), 0)
+#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
+extern int binchr (const_bstring b0, int pos, const_bstring b1);
+extern int binchrr (const_bstring b0, int pos, const_bstring b1);
+extern int bninchr (const_bstring b0, int pos, const_bstring b1);
+extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
+extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
+extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
+
+/* List of string container functions */
+/*
+ * Container for an array of bstrings.
+ * NOTE(review): presumably qty is the number of entries in use and mlen the
+ * number of allocated slots in entry -- confirm against bstrlib.c.
+ */
+struct bstrList {
+    int qty, mlen;
+    bstring * entry;
+};
+extern struct bstrList * bstrListCreate (void);
+extern int bstrListDestroy (struct bstrList * sl);
+extern int bstrListAlloc (struct bstrList * sl, int msz);
+extern int bstrListAllocMin (struct bstrList * sl, int msz);
+
+/* String split and join functions */
+extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
+extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
+extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
+extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
+extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+	int (* cb) (void * parm, int ofs, int len), void * parm);
+extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+	int (* cb) (void * parm, int ofs, int len), void * parm);
+extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+	int (* cb) (void * parm, int ofs, int len), void * parm);
+
+/* Miscellaneous functions */
+extern int bpattern (bstring b, int len);
+extern int btoupper (bstring b);
+extern int btolower (bstring b);
+extern int bltrimws (bstring b);
+extern int brtrimws (bstring b);
+extern int btrimws (bstring b);
+
+/* <*>printf format functions */
+#if !defined (BSTRLIB_NOVSNP)
+extern bstring bformat (const char * fmt, ...);
+extern int bformata (bstring b, const char * fmt, ...);
+extern int bassignformat (bstring b, const char * fmt, ...);
+extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
+
+/*
+ * Statement macro for use inside a variadic function: lastarg is handed to
+ * va_start, so it has to be that function's last named parameter.
+ *
+ * Calls bvcformata (b, size, fmt, args) in a loop, starting with size 16:
+ *   - a result >= 0 ends the loop with ret = BSTR_OK;
+ *   - a negative result whose magnitude is <= the size just tried ends the
+ *     loop with ret = BSTR_ERR;
+ *   - any other negative result is negated and used as the next size to try.
+ * The va_list is started and ended anew on every iteration. b and fmt are
+ * evaluated once.
+ */
+#define bvformata(ret, b, fmt, lastarg) { \
+bstring bstrtmp_b = (b); \
+const char * bstrtmp_fmt = (fmt); \
+int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
+	for (;;) { \
+		va_list bstrtmp_arglist; \
+		va_start (bstrtmp_arglist, lastarg); \
+		bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
+		va_end (bstrtmp_arglist); \
+		if (bstrtmp_r >= 0) { /* Everything went ok */ \
+			bstrtmp_r = BSTR_OK; \
+			break; \
+		} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
+			bstrtmp_r = BSTR_ERR; \
+			break; \
+		} \
+		bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
+	} \
+	ret = bstrtmp_r; \
+}
+
+#endif
+
+typedef int (*bNgetc) (void *parm);
+typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
+
+/* Input functions */
+extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
+extern bstring bread (bNread readPtr, void * parm);
+extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
+extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
+extern int breada (bstring b, bNread readPtr, void * parm);
+
+/* Stream functions */
+extern struct bStream * bsopen (bNread readPtr, void * parm);
+extern void * bsclose (struct bStream * s);
+extern int bsbufflength (struct bStream * s, int sz);
+extern int bsreadln (bstring b, struct bStream * s, char terminator);
+extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
+extern int bsread (bstring b, struct bStream * s, int n);
+extern int bsreadlna (bstring b, struct bStream * s, char terminator);
+extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
+extern int bsreada (bstring b, struct bStream * s, int n);
+extern int bsunread (struct bStream * s, const_bstring b);
+extern int bspeek (bstring r, const struct bStream * s);
+extern int bssplitscb (struct bStream * s, const_bstring splitStr, 
+	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
+extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, 
+	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
+extern int bseof (const struct bStream * s);
+
+/* The string header that bstring and const_bstring point to. */
+struct tagbstring {
+	int mlen;             /* mlen <= 0 is treated as write protected by biswriteprotected(); presumably the allocated size otherwise -- confirm against bstrlib.c */
+	int slen;             /* String length as reported by blength(); negative values are treated as invalid by blengthe() */
+	unsigned char * data; /* Character data; may be NULL, which bdataofse() maps to its error value */
+};
+
+/* Accessor macros */
+/*
+ * blengthe (b, e)     - b->slen, or (int)(e) when b is NULL or b->slen < 0.
+ * blength (b)         - blengthe with 0 as the fallback.
+ * bdataofse (b, o, e) - (char *) b->data + o, or (char *)(e) when b or b->data is NULL.
+ * bdataofs (b, o)     - bdataofse with a NULL fallback.
+ * bdatae (b, e)       - bdataofse at offset 0.
+ * bdata (b)           - bdataofs at offset 0.
+ * bchare (b, p, e)    - b->data[p] when p, compared as unsigned, is below blength (b); e otherwise.
+ * bchar (b, p)        - bchare with '\0' as the fallback.
+ * All of these may evaluate their arguments more than once.
+ */
+#define blengthe(b, e)      (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
+#define blength(b)          (blengthe ((b), 0))
+#define bdataofse(b, o, e)  (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
+#define bdataofs(b, o)      (bdataofse ((b), (o), (void *)0))
+#define bdatae(b, e)        (bdataofse (b, 0, e))
+#define bdata(b)            (bdataofs (b, 0))
+#define bchare(b, p, e)     ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
+#define bchar(b, p)         bchare ((b), (p), '\0')
+
+/* Static constant string initialization macro */
+/*
+ * bsStaticMlen (q, m) expands to a brace initializer in struct tagbstring
+ * member order: mlen = m, slen = sizeof (q) - 1, data = q. The "" q ""
+ * concatenation only compiles when q is a string literal.
+ * bsStatic (q) supplies a negative mlen (-32 under _MSC_VER, -__LINE__
+ * elsewhere), which biswriteprotected() reports as write protected.
+ */
+#define bsStaticMlen(q,m)   {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
+#if defined(_MSC_VER)
+/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
+# define bsStatic(q)        bsStaticMlen(q,-32)
+#endif
+#ifndef bsStatic
+# define bsStatic(q)        bsStaticMlen(q,-__LINE__)
+#endif
+
+/* Static constant block parameter pair */
+#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
+
+/* Reference building macros */
+/*
+ * btfromcstr (t, s): makes the struct tagbstring t refer to the C string s
+ * without copying it. slen becomes strlen (s), or 0 when s is NULL, and
+ * mlen is set to -1. cstr2tbstr is an alias for the same macro.
+ */
+#define cstr2tbstr btfromcstr
+#define btfromcstr(t,s) {                                            \
+    (t).data = (unsigned char *) (s);                                \
+    (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
+    (t).mlen = -1;                                                   \
+}
+/*
+ * blk2tbstr (t, s, l): makes the struct tagbstring t refer to the block s of
+ * length l without copying it; mlen is set to -1. s and l are stored as
+ * given, with no NULL or range checks. btfromblk is an alias.
+ */
+#define blk2tbstr(t,s,l) {            \
+    (t).data = (unsigned char *) (s); \
+    (t).slen = l;                     \
+    (t).mlen = -1;                    \
+}
+#define btfromblk(t,s,l) blk2tbstr(t,s,l)
+/*
+ * bmid2tbstr (t, b, p, l): makes the struct tagbstring t refer to the
+ * substring of b that starts at position p and is l characters long, without
+ * copying. A negative p is clamped to 0 (shortening the length accordingly)
+ * and the length is clipped to the end of b. t refers to the empty string ""
+ * when the clipped length is <= 0, or when b is NULL, has no data, or has a
+ * negative slen. mlen is always set to -__LINE__.
+ */
+#define bmid2tbstr(t,b,p,l) {                                                \
+    const_bstring bstrtmp_s = (b);                                           \
+    if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) {              \
+        int bstrtmp_left = (p);                                              \
+        int bstrtmp_len  = (l);                                              \
+        if (bstrtmp_left < 0) {                                              \
+            bstrtmp_len += bstrtmp_left;                                     \
+            bstrtmp_left = 0;                                                \
+        }                                                                    \
+        if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left)                    \
+            bstrtmp_len = bstrtmp_s->slen - bstrtmp_left;                    \
+        if (bstrtmp_len <= 0) {                                              \
+            (t).data = (unsigned char *)"";                                  \
+            (t).slen = 0;                                                    \
+        } else {                                                             \
+            (t).data = bstrtmp_s->data + bstrtmp_left;                       \
+            (t).slen = bstrtmp_len;                                          \
+        }                                                                    \
+    } else {                                                                 \
+        (t).data = (unsigned char *)"";                                      \
+        (t).slen = 0;                                                        \
+    }                                                                        \
+    (t).mlen = -__LINE__;                                                    \
+}
+/*
+ * btfromblkltrimws (t, s, l): makes the struct tagbstring t refer to the
+ * block s of length l with leading isspace() characters skipped; nothing is
+ * copied. When s is NULL or l is negative no scan takes place and t simply
+ * receives data = s and slen = l. mlen is set to -__LINE__.
+ */
+#define btfromblkltrimws(t,s,l) {                                            \
+    int bstrtmp_idx = 0, bstrtmp_len = (l);                                  \
+    unsigned char * bstrtmp_s = (s);                                         \
+    if (bstrtmp_s && bstrtmp_len >= 0) {                                     \
+        for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) {                   \
+            if (!isspace (bstrtmp_s[bstrtmp_idx])) break;                    \
+        }                                                                    \
+    }                                                                        \
+    (t).data = bstrtmp_s + bstrtmp_idx;                                      \
+    (t).slen = bstrtmp_len - bstrtmp_idx;                                    \
+    (t).mlen = -__LINE__;                                                    \
+}
+/*
+ * btfromblkrtrimws (t, s, l): makes the struct tagbstring t refer to the
+ * block s of length l with trailing isspace() characters excluded; nothing is
+ * copied. A block made up of whitespace only yields slen = 0. When s is NULL
+ * or l < 1 no scan takes place and slen is set to l. mlen is set to -__LINE__.
+ */
+#define btfromblkrtrimws(t,s,l) {                                            \
+    int bstrtmp_len = (l) - 1;                                               \
+    unsigned char * bstrtmp_s = (s);                                         \
+    if (bstrtmp_s && bstrtmp_len >= 0) {                                     \
+        for (; bstrtmp_len >= 0; bstrtmp_len--) {                            \
+            if (!isspace (bstrtmp_s[bstrtmp_len])) break;                    \
+        }                                                                    \
+    }                                                                        \
+    (t).data = bstrtmp_s;                                                    \
+    (t).slen = bstrtmp_len + 1;                                              \
+    (t).mlen = -__LINE__;                                                    \
+}
+/*
+ * btfromblktrimws (t, s, l): makes the struct tagbstring t refer to the
+ * block s of length l with both leading and trailing isspace() characters
+ * excluded; nothing is copied. A block made up of whitespace only yields
+ * slen = 0. When s is NULL or l < 1 no scan takes place and t receives
+ * data = s and slen = l. mlen is set to -__LINE__.
+ */
+#define btfromblktrimws(t,s,l) {                                             \
+    int bstrtmp_idx = 0, bstrtmp_len = (l) - 1;                              \
+    unsigned char * bstrtmp_s = (s);                                         \
+    if (bstrtmp_s && bstrtmp_len >= 0) {                                     \
+        for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) {                  \
+            if (!isspace (bstrtmp_s[bstrtmp_idx])) break;                    \
+        }                                                                    \
+        for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) {                  \
+            if (!isspace (bstrtmp_s[bstrtmp_len])) break;                    \
+        }                                                                    \
+    }                                                                        \
+    (t).data = bstrtmp_s + bstrtmp_idx;                                      \
+    (t).slen = bstrtmp_len + 1 - bstrtmp_idx;                                \
+    (t).mlen = -__LINE__;                                                    \
+}
+
+/* Write protection macros */
+/*
+ * All three take a struct tagbstring (not a pointer to one).
+ * bwriteprotect     - sets mlen to -1 when it is currently >= 0.
+ * bwriteallow       - undoes that only when mlen is exactly -1: mlen becomes
+ *                     slen, or 1 when slen is 0, so that it ends up positive.
+ * biswriteprotected - nonzero when mlen <= 0.
+ */
+#define bwriteprotect(t)     { if ((t).mlen >=  0) (t).mlen = -1; }
+#define bwriteallow(t)       { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
+#define biswriteprotected(t) ((t).mlen <= 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/cbstring/bstrlib.txt
+++ b/src/cbstring/bstrlib.txt
--- a/src/cbstring/license.txt
+++ b/src/cbstring/license.txt
@ -0,0 +1,29 @@
+Copyright (c) 2002-2008 Paul Hsieh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+    Redistributions of source code must retain the above copyright notice, 
+    this list of conditions and the following disclaimer. 
+
+    Redistributions in binary form must reproduce the above copyright notice, 
+    this list of conditions and the following disclaimer in the documentation 
+    and/or other materials provided with the distribution. 
+
+    Neither the name of bstrlib nor the names of its contributors may be used 
+    to endorse or promote products derived from this software without 
+    specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+POSSIBILITY OF SUCH DAMAGE.
+
--- a/src/cbstring/porting.txt
+++ b/src/cbstring/porting.txt
@ -0,0 +1,172 @@
+Better String library Porting Guide
+-----------------------------------
+
+by Paul Hsieh
+
+The bstring library is an attempt to provide improved string processing 
+functionality to the C and C++ language.  At the heart of the bstring library 
+is the management of "bstring"s which are a significant improvement over '\0'
+terminated char buffers.  See the accompanying documentation file bstrlib.txt
+for more information.
+
+===============================================================================
+
+Identifying the Compiler
+------------------------
+
+Bstrlib has been tested on the following compilers:
+
+    Microsoft Visual C++
+    Watcom C/C++ (32 bit flat)
+    Intel's C/C++ compiler (on Windows)
+    The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
+    Borland C++
+    Turbo C
+
+There are slight differences in these compilers which require slight 
+differences in the implementation of Bstrlib.  These are accommodated in the
+same sources using #ifdef/#if defined() on compiler specific macros.  To
+port Bstrlib to a new compiler not listed above, it is recommended that the
+same strategy be followed.  If you are unaware of the compiler specific 
+identifying preprocessor macro for your compiler you might find it here:
+
+http://predef.sourceforge.net/precomp.html
+
+Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.
+
+16-bit vs. 32-bit vs. 64-bit Systems
+------------------------------------
+
+Bstrlib has been architected to deal with strings of length between 0 and
+INT_MAX (inclusive).  Since the values of int are never higher than size_t
+there will be no issue here.  Note that on most 64-bit systems int is 32-bit.
+
+Dependency on The C-Library
+---------------------------
+
+Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and 
+vsnprintf.  Many free standing C compiler implementations that have a mode in 
+which the C library is not available will typically not include these 
+functions which will make porting Bstrlib to it onerous.  Bstrlib is not 
+designed for such bare bones compiler environments.  This usually includes 
+compilers that target ROM environments.
+
+Porting Issues
+--------------
+
+Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there 
+are still a few porting issues.  These are described below.
+
+1. The vsnprintf () function.
+
+Unfortunately, the earlier ANSI/ISO C standards did not include this function.
+If the compiler of interest does not support this function then the 
+BSTRLIB_NOVSNP should be defined via something like:
+
+    #if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
+    # if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
+    #  define BSTRLIB_NOVSNP
+    # endif
+    #endif
+
+which appears at the top of bstrlib.h.  Note that the bformat(a) functions 
+will not be declared or implemented if the BSTRLIB_NOVSNP macro is set.  If 
+the compiler has renamed vsnprintf() to some other named function, then 
+search for the definition of the exvsnprintf macro in bstrlib.c file and be 
+sure it's defined appropriately:
+
+    #if defined (__COMPILERVENDORSPECIFICMACRO__)
+    # define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
+    #else
+    # define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
+    #endif
+
+Take notice of the return value being captured in the variable r.  It is 
+assumed that r exceeds n if and only if the underlying vsnprintf function has
+determined what the true maximal output length would be for output if the 
+buffer were large enough to hold it.  Non-modern implementations must output a
+lesser number (the macro can and should be modified to ensure this).
+
+2. Weak C++ compiler.
+
+C++ is a much more complicated language to implement than C.  This has led 
+to varying quality of compiler implementations.  The weaknesses isolated in
+the initial ports are inclusion of the Standard Template Library, 
+std::iostream and exception handling.  By default it is assumed that the C++
+compiler supports all of these things correctly.  If your compiler does not
+support one or more of these define the corresponding macro:
+
+    BSTRLIB_CANNOT_USE_STL
+    BSTRLIB_CANNOT_USE_IOSTREAM
+    BSTRLIB_DOESNT_THROW_EXCEPTIONS
+
+The compiler specific detected macro should be defined at the top of 
+bstrwrap.h in the Configuration defines section.  Note that these disabling
+macros can be overridden with the associated enabling macro if a subsequent
+version of the compiler gains support.  (For example, it's possible to rig
+up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
+can be passed in as a compiler option.)
+
+3. The bsafe module, and reserved words.
+
+The bsafe module is in gross violation of the ANSI/ISO C standard in the 
+sense that it redefines what could be implemented as reserved words on a 
+given compiler.  The typical problem is that a compiler may inline some of the 
+functions and thus not be properly overridden by the definitions in the bsafe 
+module.  It is also possible that a compiler may prohibit the redefinitions in 
+the bsafe module.  Compiler specific action will be required to deal with 
+these situations.
+
+Platform Specific Files
+-----------------------
+
+The makefiles for the examples are basically set up for particular 
+environments for each platform.  In general these makefiles are not portable
+and should be constructed as necessary from scratch for each platform.
+
+Testing a port
+--------------
+
+To test that a port compiles correctly do the following:
+
+1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and 
+   bsafe modules.
+2. Compile bstest against the bstrlib module.
+3. Run bstest and ensure that 0 errors are reported.
+4. Compile test against the bstrlib and bstrwrap modules.
+5. Run test and ensure that 0 errors are reported.
+6. Compile each of the examples (except for the "re" example, which may be 
+   complicated and is not a real test of bstrlib and except for the mfcbench 
+   example which is Windows specific.)
+7. Run each of the examples.
+
+The builds must have 0 errors, and should have the absolute minimum number of
+warnings (in most cases can be reduced to 0.)  The result of execution should 
+be essentially identical on each platform.
+
+Performance
+-----------
+
+Different CPU and compilers have different capabilities in terms of 
+performance.  It is possible for Bstrlib to assume performance 
+characteristics that a platform doesn't have (since it was primarily 
+developed on just one platform).  The goal of Bstrlib is to provide very good 
+performance on all platforms regardless of this but without resorting to 
+extreme measures (such as using assembly language, or non-portable intrinsics 
+or library extensions.)
+
+There are two performance benchmarks that can be found in the example/ 
+directory.  They are: cbench.c and cppbench.cpp.  These are variations and 
+expansions of a benchmark for another string library.  They don't cover all
+string functionality, but do include the most basic functions which will be
+common in most string manipulation kernels.
+
+...............................................................................
+
+Feedback
+--------
+
+In all cases, you may email issues found to the primary author of Bstrlib at 
+the email address: websnarf@users.sourceforge.net
+
+===============================================================================
--- a/src/cbstring/security.txt
+++ b/src/cbstring/security.txt
@ -0,0 +1,221 @@
+Better String library Security Statement
+----------------------------------------
+
+by Paul Hsieh
+
+===============================================================================
+
+Introduction
+------------
+
+The Better String library (hereafter referred to as Bstrlib) is an attempt to 
+provide improved string processing functionality to the C and C++ languages.  
+At the heart of the Bstrlib is the management of "bstring"s which are a 
+significant improvement over '\0' terminated char buffers.  See the 
+accompanying documentation file bstrlib.txt for more information.
+
+DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 
+NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
+OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Like any software, there is always a possibility of failure due to a flawed
+implementation.  Nevertheless a good faith effort has been made to minimize 
+such flaws in Bstrlib.  Also, use of Bstrlib by itself will not make an 
+application secure or free from implementation failures.  However, it is the
+author's conviction that use of Bstrlib can greatly facilitate the creation 
+of software meeting the highest possible standards of security.
+
+Part of the reason why this document has been created, is for the purpose of 
+security auditing, or the creation of further "Statements on Security" for 
+software that is created that uses Bstrlib.  An auditor may check the claims 
+below against Bstrlib, and use this as a basis for analysis of software which 
+uses Bstrlib.
+
+===============================================================================
+
+Statement on Security
+---------------------
+
+This is a document intended to give consumers of the Better String Library
+who are interested in security an idea of where the Better String Library 
+stands on various security issues.  Any deviation observed in the actual 
+library itself from the descriptions below should be considered an 
+implementation error, not a design flaw.
+
+This statement is not an analytical proof of correctness or an outline of one
+but rather an assertion similar to a scientific claim or hypothesis.  By use,
+testing and open independent examination (otherwise known as scientific 
+falsifiability), the credibility of the claims made below can rise to the 
+level of an established theory.
+
+Common security issues:
+.......................
+
+1. Buffer Overflows
+
+The Bstrlib API allows the programmer a way to deal with strings without 
+having to deal with the buffers containing them.  Ordinary usage of the 
+Bstrlib API itself makes buffer overflows impossible.
+
+Furthermore, the Bstrlib API has a superset of basic string functionality as 
+compared to the C library's char * functions, C++'s std::string class and 
+Microsoft's MFC based CString class.  It also has abstracted mechanisms for 
+dealing with IO.  This is important as it gives developers a way of migrating 
+all their code from a functionality point of view.
+
+2. Memory size overflow/wrap around attack
+
+Bstrlib is, by design, impervious to memory size overflow attacks.  The 
+reason it is resilient to length overflows is that bstring lengths are 
+bounded above by INT_MAX, instead of ~(size_t)0.  So length addition 
+overflows cause a wrap around of the integer value making them negative 
+causing balloc() to fail before an erroneous operation can occur.  Attempted 
+conversions of char * strings which may have lengths greater than INT_MAX are 
+detected and the conversion is aborted.
+
+It is unknown if this property holds on machines that don't represent 
+integers as 2s complement.  It is recommended that Bstrlib be carefully 
+audited by anyone using a system which is not 2s complement based.
+
+3. Constant string protection
+
+Bstrlib implements runtime enforced constant and read-only string semantics.
+I.e., bstrings which are declared as constant via the bsStatic() macro cannot
+be modified or deallocated directly through the Bstrlib API, and this cannot
+be subverted by casting or other type coercion.  This is independent of the
+use of the const_bstring data type.
+
+The Bstrlib C API uses the type const_bstring to specify bstring parameters 
+whose contents do not change.  Although the C language cannot enforce this,
+this is nevertheless guaranteed by the implementation of the Bstrlib library
+of C functions.  The C++ API enforces the const attribute on CBString types
+correctly.
+
+4. Aliased bstring support
+
+Bstrlib detects and supports aliased parameter management throughout the API. 
+The kind of aliasing that is allowed is the one where pointers of the same
+basic type may be pointing to overlapping objects (this is the assumption the 
+ANSI C99 specification makes.)  Each function behaves as if all read-only 
+parameters were copied to temporaries which are used in their stead before 
+the function is enacted (it rarely actually does this).  No function in the 
+Bstrlib uses the "restrict" parameter attribute from the ANSI C99 
+specification.
+
+5. Information leaking
+
+In bstraux.h, using the semantically equivalent macros bSecureDestroy() and 
+bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively 
+will ensure that stale data does not linger in the heap's free space after 
+strings have been released back to memory.  Created bstrings or CBStrings
+are not linked to anything external to themselves, and thus cannot expose 
+deterministic data leaking.  If a bstring is resized, the preimage may exist
+as a copy that is released to the heap.  Thus for sensitive data, the bstring 
+should be sufficiently presized before manipulated so that it is not resized. 
+bSecureInput() has been supplied in bstraux.c, which can be used to obtain 
+input securely without any risk of leaving any part of the input image in the 
+heap except for the allocated bstring that is returned.
+
+6. Memory leaking
+
+Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG 
+macro.  User generated definitions for malloc, realloc and free can then be
+supplied which can implement special strategies for memory corruption 
+detection or memory leaking.  Otherwise, bstrlib does not do anything out of 
+the ordinary to attempt to deal with the standard problem of memory leaking 
+(i.e., losing references to allocated memory) when programming in the C and 
+C++ languages.  However, it does not compound the problem any more than exists 
+either, as it doesn't have any intrinsic inescapable leaks in it.  Bstrlib 
+does not preclude the use of automatic garbage collection mechanisms such as 
+the Boehm garbage collector.
+
+7. Encryption
+
+Bstrlib does not present any built-in encryption mechanism.  However, it 
+supports full binary contents in its data buffers, so any standard block 
+based encryption mechanism can make direct use of bstrings/CBStrings for 
+buffer management.
+
+8. Double freeing
+
+Freeing a pointer that is already free is an extremely rare, but nevertheless 
+a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to
+reboot, by calling free multiple times on already freed data using the WATCOM
+CRT.)  Bstrlib invalidates the bstring header data before freeing, so that in
+many cases a double free will be detected and an error will be reported 
+(though this behaviour is not guaranteed and should not be relied on).
+
+Using bstrFree pervasively (instead of bdestroy) can lead to somewhat 
+improved invalid free avoidance (it is completely safe whenever bstring
+instances are only stored in unique variables).  For example:
+
+    struct tagbstring hw = bsStatic ("Hello, world");
+    bstring cpHw = bstrcpy (&hw);
+
+    #ifdef NOT_QUITE_AS_SAFE
+        bdestroy (cpHw); /* Never fail */
+        bdestroy (cpHw); /* Error sometimes detected at runtime */
+        bdestroy (&hw);  /* Error detected at run time */
+    #else
+        bstrFree (cpHw); /* Never fail */
+        bstrFree (cpHw); /* Will do nothing */
+        bstrFree (&hw);  /* Will lead to a compile time error */
+    #endif
+
+9. Resource based denial of service
+
+bSecureInput() has been supplied in bstraux.c.  It has an optional upper limit
+for input length.  But unlike fgets(), it is also easily determined if the 
+buffer has been truncated early.  In this way, a program can set an upper limit
+on input sizes while still allowing for implementing context specific 
+truncation semantics (i.e., does the program consume but dump the extra 
+input, or does it consume it in later inputs?)
+
+10. Mixing char *'s and bstrings
+
+The bstring and char * representations are not identical.  So there is a risk
+when converting back and forth that data may be lost.  Essentially bstrings can
+contain '\0' as a valid non-terminating character, while char * strings 
+cannot and in fact must use the character as a terminator.  The risk of data 
+loss is very low, since:
+
+  A) the simple method of only using bstrings in a char * semantically 
+     compatible way is both easy to achieve and pervasively supported.
+  B) obtaining '\0' content in a string is either deliberate or indicative
+     of another, likely more serious problem in the code.
+  C) the library comes with various functions which deal with this issue
+     (namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ())
+
+Marginal security issues:
+.........................
+
+11. 8-bit versus 9-bit portability
+
+Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent 
+possible to avoid portability problems.  However, Bstrlib has not been tested 
+on any system that does not represent char as 8-bits.  So whether or not it 
+works on 9-bit systems is an open question.  It is recommended that Bstrlib be 
+carefully audited by anyone using a system in which CHAR_BIT is not 8.
+
+12. EBCDIC/ASCII/UTF-8 data representation attacks.
+
+Bstrlib uses ctype.h functions to ensure that it remains portable to non-
+ASCII systems.  It also checks range to make sure it is well defined even for
+data that ANSI does not define for the ctype functions.
+
+Obscure issues:
+...............
+
+13. Data attributes
+
+There is no support for a Perl-like "taint" attribute; however, an example of 
+how to do this using C++'s type system is provided.
+
--- a/src/decode.cpp
+++ b/src/decode.cpp
--- a/src/internal_includes/ControlFlowGraph.h
+++ b/src/internal_includes/ControlFlowGraph.h
@ -0,0 +1,163 @@
+#pragma once
+
+#include <set>
+#include <map>
+#include <utility>
+#include <vector>
+#include <memory>
+
+#ifdef __APPLE__
+#include <tr1/memory>
+#endif
+
+#include <stdint.h>
+
+struct Instruction;
+class Operand;
+
+namespace HLSLcc
+{
+#ifdef __APPLE__
+    // Herp derp Apple is stuck in 2005
+    using namespace std::tr1;
+#else
+    using namespace std;
+#endif
+    
+	namespace ControlFlow
+	{
+		class BasicBlock;
+
+		// Container for the basic blocks of one instruction stream.
+		// Owns the BasicBlock objects (m_BlockStorage) and indexes them by the
+		// first instruction of each block (m_BlockMap).
+		class ControlFlowGraph
+		{
+			friend class BasicBlock;
+		public:
+			// Creates an empty graph; blocks are added later (see Build()).
+			ControlFlowGraph()
+				: m_BlockMap()
+				, m_BlockStorage()
+			{}
+
+			// Owning storage type: shared_ptr resolves to std:: or std::tr1:: depending on the platform (see the using-directive at the top of this namespace).
+			typedef std::vector<shared_ptr<BasicBlock> > BasicBlockStorage;
+
+			// NOTE(review): presumably constructs the graph starting at firstInstruction and returns
+			// the block that begins there - defined outside this header, confirm in the implementation.
+			const BasicBlock &Build(const Instruction *firstInstruction);
+
+			// Only works for instructions that start the basic block
+			const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const;
+
+			// non-const version for BasicBlock
+			BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction);
+
+			// Read-only access to every block stored in this graph.
+			const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; }
+		private:
+
+			// Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block
+			typedef std::map<const Instruction *, BasicBlock *> BasicBlockMap;
+
+			BasicBlockMap m_BlockMap;
+
+			// Owning (shared_ptr) storage for the BasicBlocks. BlockMap above only has raw pointers into these
+			BasicBlockStorage m_BlockStorage;
+		};
+
+
+		// One node of the control flow graph: a run of instructions from First() to Last(),
+		// together with the per-block sets declared below (register usage, reachable
+		// definitions and connections to neighbouring blocks).
+		class BasicBlock
+		{
+			friend class ControlFlowGraph;
+		public:
+			// A set of register indices, one per each vec4 component per register
+			typedef std::set<uint32_t> RegisterSet;
+			// The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block
+			typedef std::set<const Instruction *> ConnectionSet;
+
+			// Identifies a single definition site: the defining instruction and the
+			// operand of that instruction. Both pointers are non-owning.
+			struct Definition
+			{
+				Definition(const Instruction *i = NULL, const Operand *o = NULL)
+					: m_Instruction(i)
+					, m_Operand(o)
+				{}
+
+				Definition(const Definition &a)
+					: m_Instruction(a.m_Instruction)
+					, m_Operand(a.m_Operand)
+				{}
+
+				// Two definitions are equal when both the instruction and the operand match.
+				bool operator==(const Definition &a) const
+				{
+					if (a.m_Instruction != m_Instruction)
+						return false;
+					return a.m_Operand == m_Operand;
+				}
+
+				// Exact negation of operator==: true when the instruction OR the operand differs.
+				// (Previously this only returned true when BOTH differed, so two definitions
+				// could be neither equal nor unequal.)
+				bool operator!=(const Definition &a) const
+				{
+					return !(*this == a);
+				}
+
+				// Strict weak ordering for use as a std::set key: by instruction pointer first,
+				// then by operand pointer.
+				bool operator<(const Definition &a) const
+				{
+					if (m_Instruction != a.m_Instruction)
+						return m_Instruction < a.m_Instruction;
+					return m_Operand < a.m_Operand;
+				}
+
+				const Instruction	*m_Instruction;
+				const Operand		*m_Operand;
+			};
+
+			typedef std::set<Definition> ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable
+			typedef std::map<uint32_t, ReachableDefinitionsPerVariable> ReachableVariables; // A ReachableDefinitionsPerVariable for each variable*component.
+
+			const Instruction *First() const { return m_First; }
+			const Instruction *Last() const { return m_Last; }
+
+			const RegisterSet &UEVar() const { return m_UEVar; }
+			const RegisterSet &VarKill() const { return m_VarKill; }
+
+			const ConnectionSet &Preceding() const { return m_Preceding; }
+			const ConnectionSet &Succeeding() const { return m_Succeeding; }
+
+			const ReachableVariables &DEDef() const { return m_DEDef; }
+			const ReachableVariables &Reachable() const { return m_Reachable; }
+
+			// Helper function: Do union of 2 ReachableVariables, store result in a.
+			static void RVarUnion(ReachableVariables &a, const ReachableVariables &b);
+
+		private:
+
+			// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build()
+			BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead);
+
+			// Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already.
+			void Build();
+
+			bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed.
+
+
+			BasicBlock * AddChildBasicBlock(const Instruction *psFirst);
+
+		private:
+			ControlFlowGraph &m_Graph; // The graph object containing this block
+
+			const Instruction *m_First; // The first instruction in the basic block
+			const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction
+
+			RegisterSet m_UEVar;	// Upwards-exposed variables (temps that need definition from upstream and are used in this basic block)
+			RegisterSet m_VarKill;	// Set of variables that are defined in this block.
+
+			ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG
+			ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG
+
+			ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set.
+
+			ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block.
+
+		};
+
+
+
+	};
+};
+
+
--- a/src/internal_includes/ControlFlowGraphUtils.h
+++ b/src/internal_includes/ControlFlowGraphUtils.h
@ -0,0 +1,31 @@
+#pragma once
+
+struct Instruction;
+
+namespace HLSLcc
+{
+	namespace ControlFlow
+	{
+		// Stateless helpers (static functions only) for navigating flow-control
+		// instructions in an instruction stream.
+		class Utils
+		{
+		public:
+			// For a given flow-control instruction, find the corresponding jump location:
+			// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
+			// For ELSE, find same level ENDIF + 1
+			// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
+			// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
+			// For ENDLOOP, find previous same-level LOOP + 1
+			// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
+			// For CONTINUE/C the previous LOOP + 1
+			// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block.
+			// Note that CASE labels fall through.
+			// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
+			// If sawEndSwitch != null, it will be set to true if the label skipping went past an ENDSWITCH.
+			// If needConnectToParent != null, it will be set to true if sawEndSwitch == true and there are one or more case labels directly before it.
+			static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0);
+
+			// NOTE(review): presumably advances from psStart past label-only instructions (CASE/DEFAULT/ENDSWITCH)
+			// and reports an ENDSWITCH through the optional sawEndSwitch - defined elsewhere, confirm in the implementation.
+			static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0);
+
+		};
+	}
+}
--- a/src/internal_includes/DataTypeAnalysis.h
+++ b/src/internal_includes/DataTypeAnalysis.h
@ -0,0 +1,15 @@
+#pragma once
+
+#include "include/ShaderInfo.h"
+#include <vector>
+
+class HLSLCrossCompilerContext;
+struct Instruction;
+
+namespace HLSLcc
+{
+	namespace DataTypeAnalysis
+	{
+		// Entry point of the data type analysis pass.
+		// NOTE(review): presumably infers a SHADER_VARIABLE_TYPE for the ui32TempCount temp registers
+		// used by `instructions` and writes them to `results` - the layout of `results` is not
+		// visible from this header, confirm in the implementation.
+		void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> &instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results);
+	};
+};
--- a/src/internal_includes/Declaration.h
+++ b/src/internal_includes/Declaration.h
@ -0,0 +1,101 @@
+
+#pragma once
+
+#include <vector>
+#include <set>
+#include "internal_includes/tokens.h"
+#include "internal_includes/Operand.h"
+
+// Four 32-bit words stored as raw uint32_t values.
+// Element type of Declaration::asImmediateConstBuffer.
+typedef struct ICBVec4_TAG {
+	uint32_t a;
+	uint32_t b;
+	uint32_t c;
+	uint32_t d;
+} ICBVec4;
+
+// Access bit flags; the two values are distinct bits and can be OR-ed together.
+// NOTE(review): presumably stored in Declaration::sUAV.ui32AccessFlags - confirm against the users of these macros.
+#define ACCESS_FLAG_READ       0x1
+#define ACCESS_FLAG_WRITE      0x2
+
+// One decoded declaration. eOpcode selects which of the
+// opcode-specific fields below (the `value` union, sUAV, sTGSM, sIdxTemp, ...)
+// carry meaningful data.
+struct Declaration
+{
+	// Puts every scalar/POD member into a defined (zero) state so that fields
+	// which are not relevant for a given opcode never hold indeterminate values.
+	// Initializers are listed in member declaration order.
+	Declaration()
+	:
+	eOpcode(OPCODE_INVALID),
+	ui32NumOperands(0),
+	value(),				// value-initialization zero-initializes the union
+	ui32BufferStride(0),
+	sTGSM(),				// zero-initialized (no user-provided constructor)
+	sIdxTemp(),				// zero-initialized (no user-provided constructor)
+	ui32TableLength(0),
+	ui32IsShadowTex(0)
+	{}
+
+	OPCODE_TYPE eOpcode;
+
+	// Number of valid entries in asOperands.
+	uint32_t ui32NumOperands;
+
+	Operand asOperands[2];
+
+	std::vector<ICBVec4> asImmediateConstBuffer;
+	//The declaration can set one of these
+	//values depending on the opcode.
+	union {
+		uint32_t ui32GlobalFlags;
+		uint32_t ui32NumTemps;
+		RESOURCE_DIMENSION eResourceDimension;
+		INTERPOLATION_MODE eInterpolation;
+		PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology;
+		PRIMITIVE eInputPrimitive;
+		uint32_t ui32MaxOutputVertexCount;
+		TESSELLATOR_DOMAIN eTessDomain;
+		TESSELLATOR_PARTITIONING eTessPartitioning;
+		TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
+		uint32_t aui32WorkGroupSize[3];
+		uint32_t ui32HullPhaseInstanceCount;
+		float fMaxTessFactor;
+		uint32_t ui32IndexRange;
+		uint32_t ui32GSInstanceCount;
+
+		struct Interface_TAG
+		{
+			uint32_t ui32InterfaceID;
+			uint32_t ui32NumFuncTables;
+			uint32_t ui32ArraySize;
+		} interface;
+	} value;
+
+	uint32_t ui32BufferStride;
+
+	struct UAV_TAG
+	{
+		UAV_TAG() :
+			ui32GloballyCoherentAccess(0),
+			bCounter(0),
+			Type(RETURN_TYPE_UNORM),
+			ui32NumComponents(0),
+			ui32AccessFlags(0)
+		{
+		}
+		uint32_t ui32GloballyCoherentAccess;
+		uint8_t bCounter;
+		RESOURCE_RETURN_TYPE Type;
+		uint32_t ui32NumComponents;
+		uint32_t ui32AccessFlags;
+	} sUAV;
+
+	struct TGSM_TAG
+	{
+		uint32_t ui32Stride;
+		uint32_t ui32Count;
+	} sTGSM;
+
+	struct IndexableTemp_TAG
+	{
+		uint32_t ui32RegIndex;
+		uint32_t ui32RegCount;
+		uint32_t ui32RegComponentSize;
+	} sIdxTemp;
+
+	uint32_t ui32TableLength;
+
+	uint32_t ui32IsShadowTex;
+
+	// Set indexed by sampler register number.
+	std::set<uint32_t> samplersUsed;
+};
+
--- a/src/internal_includes/HLSLCrossCompilerContext.h
+++ b/src/internal_includes/HLSLCrossCompilerContext.h
@ -0,0 +1,50 @@
+#pragma once
+
+#include <stdint.h>
+#include <string>
+#include "bstrlib.h"
+
+class Shader;
+class GLSLCrossDependencyData;
+class ShaderPhase;
+class Translator;
+class Operand;
+class HLSLccReflection;
+
+// Shared state for one translation run: the output strings being built,
+// the shader being translated, the active translator and the reflection callbacks.
+class HLSLCrossCompilerContext
+{
+public:
+	// Binds the reflection callbacks and puts every other member into a defined
+	// null/zero state; callers are expected to fill them in before use.
+	// Initializers are listed in member declaration order.
+	HLSLCrossCompilerContext(HLSLccReflection &refl)
+		: glsl(nullptr)
+		, extensions(nullptr)
+		, currentGLSLString(nullptr)
+		, currentPhase(0)
+		, indent(0)
+		, flags(0)
+		, psShader(nullptr)
+		, psDependencies(nullptr)
+		, inputPrefix(nullptr)
+		, outputPrefix(nullptr)
+		, psTranslator(nullptr)
+		, m_Reflection(refl)
+	{}
+
+	bstring glsl;
+	bstring extensions;
+
+	bstring* currentGLSLString;//either glsl or earlyMain of current phase
+
+	uint32_t currentPhase;
+
+	int indent;
+	unsigned int flags;
+	Shader* psShader;
+	GLSLCrossDependencyData* psDependencies;
+	const char *inputPrefix; // Prefix for shader inputs
+	const char *outputPrefix; // Prefix for shader outputs
+
+	void DoDataTypeAnalysis(ShaderPhase *psPhase);
+
+	void ClearDependencyData();
+
+	void AddIndentation();
+
+	// Currently active translator
+	Translator *psTranslator;
+
+	HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info
+
+	// Retrieve the name for which the input or output is declared as. Takes into account possible redirections.
+	std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const;
+	std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const;
+
+	bool OutputNeedsDeclaring(const Operand* psOperand, const int count);
+
+};
--- a/src/internal_includes/HLSLccToolkit.h
+++ b/src/internal_includes/HLSLccToolkit.h
@ -0,0 +1,127 @@
+#pragma once
+#include "hlslcc.h"
+#include "bstrlib.h"
+#include <vector>
+#include <string>
+
+#include "internal_includes/Instruction.h"
+#include "internal_includes/Operand.h"
+
+class HLSLCrossCompilerContext;
+
+namespace HLSLcc
+{
+	uint32_t GetNumberBitsSet(uint32_t a);
+
+	// Conversions between a shader variable type and its type-flag representation.
+	uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType);
+
+	SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags);
+
+	// Constructor name lookup; the context-taking overload exists alongside explicit GLSL and Metal variants.
+	const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true);
+
+	const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
+		const int components, bool useGLSLPrecision);
+
+	const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
+		const int components);
+
+	std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows);
+
+	void AddSwizzleUsingElementCount(bstring dest, uint32_t count);
+
+	int WriteMaskToComponentCount(uint32_t writeMask);
+
+	uint32_t BuildComponentMaskFromElementCount(int count);
+
+	// True when a value of type src can be assigned to dest directly (mostly mediump<->highp floats and the like)
+	bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src);
+
+	// Resource return type -> SVT_ flags
+	uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType);
+
+	SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec);
+
+	uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount);
+
+	bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode);
+
+	bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB);
+
+	int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim);
+
+	SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b);
+
+	// True when the instruction adds 1 to the destination temp register
+	bool IsAddOneInstruction(const Instruction *psInst);
+
+	bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
+
+	// Append a float to the string with full precision
+	void PrintFloat(bstring b, float f);
+
+	// Selection flags for ForEachOperand
+	// Visit suboperands
+#define FEO_FLAG_SUBOPERAND 1
+	// Visit src operands
+#define FEO_FLAG_SRC_OPERAND 2
+	// Visit destination operands
+#define FEO_FLAG_DEST_OPERAND 4
+	// Convenience: visit everything (src, dest and all suboperands)
+#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND)
+
+	// Invokes callback(instructionIterator, operandPointer, flag) for the operands of every
+	// instruction in [_begin, _end) that the flags above select. For each instruction the
+	// operands [0, ui32FirstSrc) are visited as destinations first, then
+	// [ui32FirstSrc, ui32NumOperands) as sources; within one operand its non-null
+	// suboperands are reported (with FEO_FLAG_SUBOPERAND) before the operand itself.
+	template<typename ItrType, typename F> void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback)
+	{
+		const bool wantSubOperands = (flags & FEO_FLAG_SUBOPERAND) != 0;
+
+		for (ItrType inst = _begin; inst != _end; inst++)
+		{
+			// Pass 0 walks the destination range, pass 1 the source range.
+			for (int pass = 0; pass < 2; pass++)
+			{
+				const bool isDestPass = (pass == 0);
+				const int operandFlag = isDestPass ? FEO_FLAG_DEST_OPERAND : FEO_FLAG_SRC_OPERAND;
+
+				// Nothing to report in this range unless the operands themselves or their suboperands were requested.
+				if ((flags & (operandFlag | FEO_FLAG_SUBOPERAND)) == 0)
+					continue;
+
+				uint32_t opIdx = isDestPass ? 0 : inst->ui32FirstSrc;
+				// The upper bound is re-read on every iteration because the callback is handed the instruction.
+				while (opIdx < (isDestPass ? inst->ui32FirstSrc : inst->ui32NumOperands))
+				{
+					if (wantSubOperands)
+					{
+						for (uint32_t subIdx = 0; subIdx < MAX_SUB_OPERANDS; subIdx++)
+						{
+							Operand *psSubOperand = inst->asOperands[opIdx].m_SubOperands[subIdx].get();
+							if (psSubOperand)
+								callback(inst, psSubOperand, FEO_FLAG_SUBOPERAND);
+						}
+					}
+
+					if (flags & operandFlag)
+						callback(inst, &inst->asOperands[opIdx], operandFlag);
+
+					opIdx++;
+				}
+			}
+		}
+	}
+
+
+};
--- a/src/internal_includes/Instruction.h
+++ b/src/internal_includes/Instruction.h
@ -0,0 +1,134 @@
+#pragma once
+
+#include "internal_includes/Operand.h"
+#include "internal_includes/tokens.h"
+#include "include/ShaderInfo.h"
+#include <memory>
+
+#define ATOMIC_ADDRESS_BASIC 0
+#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1
+#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2
+
+#define TEXSMP_FLAG_NONE 0x0
+#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand
+#define TEXSMP_FLAG_DEPTHCOMPARE 0x2
+#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0
+#define TEXSMP_FLAG_BIAS 0x8
+#define TEXSMP_FLAG_GRAD 0x10
+//Gather specific flags
+#define TEXSMP_FLAG_GATHER 0x20
+#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset comes from operand
+
+// A single shader instruction: opcode, operands and the per-instruction data that
+// analysis passes attach to it (use sites, loop-transformation bookkeeping).
+struct Instruction
+{
+	// Creates a NOP with no operands. Every scalar member receives a defined value
+	// (zero / false / value-initialized enum) so that no field is read while indeterminate.
+	// Initializers are listed in member declaration order.
+	Instruction()
+		: eOpcode(OPCODE_NOP)
+		, eBooleanTestType(INSTRUCTION_TEST_ZERO)
+		, ui32SyncFlags(0)
+		, ui32NumOperands(0)
+		, ui32FirstSrc(0)
+		, bSaturate(0)
+		, ui32FuncIndexWithinInterface(0)
+		, eResInfoReturnType()
+		, bAddressOffset(0)
+		, iUAddrOffset(0)
+		, iVAddrOffset(0)
+		, iWAddrOffset(0)
+		, xType()
+		, yType()
+		, zType()
+		, wType()
+		, eResDim()
+		, iCausedSplit(0)
+		, m_Uses()
+		, m_SkipTranslation(false)
+		, m_InductorRegister(0)
+		, id(0)
+	{
+		m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
+	}
+
+	// For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT)
+	// Operands are added in order (reg1 is the destination, reg2..reg4 are sources) and
+	// construction stops at the first operand whose mask is 0.
+	Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0)
+		: eOpcode(opcode)
+		, eBooleanTestType(INSTRUCTION_TEST_ZERO)
+		, ui32SyncFlags(0)
+		, ui32NumOperands(0)
+		, ui32FirstSrc(0)
+		, bSaturate(0)
+		, ui32FuncIndexWithinInterface(0)
+		, eResInfoReturnType()
+		, bAddressOffset(0)
+		, iUAddrOffset(0)
+		, iVAddrOffset(0)
+		, iWAddrOffset(0)
+		, xType()
+		, yType()
+		, zType()
+		, wType()
+		, eResDim()
+		, iCausedSplit(0)
+		, m_Uses()
+		, m_SkipTranslation(false)
+		, m_InductorRegister(0)
+		, id(_id)
+	{
+		m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
+
+		if (reg1Mask == 0)
+			return;
+
+		// Destination operand.
+		InitTestOperand(asOperands[0], reg1, reg1Mask, OPERAND_TYPE_OUTPUT);
+		ui32NumOperands = 1;
+
+		if (reg2Mask == 0)
+			return;
+
+		// First source operand; from here on sources start at index 1.
+		InitTestOperand(asOperands[1], reg2, reg2Mask, OPERAND_TYPE_INPUT);
+		ui32FirstSrc = 1;
+		ui32NumOperands = 2;
+
+		if (reg3Mask == 0)
+			return;
+
+		InitTestOperand(asOperands[2], reg3, reg3Mask, OPERAND_TYPE_INPUT);
+		ui32NumOperands = 3;
+
+		if (reg4Mask == 0)
+			return;
+
+		InitTestOperand(asOperands[3], reg4, reg4Mask, OPERAND_TYPE_INPUT);
+		ui32NumOperands = 4;
+	}
+
+
+	bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const;
+
+	// Flags for ChangeOperandTempRegister
+#define UD_CHANGE_SUBOPERANDS 1
+#define UD_CHANGE_MAIN_OPERAND 2
+#define UD_CHANGE_ALL 3
+
+	void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase);
+
+
+	OPCODE_TYPE eOpcode;
+	INSTRUCTION_TEST_BOOLEAN eBooleanTestType;
+	uint32_t ui32SyncFlags;
+	uint32_t ui32NumOperands; // Number of valid entries in asOperands
+	uint32_t ui32FirstSrc; // Index of the first source operand; operands before it are destinations
+	Operand asOperands[6];
+	uint32_t bSaturate;
+	uint32_t ui32FuncIndexWithinInterface;
+	RESINFO_RETURN_TYPE eResInfoReturnType;
+
+	int bAddressOffset;
+	int8_t iUAddrOffset;
+	int8_t iVAddrOffset;
+	int8_t iWAddrOffset;
+	RESOURCE_RETURN_TYPE xType, yType, zType, wType;
+	RESOURCE_DIMENSION eResDim;
+	int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking
+
+	struct Use
+	{
+		Use() : m_Inst(0), m_Op(0) {}
+		Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
+		Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {}
+
+		Instruction *m_Inst; // The instruction that references the result of this instruction
+		Operand		*m_Op;   // The operand within the instruction above. Note: can also be suboperand.
+	};
+
+	std::vector<Use> m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg.
+
+	Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc,  and increment. 
+	bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation)
+	uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it
+
+	uint64_t id;
+
+private:
+	// Helper for the unit-test constructor: sets up one masked operand. A register
+	// number of 0xffffffff selects nonTempType with register 0, anything else a temp register.
+	static void InitTestOperand(Operand &op, uint32_t reg, uint32_t mask, OPERAND_TYPE nonTempType)
+	{
+		const bool isNonTemp = (reg == 0xffffffff);
+		op.eType = isNonTemp ? nonTempType : OPERAND_TYPE_TEMP;
+		op.ui32RegisterNumber = isNonTemp ? 0 : reg;
+		op.ui32CompMask = mask;
+		op.eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
+	}
+};
--- a/src/internal_includes/LoopTransform.h
+++ b/src/internal_includes/LoopTransform.h
@ -0,0 +1,9 @@
+
+#pragma once
+
+class ShaderPhase;
+
+namespace HLSLcc
+{
+	void DoLoopTransform(ShaderPhase &phase); // Loop transformation entry point for one shader phase (may modify it: taken by non-const reference). NOTE(review): presumably detects for-loop-like constructs and fills Instruction::m_LoopInductors - confirm against the implementation.
+};
--- a/src/internal_includes/Operand.h
+++ b/src/internal_includes/Operand.h
@ -0,0 +1,152 @@
+#pragma once
+
+#include "internal_includes/tokens.h"
+#include <vector>
+#include <memory>
+
+#ifdef __APPLE__
+#include <tr1/memory>
+#endif
+
+enum{ MAX_SUB_OPERANDS = 3 };
+class Operand;
+class HLSLCrossCompilerContext;
+struct Instruction;
+
+#if _MSC_VER
+// We want to disable the "array will be default-initialized" warning, as that's exactly what we want
+#pragma warning(disable: 4351)
+#endif
+
+class Operand // One instruction operand (or suboperand): register/immediate reference, selection mask/swizzle and per-component data types.
+{
+public:
+#ifdef __APPLE__
+	// Apple builds use the TR1 shared_ptr (see the <tr1/memory> include at the top of this header). NOTE(review): presumably for old Apple standard libraries - confirm whether still needed.
+	typedef std::tr1::shared_ptr<Operand> SubOperandPtr;
+#else
+	typedef std::shared_ptr<Operand> SubOperandPtr;
+#endif
+
+
+
+	Operand() // Value-initializes every member: scalars and arrays zeroed, containers empty, suboperand pointers null.
+		:
+		iExtended(),
+		eType(),
+		eModifier(),
+		eMinPrecision(),
+		iIndexDims(),
+		iWriteMask(),
+		iGSInput(),
+		iPSInOut(),
+		iWriteMaskEnabled(),
+		iArrayElements(),
+		iNumComponents(),
+		eSelMode(),
+		ui32CompMask(),
+		ui32Swizzle(),
+		aui32Swizzle(),
+		aui32ArraySizes(),
+		ui32RegisterNumber(),
+		afImmediates(),
+		adImmediates(),
+		eSpecialName(),
+		specialName(),
+		eIndexRep(),
+		m_SubOperands(),
+		aeDataType(),
+		m_Rebase(0),
+		m_Size(0),
+		m_Defines(),
+		m_ForLoopInductorName(0)
+#ifdef _DEBUG
+		, id(0)
+#endif
+	{}
+
+	// Retrieve the mask of all the components this operand accesses (either reads from or writes to).
+	// Note that destination writemask does affect the effective access mask.
+	uint32_t GetAccessMask() const;
+	
+	// Returns the index of the highest accessed component, based on component mask
+	int GetMaxComponent() const;
+
+	bool IsSwizzleReplicated() const;
+
+	// Get the number of elements returned by operand, taking additional component mask into account
+	//e.g.
+	//.z = 1
+	//.x = 1
+	//.yw = 2
+	uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const;
+
+	// When this operand is used as an input declaration, how many components does it have?
+	int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const;
+
+	// Retrieve the operand data type.
+	SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const;
+
+	// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
+	int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const;
+	// Same as above but with explicit shader type and phase
+	int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
+
+	// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
+	static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);
+
+	int iExtended;
+	OPERAND_TYPE eType;
+	OPERAND_MODIFIER eModifier;
+	OPERAND_MIN_PRECISION eMinPrecision;
+	int iIndexDims;
+	int iWriteMask;
+	int iGSInput;
+	int iPSInOut;
+	int iWriteMaskEnabled;
+	int iArrayElements;
+	int iNumComponents;
+
+	OPERAND_4_COMPONENT_SELECTION_MODE eSelMode;
+	uint32_t ui32CompMask;
+	uint32_t ui32Swizzle;
+	uint32_t aui32Swizzle[4];
+
+	uint32_t aui32ArraySizes[3];
+	uint32_t ui32RegisterNumber;
+	//If eType is OPERAND_TYPE_IMMEDIATE32
+	float afImmediates[4];
+	//If eType is OPERAND_TYPE_IMMEDIATE64
+	double adImmediates[4];
+
+	SPECIAL_NAME eSpecialName;
+	std::string specialName;
+
+	OPERAND_INDEX_REPRESENTATION eIndexRep[3];
+
+	SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS]; // Shared-ownership pointers to suboperands; all null after construction.
+
+	//One type for each component.
+	SHADER_VARIABLE_TYPE aeDataType[4];
+
+	uint32_t m_Rebase; // Rebase value, for constant array accesses.
+	uint32_t m_Size; // Component count, only for constant array access.
+
+	struct Define
+	{
+		Define() : m_Inst(0), m_Op(0) {}
+		Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
+		Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {}
+
+		Instruction *m_Inst; // Instruction that writes to the temp
+		Operand		*m_Op;	 // The (destination) operand within that instruction.
+	};
+
+	std::vector<Define> m_Defines; // Array of instructions whose results this operand can use. (only if eType == OPERAND_TYPE_TEMP)
+	uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber)
+
+#ifdef _DEBUG
+	uint64_t id; // Debug-only identifier; exists only when _DEBUG is defined, so the class layout differs between debug and non-debug builds.
+#endif
+};
+
--- a/src/internal_includes/Shader.h
+++ b/src/internal_includes/Shader.h
@ -0,0 +1,267 @@
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <map>
+
+#include "growing_array.h"
+#include "internal_includes/tokens.h"
+#include "internal_includes/reflect.h"
+#include "include/ShaderInfo.h"
+#include "internal_includes/Instruction.h"
+#include "internal_includes/Declaration.h"
+#include "internal_includes/ControlFlowGraph.h"
+#include "bstrlib.h"
+
+// Describes one accessed chunk of a constant array: its size, the component mask it is
+// accessed with, and the operands that use it.
+struct ConstantArrayChunk
+{
+	// Empty chunk; all counters start at zero.
+	ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {}
+
+	// Chunk of sz elements accessed with the given mask, recording firstUse as its first use site.
+	// m_Rebase and m_ComponentCount start at zero instead of being left indeterminate.
+	ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse)
+		: m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0)
+	{
+		m_UseSites.push_back(firstUse);
+	}
+
+	uint32_t m_Size;
+	uint32_t m_AccessMask;
+	uint32_t m_Rebase;
+	uint32_t m_ComponentCount;
+
+	std::vector<Operand *> m_UseSites; // Operands that access this chunk (pointers are not owned here)
+};
+typedef std::multimap<uint32_t, ConstantArrayChunk> ChunkMap;
+
+// Bookkeeping for a constant array: where it was declared and which chunks of it are accessed.
+struct ConstantArrayInfo
+{
+	ConstantArrayInfo()
+		: m_OrigDeclaration(0)
+		, m_Chunks()
+	{}
+
+	// The declaration the const array originally came from (null until set)
+	Declaration *m_OrigDeclaration;
+	// Keyed by starting offset; one offset may appear several times, once per distinct access mask
+	ChunkMap m_Chunks;
+};
+
+// One phase of a shader: its declarations and instructions plus the per-phase
+// analysis state (temp register bookkeeping, I/O redirection tables, control flow graph).
+class ShaderPhase
+{
+public:
+	// Starts as an empty MAIN_PHASE. Initializers follow the member declaration order.
+	ShaderPhase()
+		: m_ConstantArrayInfo()
+		, psDecl()
+		, psInst()
+		, ePhase(MAIN_PHASE)
+		, ui32InstanceCount(0)
+		, postShaderCode()
+		, hasPostShaderCode(0)
+		, earlyMain()
+		, ui32OrigTemps(0)
+		, ui32TotalTemps(0)
+		, psTempDeclaration(NULL)
+		, pui32SplitInfo()
+		, peTempTypes()
+		, acInputNeedsRedirect()
+		, acOutputNeedsRedirect()
+		, acPatchConstantsNeedsRedirect()
+		, m_NextFreeTempRegister(1)
+		, m_NextTexCoordTemp(0)
+		, m_CFGInitialized(false)
+		, m_CFG()
+	{}
+
+	void ResolveUAVProperties();
+
+	// Rewrites MOV tX.xyz, (0, 1, 2) as MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 so that datatype analysis gets easier
+	void UnvectorizeImmMoves();
+
+	// Visits everything that accesses a const array to find out whether the array could be made smaller
+	void PruneConstArrays();
+
+	// Turns every SWAPC opcode into a bunch of MOVCs. Must be done first!
+	void ExpandSWAPCs();
+
+	ConstantArrayInfo m_ConstantArrayInfo;
+
+	std::vector<Declaration> psDecl;
+	std::vector<Instruction> psInst;
+
+	SHADER_PHASE_TYPE ePhase;
+	// For hull shaders: the number of instances this phase needs. The constructor sets it to 0.
+	// NOTE(review): the previous comment said "Defaults to 1" - presumably assigned elsewhere; confirm.
+	uint32_t ui32InstanceCount;
+	bstring postShaderCode;//End of main or before emit()
+	int hasPostShaderCode;
+
+	bstring earlyMain;//Code to be inserted at the start of phase
+
+	uint32_t ui32OrigTemps; // Temporaries originally declared by this phase
+	uint32_t ui32TotalTemps; // Temporaries this phase has at the moment
+	Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode
+
+	// Split table: per register, the index of the original register it was split out from, or 0xffffffff
+	// Layout: bits 0-15: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz). bits 24-31: component count
+	std::vector<uint32_t> pui32SplitInfo;
+	std::vector<SHADER_VARIABLE_TYPE> peTempTypes;
+
+	// Needed when 2 vec2 texcoords are combined into one vec4 and accessed together.
+	// 0xff: all reads must be re-routed via a combined vec4. 0xfe: the same, but the vec4 has already been declared.
+	std::vector<unsigned char> acInputNeedsRedirect;
+	std::vector<unsigned char> acOutputNeedsRedirect; // Same for outputs
+	std::vector<unsigned char> acPatchConstantsNeedsRedirect; // Same for patch constants
+
+	// Returns the Control Flow Graph of this phase, building it if necessary.
+	HLSLcc::ControlFlow::ControlFlowGraph &GetCFG();
+
+	uint32_t m_NextFreeTempRegister; // Counter used when creating new temporaries for for-loops.
+	uint32_t m_NextTexCoordTemp; // Counter used when creating tex coord temps for driver issue workarounds
+
+private:
+	bool m_CFGInitialized;
+	HLSLcc::ControlFlow::ControlFlowGraph m_CFG;
+};
+
+class Shader // A decoded shader: version and type info, its phases (declarations + instructions), reflection info and translation bookkeeping.
+{
+public:
+
+	Shader() // Zero/empty-initializes the members listed; the ps*TempSizes vectors are not listed and are default-constructed (empty).
+		:
+		ui32MajorVersion(0),
+		ui32MinorVersion(0),
+		eShaderType(INVALID_SHADER),
+		eTargetLanguage(LANG_DEFAULT),
+		extensions(0),
+		fp64(0),
+		ui32ShaderLength(0),
+		aui32FuncTableToFuncPointer(),
+		aui32FuncBodyToFuncTable(),
+		funcTable(),
+		funcPointer(),
+		ui32NextClassFuncName(),
+		pui32FirstToken(NULL),
+		asPhases(),
+		sInfo(),
+		abScalarInput(),
+		abScalarOutput(),
+		aIndexedInput(),
+		aIndexedOutput(),
+		aIndexedInputParents(),
+		aeResourceDims(),
+		acInputDeclared(),
+		acOutputDeclared(),
+		aiOpcodeUsed(NUM_OPCODES, 0),
+		ui32CurrentVertexOutputStream(0),
+		textureSamplers(),
+		aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0),
+		ui32CurrentStructuredBufferIndex(),
+		m_CubemapArrayExtensionDeclared(false),
+		m_TextureBufferExtensionDeclared(false),
+		m_ClipDistanceExtensionDeclared(false)
+	{
+	}
+
+	// Retrieve the number of components the temp register has.
+	uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const;
+
+	//Hull shaders have multiple phases.
+	//Each phase has its own temps.
+	//Convert from per-phase temps to global temps.
+	void ConsolidateHullTempVars();
+
+	// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list
+	void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase);
+
+	// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
+	// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
+	// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero.
+	void PrepareStructuredBufferBindingSlots();
+
+	// Detect temp registers per data type that are actually used.
+	void PruneTempRegisters();
+
+	// Check if inputs and outputs are accessed across semantic boundaries
+	// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
+	void AnalyzeIOOverlap();
+
+	// Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs.
+	void ForcePositionToHighp();
+	
+	void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used.
+
+	void ExpandSWAPCs(); // NOTE(review): presumably runs ShaderPhase::ExpandSWAPCs for every phase - confirm against the implementation.
+
+	uint32_t ui32MajorVersion;
+	uint32_t ui32MinorVersion;
+	SHADER_TYPE eShaderType;
+
+	GLLang eTargetLanguage;
+	const struct GlExtensions *extensions; // Null after construction; this header never deletes it.
+
+	int fp64;
+
+	//DWORDs in program code, including version and length tokens.
+	uint32_t ui32ShaderLength;
+
+
+	//Instruction* functions;//non-main subroutines
+	HLSLcc::growing_vector<uint32_t> aui32FuncTableToFuncPointer; // dynamic alloc?
+	HLSLcc::growing_vector<uint32_t> aui32FuncBodyToFuncTable;
+
+	struct FuncTableEntry{
+		HLSLcc::growing_vector<uint32_t> aui32FuncBodies;
+	};
+	HLSLcc::growing_vector<FuncTableEntry> funcTable;
+
+	struct FuncPointerEntry {
+		HLSLcc::growing_vector<uint32_t> aui32FuncTables;
+		uint32_t ui32NumBodiesPerTable; // NOTE(review): no constructor in this struct initializes this field - confirm that growing_vector value-initializes new entries.
+	};
+
+	HLSLcc::growing_vector<FuncPointerEntry> funcPointer;
+
+	HLSLcc::growing_vector<uint32_t> ui32NextClassFuncName;
+
+	const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream.
+
+	std::vector<ShaderPhase> asPhases;
+	
+	ShaderInfo sInfo;
+
+	// There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex.
+	// Which one is used depends on the context:
+	// per-vertex space is used in vertex/pixel/geom shaders always
+	// hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT)
+	// domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT
+
+	// Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch
+	// Note that these ints are component masks
+	HLSLcc::growing_vector<int> abScalarInput[2];
+	HLSLcc::growing_vector<int> abScalarOutput[2];
+
+	HLSLcc::growing_vector<int> aIndexedInput[2];
+	HLSLcc::growing_vector<bool> aIndexedOutput[2]; // NOTE(review): element type is bool while aIndexedInput uses int - confirm this asymmetry is intended.
+
+	HLSLcc::growing_vector<int> aIndexedInputParents[2];
+
+	HLSLcc::growing_vector<RESOURCE_DIMENSION> aeResourceDims;
+
+	HLSLcc::growing_vector<char> acInputDeclared[2];
+	HLSLcc::growing_vector<char> acOutputDeclared[2];
+
+	std::vector<int> aiOpcodeUsed; // Initialized to NUM_OPCODES elements above.
+
+	uint32_t ui32CurrentVertexOutputStream;
+
+	TextureSamplerPairs textureSamplers;
+
+	std::vector<uint32_t> aui32StructuredBufferBindingPoints; // Initialized to MAX_RESOURCE_BINDINGS zero elements above.
+	uint32_t ui32CurrentStructuredBufferIndex;
+
+	bool m_CubemapArrayExtensionDeclared;
+	bool m_TextureBufferExtensionDeclared;
+	bool m_ClipDistanceExtensionDeclared;
+
+	std::vector<char> psIntTempSizes; // Array for whether this temp register needs declaration as int temp
+	std::vector<char> psInt16TempSizes; // min16ints
+	std::vector<char> psInt12TempSizes; // min12ints
+	std::vector<char> psUIntTempSizes; // Same for uints
+	std::vector<char> psUInt16TempSizes; // ... and for uint16's
+	std::vector<char> psFloatTempSizes; // ...and for floats
+	std::vector<char> psFloat16TempSizes; // ...and for min16floats
+	std::vector<char> psFloat10TempSizes; // ...and for min10floats
+	std::vector<char> psDoubleTempSizes; // ...and for doubles
+	std::vector<char> psBoolTempSizes; // ... and for bools
+
+private:
+	void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); // Per-operand helper. NOTE(review): presumably called from AnalyzeIOOverlap - confirm.
+
+};
--- a/src/internal_includes/Translator.h
+++ b/src/internal_includes/Translator.h
@ -0,0 +1,35 @@
+
+#pragma once
+#include "HLSLCrossCompilerContext.h"
+#include "Shader.h"
+
+struct Declaration;
+// Base class for translator backend implementations.
+class Translator // Abstract interface: every member below marked "= 0" must be provided by the concrete backend.
+{
+protected:
+	HLSLCrossCompilerContext *psContext; // Stored by the constructor; this class never deletes it.
+public:
+	explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {}
+	virtual ~Translator() {}
+
+	virtual bool Translate() = 0; // Runs the backend's translation. NOTE(review): presumably returns true on success - confirm against the implementations.
+
+	virtual void TranslateDeclaration(const Declaration *psDecl) = 0;
+
+	// Translate system value type to name, return true if succeeded and no further translation is necessary
+	virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) = 0;
+
+	// In GLSL, the input and output names cannot clash.
+	// Also, the output name of previous stage must match the input name of the next stage.
+	// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
+	//
+	virtual void SetIOPrefixes() = 0;
+
+	void SetExtensions(const struct GlExtensions *ext) // Stores ext on the context's shader; dereferences psContext and psContext->psShader without a null check.
+	{
+		psContext->psShader->extensions = ext;
+	}
+
+
+};
--- a/src/internal_includes/UseDefineChains.h
+++ b/src/internal_includes/UseDefineChains.h
@ -0,0 +1,141 @@
+#pragma once
+
+#include <set>
+#include <map>
+#include <list>
+#include <vector>
+#include <algorithm>
+
+#include <stdint.h>
+#include <string.h>
+
+struct DefineUseChainEntry;
+struct UseDefineChainEntry;
+
+typedef std::set<DefineUseChainEntry *> DefineSet;
+typedef std::set<UseDefineChainEntry *> UsageSet;
+
+struct Instruction;
+class Operand;
+class ShaderInfo;
+namespace HLSLcc
+{
+	namespace ControlFlow
+	{
+		class ControlFlowGraph;
+	};
+};
+
+
+// Def-Use chain per temp component
+// One definition (write) of a single temp register component, together with the
+// usages that depend on it.
+struct DefineUseChainEntry
+{
+	DefineUseChainEntry()
+		: psInst(0)
+		, psOp(0)
+		, usages()
+		, writeMask(0)
+		, index(0)
+		, isStandalone(0)
+	{
+		// Size taken from the array itself so it cannot drift from the declaration below.
+		memset(psSiblings, 0, sizeof(psSiblings));
+	}
+
+	Instruction *psInst;			// The declaration (write to this temp component)
+	Operand *psOp;					// The operand within this instruction for the write target
+	UsageSet usages;				// List of usages that are dependent on this write
+	uint32_t writeMask;				// Access mask; which all components were written to in the same op 
+	uint32_t index;					// For which component was this definition created for?
+	uint32_t isStandalone;			// A shortcut for analysis: if nonzero, all usages (and all their siblings) of this and all its siblings only see this definition.
+	struct DefineUseChainEntry *psSiblings[4];  // In case of vectorized op, contains pointer to this define's corresponding entries for the other components.
+
+	// Debug-only shallow comparison. Guarded with #ifdef, like the other _DEBUG checks
+	// in this code base, so that a _DEBUG macro defined without a value also works.
+#ifdef _DEBUG
+	bool operator==(const DefineUseChainEntry &a) const
+	{
+		// The usage sets are only compared by size, not element by element.
+		return psInst == a.psInst
+			&& psOp == a.psOp
+			&& writeMask == a.writeMask
+			&& index == a.index
+			&& isStandalone == a.isStandalone
+			&& usages.size() == a.usages.size();
+	}
+
+#endif
+
+};
+
+typedef std::list<DefineUseChainEntry> DefineUseChain;
+
+// One use (read) of a single temp register component, together with the definitions
+// that are visible to it.
+struct UseDefineChainEntry
+{
+	UseDefineChainEntry()
+		: psInst(0)
+		, psOp(0)
+		, defines()
+		, accessMask(0)
+		, index(0)
+	{
+		// Size taken from the array itself so it cannot drift from the declaration below.
+		memset(psSiblings, 0, sizeof(psSiblings));
+	}
+
+	Instruction *psInst;			// The use (read from this temp component)
+	Operand *psOp;					// The operand within this instruction for the read
+	DefineSet defines;				// List of writes that are visible to this read
+	uint32_t accessMask;			// Which all components were read together with this one
+	uint32_t index;					// For which component was this usage created for?
+	struct UseDefineChainEntry *psSiblings[4];  // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components.
+
+	// Debug-only shallow comparison. Guarded with #ifdef, like the other _DEBUG checks
+	// in this code base, so that a _DEBUG macro defined without a value also works.
+#ifdef _DEBUG
+	bool operator==(const UseDefineChainEntry &a) const
+	{
+		// The define sets are only compared by size, not element by element.
+		return psInst == a.psInst
+			&& psOp == a.psOp
+			&& accessMask == a.accessMask
+			&& index == a.index
+			&& defines.size() == a.defines.size();
+	}
+
+#endif
+
+};
+
+typedef std::list<UseDefineChainEntry> UseDefineChain;
+
+typedef std::map<uint32_t, UseDefineChain> UseDefineChains;
+typedef std::map<uint32_t, DefineUseChain> DefineUseChains;
+typedef std::vector<DefineUseChainEntry *> ActiveDefinitions;
+
+// Do flow control analysis on the instructions and build the define-use and use-define chains
+void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg);
+
+// Do temp splitting based on use-define chains
+void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable);
+
+// Based on the sampler precisions, downgrade the definitions if possible.
+void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps);
+
+// Optimization pass for successive passes: Mark DefineUseChainEntry::isStandalone for definitions that are "standalone": all usages (and all their siblings) of this and all its siblings only see this definition.
+void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps);
+
+// Write the uses and defines back to Instruction and Operand member lists.
+void WriteBackUsesAndDefines(DefineUseChains &psDUChains);
+
--- a/src/internal_includes/debug.h
+++ b/src/internal_includes/debug.h
@ -0,0 +1,18 @@
+#ifndef DEBUG_H_
+#define DEBUG_H_
+
+#ifdef _DEBUG
+#include "assert.h"
+// The expression is collapsed to 0/1 before the call: passing it straight into the int
+// parameter would narrow wide values (a nonzero 64-bit value whose low 32 bits are zero
+// would fire a false assertion) and would reject pointer expressions outright.
+#define ASSERT(expr) CustomAssert(!!(expr))
+// Debug-build assertion helper: triggers the standard assert when expression is zero.
+static void CustomAssert(int expression)
+{
+    if(!expression)
+    {
+        assert(0);
+    }
+}
+#else
+#define ASSERT(expr)
+#endif
+
+#endif
--- a/src/internal_includes/decode.h
+++ b/src/internal_includes/decode.h
@ -0,0 +1,10 @@
+#ifndef DECODE_H
+#define DECODE_H
+
+#include "internal_includes/Shader.h"
+
+Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags);
+
+void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst);
+
+#endif
--- a/src/internal_includes/languages.h
+++ b/src/internal_includes/languages.h
@ -0,0 +1,249 @@
+#ifndef LANGUAGES_H
+#define LANGUAGES_H
+
+#include "hlslcc.h"
+
+// Returns 0 for LANG_ES_100 and LANG_120, 1 for every other language.
+static int InOutSupported(const GLLang eLang)
+{
+	const bool isExcluded = (eLang == LANG_ES_100) || (eLang == LANG_120);
+	return isExcluded ? 0 : 1;
+}
+
+// Returns 1 for LANG_ES_100 and LANG_120, 0 for every other language.
+static int WriteToFragData(const GLLang eLang)
+{
+	const bool isSelected = (eLang == LANG_ES_100) || (eLang == LANG_120);
+	return isSelected ? 1 : 0;
+}
+
+// Returns 1 for LANG_ES_300, LANG_ES_310 and any language >= LANG_330; 0 otherwise.
+static int ShaderBitEncodingSupported(const GLLang eLang)
+{
+	if (eLang == LANG_ES_300 || eLang == LANG_ES_310)
+		return 1;
+
+	return (eLang >= LANG_330) ? 1 : 0;
+}
+
+// Returns 0 for LANG_ES_100 and LANG_120, 1 for every other language.
+static int HaveOverloadedTextureFuncs(const GLLang eLang)
+{
+	switch (eLang)
+	{
+	case LANG_ES_100:
+	case LANG_120:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+// Only enabled for ES: not present in 120 and ignored in the other desktop languages.
+// Returns 1 when eLang lies within [LANG_ES_100, LANG_ES_310], 0 otherwise.
+static int HavePrecisionQualifers(const GLLang eLang)
+{
+	if (eLang < LANG_ES_100)
+		return 0;
+	if (eLang > LANG_ES_310)
+		return 0;
+	return 1;
+}
+
+// Returns 1 when eLang lies within [LANG_400, LANG_GL_LAST], 0 otherwise.
+static int HaveCubemapArray(const GLLang eLang)
+{
+	const bool inRange = (eLang >= LANG_400) && (eLang <= LANG_GL_LAST);
+	return inRange ? 1 : 0;
+}
+
+// True when eLang lies within [LANG_ES_FIRST, LANG_ES_LAST].
+static bool IsESLanguage(const GLLang eLang)
+{
+	if (eLang < LANG_ES_FIRST)
+		return false;
+	return eLang <= LANG_ES_LAST;
+}
+
+// True when eLang lies within [LANG_GL_FIRST, LANG_GL_LAST].
+static bool IsDesktopGLLanguage(const GLLang eLang)
+{
+	if (eLang < LANG_GL_FIRST)
+		return false;
+	return eLang <= LANG_GL_LAST;
+}
+
+//Only on vertex inputs and pixel outputs.
+// Returns 1 when eLang is LANG_330 or later, LANG_ES_300 or LANG_ES_310, or when a
+// non-null extensions struct has ARB_explicit_attrib_location set; 0 otherwise.
+// extensions may be NULL, in which case only the language is considered.
+static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions)
+{
+    if(eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310)
+    {
+        return 1;
+    }
+    // The flag is only read, so it is accessed through the const pointer directly
+    // instead of casting the constness away.
+    if(extensions && extensions->ARB_explicit_attrib_location)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_410; 0 otherwise.
+static int HaveInOutLocationQualifier(const GLLang eLang)
+{
+    if (eLang == LANG_ES_310)
+        return 1;
+    return (eLang >= LANG_410) ? 1 : 0;
+}
+
+//layout(binding = X) uniform {uniformA; uniformB;}
+//layout(location = X) uniform uniform_name;
+// Returns 0 whenever HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS is set in flags.
+// Otherwise returns 1 when eLang is LANG_430 or later or LANG_ES_310, or when a
+// non-null extensions struct has both ARB_explicit_uniform_location and
+// ARB_shading_language_420pack set; 0 in all other cases.
+static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExtensions *extensions, unsigned int flags)
+{
+	if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)
+		return 0;
+
+	if (eLang >= LANG_430 || eLang == LANG_ES_310)
+		return 1;
+
+	// The flags are only read, so they are accessed through the const pointer
+	// directly instead of casting the constness away.
+	if (extensions && extensions->ARB_explicit_uniform_location && extensions->ARB_shading_language_420pack)
+		return 1;
+
+	return 0;
+}
+
+// Returns 1 for any language >= LANG_330, 0 otherwise.
+static int DualSourceBlendSupported(const GLLang eLang)
+{
+    return (eLang >= LANG_330) ? 1 : 0;
+}
+
+// Returns 1 for any language >= LANG_400, 0 otherwise.
+static int SubroutinesSupported(const GLLang eLang)
+{
+    return (eLang >= LANG_400) ? 1 : 0;
+}
+
+// Before 430, flat/smooth/centroid/noperspective must match between the fragment
+// stage and the stage preceding it, while HLSL bytecode only tells us the
+// interpolation in the pixel shader.
+// Returns 1 for languages below LANG_430, 0 otherwise.
+static int PixelInterpDependency(const GLLang eLang)
+{
+    return (eLang >= LANG_430) ? 0 : 1;
+}
+
+// Returns 0 for LANG_ES_100 and LANG_120, 1 for every other language.
+static int HaveUVec(const GLLang eLang)
+{
+    if (eLang == LANG_ES_100 || eLang == LANG_120)
+        return 0;
+    return 1;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_400; 0 otherwise.
+static int HaveGather(const GLLang eLang)
+{
+	if (eLang == LANG_ES_310)
+		return 1;
+	return (eLang >= LANG_400) ? 1 : 0;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_420; 0 otherwise.
+static int HaveGatherNonConstOffset(const GLLang eLang)
+{
+	if (eLang == LANG_ES_310)
+		return 1;
+	return (eLang >= LANG_420) ? 1 : 0;
+}
+
+
+// Returns 1 for any language >= LANG_400, 0 otherwise.
+static int HaveQueryLod(const GLLang eLang)
+{
+	return (eLang >= LANG_400) ? 1 : 0;
+}
+
+// Returns 1 for any language >= LANG_430, 0 otherwise.
+static int HaveQueryLevels(const GLLang eLang)
+{
+	return (eLang >= LANG_430) ? 1 : 0;
+}
+
+// Returns 1 for any language >= LANG_150, 0 otherwise.
+static int HaveFragmentCoordConventions(const GLLang eLang)
+{
+	return (eLang >= LANG_150) ? 1 : 0;
+}
+
+// Returns 1 for any language >= LANG_150, 0 otherwise.
+static int HaveGeometryShaderARB(const GLLang eLang)
+{
+	return (eLang >= LANG_150) ? 1 : 0;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_420; 0 otherwise.
+static int HaveAtomicCounter(const GLLang eLang)
+{
+	if (eLang == LANG_ES_310)
+		return 1;
+	return (eLang >= LANG_420) ? 1 : 0;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_430; 0 otherwise.
+static int HaveAtomicMem(const GLLang eLang)
+{
+	if (eLang == LANG_ES_310)
+		return 1;
+	return (eLang >= LANG_430) ? 1 : 0;
+}
+
+// Returns 1 for any language >= LANG_420, 0 otherwise.
+static int HaveImageAtomics(const GLLang eLang)
+{
+	return (eLang >= LANG_420) ? 1 : 0;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_430; 0 otherwise.
+static int HaveCompute(const GLLang eLang)
+{
+	if (eLang == LANG_ES_310)
+		return 1;
+	return (eLang >= LANG_430) ? 1 : 0;
+}
+
+// Returns 1 for LANG_ES_310 and any language >= LANG_420; 0 otherwise.
+static int HaveImageLoadStore(const GLLang eLang)
+{
+	if (eLang == LANG_ES_310)
+		return 1;
+	return (eLang >= LANG_420) ? 1 : 0;
+}
+
+#endif
--- a/src/internal_includes/reflect.h
+++ b/src/internal_includes/reflect.h
@ -0,0 +1,27 @@
+#ifndef REFLECT_H
+#define REFLECT_H
+
+#include "hlslcc.h"
+
+struct ShaderPhase_TAG;
+
+// Set of pointers handed to LoadShaderInfo() as its psChunks argument.
+// NOTE(review): each member presumably points at the start of the correspondingly
+// named reflection chunk inside the shader blob, and is NULL when that chunk is
+// absent - confirm against the decoder that fills this struct in.
+typedef struct
+{
+    uint32_t* pui32Inputs;
+    uint32_t* pui32Outputs;
+    uint32_t* pui32Resources;
+    uint32_t* pui32Interfaces;
+    uint32_t* pui32Inputs11;
+    uint32_t* pui32Outputs11;
+	uint32_t* pui32OutputsWithStreams;
+	uint32_t* pui32PatchConstants;
+	uint32_t* pui32PatchConstants11;
+} ReflectionChunks;
+
+void LoadShaderInfo(const uint32_t ui32MajorVersion,
+    const uint32_t ui32MinorVersion,
+    const ReflectionChunks* psChunks,
+    ShaderInfo* psInfo, uint32_t decodeFlags);
+
+#endif
+
--- a/src/internal_includes/toGLSL.h
+++ b/src/internal_includes/toGLSL.h
@ -0,0 +1,107 @@
+#pragma once
+
+#include "hlslcc.h"
+#include "internal_includes/Translator.h"
+
+class HLSLCrossCompilerContext;
+
+// GLSL output backend: a Translator subclass that carries the selected GLLang
+// target and declares the helpers used while emitting GLSL text.
+// Only declarations live here; the definitions are in the corresponding .cpp files.
+class ToGLSL : public Translator
+{
+protected:
+	// Target language; starts out as LANG_DEFAULT (see the constructor).
+	GLLang language;
+public:
+	explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT) {}
+	// Sets the target language according to the given input. If LANG_DEFAULT is passed, autodetects the language. Returns the selected language.
+	GLLang SetLanguage(GLLang suggestedLanguage);
+
+	// Translator interface.
+	virtual bool Translate();
+	virtual void TranslateDeclaration(const Declaration* psDecl);
+	virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
+	virtual void SetIOPrefixes();
+
+private:
+
+	// Operand and instruction translation. ui32ComponentMask defaults to all four components.
+	void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
+	void TranslateInstruction(Instruction* psInst, bool isEmbedded = false);
+
+	void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
+
+	void TranslateOperandIndex(const Operand* psOperand, int index);
+	void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add);
+
+	// Helpers for emitting the destination side of an assignment.
+	void AddOpAssignToDestWithMask(const Operand* psDest,
+		SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
+	void AddAssignToDest(const Operand* psDest,
+		SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
+	void AddAssignPrologue(int numParenthesis, bool isEmbedded = false);
+
+	// Declaration helpers.
+	void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName);
+	void HandleOutputRedirect(const Declaration *psDecl, const char *Precision);
+	void HandleInputRedirect(const Declaration *psDecl, const char *Precision);
+
+	void AddUserOutput(const Declaration* psDecl);
+	void DeclareStructConstants(const uint32_t ui32BindingPoint,
+		const ConstantBuffer* psCBuf, const Operand* psOperand,
+		bstring glsl);
+
+	// Comparison kinds accepted by AddComparison().
+	typedef enum
+	{
+		CMP_EQ,
+		CMP_LT,
+		CMP_GE,
+		CMP_NE,
+	} ComparisonType;
+	
+	void AddComparison(Instruction* psInst, ComparisonType eType,
+		uint32_t typeFlag);
+
+	// Per-instruction emit helpers. The int dest/src parameters are
+	// presumably indices into the instruction's operand list - TODO confirm.
+	void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false);
+	void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
+	void CallBinaryOp(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false);
+	void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
+		int dest, int src0, int src1, int src2, uint32_t dataType);
+	void CallHelper3(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
+	void CallHelper2(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int paramsShouldFollowWriteMask);
+	void CallHelper2Int(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int paramsShouldFollowWriteMask);
+	void CallHelper2UInt(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int paramsShouldFollowWriteMask);
+	void CallHelper1(const char* name, Instruction* psInst,
+		int dest, int src0, int paramsShouldFollowWriteMask);
+	void CallHelper1Int(
+		const char* name,
+		Instruction* psInst,
+		const int dest,
+		const int src0,
+		int paramsShouldFollowWriteMask);
+	// Texture, resource and memory access helpers.
+	void TranslateTexelFetch(
+		Instruction* psInst,
+		const ResourceBinding* psBinding,
+		bstring glsl);
+	void TranslateTexelFetchOffset(
+		Instruction* psInst,
+		const ResourceBinding* psBinding,
+		bstring glsl);
+	void TranslateTexCoord(
+		const RESOURCE_DIMENSION eResDim,
+		Operand* psTexCoordOperand);
+	void GetResInfoData(Instruction* psInst, int index, int destElem);
+	void TranslateTextureSample(Instruction* psInst,
+		uint32_t ui32Flags);
+	void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, 
+		const Operand* psByteAddr, uint32_t offset, uint32_t mask);
+	void TranslateShaderStorageStore(Instruction* psInst);
+	void TranslateShaderStorageLoad(Instruction* psInst);
+	void TranslateAtomicMemOp(Instruction* psInst);
+	void TranslateConditional(
+		Instruction* psInst,
+		bstring glsl);
+
+};
+
+
+
+
--- a/src/internal_includes/toGLSLOperand.h
+++ b/src/internal_includes/toGLSLOperand.h
@ -0,0 +1,23 @@
+#ifndef TO_GLSL_OPERAND_H
+#define TO_GLSL_OPERAND_H
+
+#include <stdint.h>
+#include "bstrlib.h"
+#include "ShaderInfo.h"
+
+class HLSLCrossCompilerContext;
+
+//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag);
+// Translate operand but add additional component mask
+//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask);
+
+void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase);
+void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase);
+
+void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
+std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
+
+std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
+void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
+
+#endif
--- a/src/internal_includes/toMetal.h
+++ b/src/internal_includes/toMetal.h
@ -0,0 +1,193 @@
+
+#pragma once
+#include "internal_includes/Translator.h"
+#include <map>
+#include <vector>
+
+// Contents of one struct definition, kept as a vector of strings.
+struct StructDefinition
+{
+	std::vector<std::string> m_Members; // One string per struct member
+	std::vector<std::string> m_Dependencies; // Names of the structs this struct depends on
+	bool m_IsPrinted; // Has this struct been printed out yet?
+
+	// Starts out with no members, no dependencies and not yet printed.
+	StructDefinition()
+		: m_Members()
+		, m_Dependencies()
+		, m_IsPrinted(false)
+	{
+	}
+};
+
+typedef std::map<std::string, StructDefinition> StructDefinitions;
+
+// Map of extra function definitions we need to add before the shader body but after the declarations.
+typedef std::map<std::string, std::string> FunctionDefinitions;
+
+// Helper that hands out binding slot indices.
+// (Needed because both UAVs and textures use the same slots in Metal, as do constant buffers and other buffers etc.)
+// A slot is assigned the first time a (register number, bind type) pair is requested;
+// later requests for the same pair return the same slot.
+class BindingSlotAllocator
+{
+	typedef std::map<uint32_t, uint32_t> SlotMap;
+	SlotMap m_Allocations;
+public:
+	BindingSlotAllocator() : m_Allocations(), m_NextFreeSlot(0) {}
+
+	enum BindType
+	{
+		ConstantBuffer = 0,
+		RWBuffer,
+		Texture,
+		UAV
+	};
+
+	// Returns the slot for (regNo, type), allocating the next free one on first use.
+	uint32_t GetBindingSlot(uint32_t regNo, BindType type)
+	{
+		// Lookup key: the register number with the bind type stored in the highest 16 bits.
+		const uint32_t lookupKey = (uint32_t(type) << 16) | regNo;
+
+		// insert() leaves an existing entry untouched and reports whether a new one was created.
+		const std::pair<SlotMap::iterator, bool> res = m_Allocations.insert(std::make_pair(lookupKey, m_NextFreeSlot));
+		if (res.second)
+			++m_NextFreeSlot;
+
+		return res.first->second;
+	}
+
+private:
+	uint32_t m_NextFreeSlot;
+};
+
+
+// Metal Shading Language output backend: a Translator subclass declaring the
+// helpers and bookkeeping used while emitting Metal source.
+// Only declarations live here; the definitions are in the corresponding .cpp files.
+class ToMetal : public Translator
+{
+protected:
+	// Initialized to LANG_DEFAULT by the constructor, matching ToGLSL.
+	GLLang language;
+public:
+	// language is now initialized explicitly so that it never holds an indeterminate value.
+	explicit ToMetal(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_ShadowSamplerDeclared(false) {}
+
+	// Translator interface.
+	virtual bool Translate();
+	virtual void TranslateDeclaration(const Declaration *psDecl);
+	virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
+	// Returns the translated operand as a string. ui32ComponentMask defaults to all four components.
+	std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
+
+	virtual void SetIOPrefixes();
+
+private:
+	void TranslateInstruction(Instruction* psInst);
+
+	void DeclareBuiltinInput(const Declaration *psDecl);
+	void DeclareBuiltinOutput(const Declaration *psDecl);
+
+	// Retrieve the name of the output / input struct for this shader
+	std::string GetOutputStructName() const;
+	std::string GetInputStructName() const;
+
+	void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName);
+	void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName);
+
+	void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint);
+	void DeclareStructType(const std::string &name, const std::vector<ShaderVar> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false);
+	void DeclareStructType(const std::string &name, const std::vector<ShaderVarType> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0);
+	void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0);
+	void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0);
+	void DeclareBufferVariable(const Declaration *psDecl, const bool isRaw, const bool isUAV);
+
+	void DeclareResource(const Declaration *psDecl);
+	void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim);
+
+	void DeclareOutput(const Declaration *decl);
+
+	void PrintStructDeclarations(StructDefinitions &defs);
+
+	std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber);
+
+	// ToMetalOperand.cpp
+	std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true);
+	std::string TranslateOperandIndex(const Operand* psOperand, int index);
+	std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
+
+	// ToMetalInstruction.cpp
+
+	void AddOpAssignToDestWithMask(const Operand* psDest,
+		SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
+	void AddAssignToDest(const Operand* psDest,
+		SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
+	void AddAssignPrologue(int numParenthesis);
+
+	// Comparison kinds accepted by AddComparison().
+	typedef enum
+	{
+		CMP_EQ,
+		CMP_LT,
+		CMP_GE,
+		CMP_NE,
+	} ComparisonType;
+
+	void AddComparison(Instruction* psInst, ComparisonType eType,
+		uint32_t typeFlag);
+
+	void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc);
+	void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
+	void CallBinaryOp(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType);
+	void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
+		int dest, int src0, int src1, int src2, uint32_t dataType);
+	void CallHelper3(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
+	void CallHelper2(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int paramsShouldFollowWriteMask);
+	void CallHelper2Int(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int paramsShouldFollowWriteMask);
+	void CallHelper2UInt(const char* name, Instruction* psInst,
+		int dest, int src0, int src1, int paramsShouldFollowWriteMask);
+	void CallHelper1(const char* name, Instruction* psInst,
+		int dest, int src0, int paramsShouldFollowWriteMask);
+	void CallHelper1Int(
+		const char* name,
+		Instruction* psInst,
+		const int dest,
+		const int src0,
+		int paramsShouldFollowWriteMask);
+	void TranslateTexelFetch(
+		Instruction* psInst,
+		const ResourceBinding* psBinding,
+		bstring glsl);
+	void TranslateTexelFetchOffset(
+		Instruction* psInst,
+		const ResourceBinding* psBinding,
+		bstring glsl);
+	void TranslateTexCoord(
+		const RESOURCE_DIMENSION eResDim,
+		Operand* psTexCoordOperand);
+	void GetResInfoData(Instruction* psInst, int index, int destElem);
+	void TranslateTextureSample(Instruction* psInst,
+		uint32_t ui32Flags);
+	void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
+		const Operand* psByteAddr, uint32_t offset, uint32_t mask);
+	void TranslateShaderStorageStore(Instruction* psInst);
+	void TranslateShaderStorageLoad(Instruction* psInst);
+	void TranslateAtomicMemOp(Instruction* psInst);
+	void TranslateConditional(
+		Instruction* psInst,
+		bstring glsl);
+
+	// The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters
+	StructDefinitions m_StructDefinitions;
+
+	// A <function name, body text> map of extra helper functions we'll need.
+	FunctionDefinitions m_FunctionDefinitions;
+
+	// Separate slot allocators for textures and for buffers.
+	BindingSlotAllocator m_TextureSlots;
+	BindingSlotAllocator m_BufferSlots;
+
+	std::string m_ExtraGlobalDefinitions;
+
+	// Starts out false (see the constructor).
+	bool m_ShadowSamplerDeclared;
+
+	void EnsureShadowSamplerDeclared();
+
+	// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
+	void DeclareExtraFunction(const std::string &name, const std::string &body);
+
+	// Move all lowp -> mediump
+	void ClampPartialPrecisions();
+};
+
+
--- a/src/internal_includes/toMetalDeclaration.h
+++ b/src/internal_includes/toMetalDeclaration.h
@ -0,0 +1,3 @@
+#pragma once
+
+#include "internal_includes/Declaration.h"
--- a/src/internal_includes/tokens.h
+++ b/src/internal_includes/tokens.h
@ -0,0 +1,783 @@
+#ifndef TOKENS_H
+#define TOKENS_H
+
+#include "hlslcc.h"
+
+// Identifies a shader phase: the main phase, or one of the HS_* phases.
+// SHADER_PHASE_INVALID (-1) marks "no valid phase".
+// Used, for example, as the eShaderPhaseType argument of UpdateOperandReferences().
+enum SHADER_PHASE_TYPE
+{
+	SHADER_PHASE_INVALID = -1,
+	MAIN_PHASE = 0,
+	HS_GLOBAL_DECL_PHASE = 1,
+	HS_CTRL_POINT_PHASE = 2,
+	HS_FORK_PHASE = 3,
+	HS_JOIN_PHASE = 4
+};
+
+// Returns the upper 16 bits of the token, converted to SHADER_TYPE.
+static SHADER_TYPE DecodeShaderType(uint32_t ui32Token)
+{
+	const uint32_t ui32Type = (ui32Token >> 16) & 0xffffu;
+	return (SHADER_TYPE)ui32Type;
+}
+
+// Returns bits 4-7 of the token (the major version field).
+static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token)
+{
+    const uint32_t ui32Major = (ui32Token >> 4) & 0xfu;
+    return ui32Major;
+}
+
+// Returns bits 0-3 of the token (the minor version field).
+static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token)
+{
+    const uint32_t ui32MinorMask = 0xfu;
+    return ui32Token & ui32MinorMask;
+}
+
+// Returns bits 24-30 of the token (the 7-bit instruction length field).
+static uint32_t DecodeInstructionLength(uint32_t ui32Token)
+{
+    const uint32_t ui32Length = (ui32Token >> 24) & 0x7fu;
+    return ui32Length;
+}
+
+// Returns bit 31 of the token: 1 when the "extended" bit is set, 0 otherwise.
+static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token)
+{
+    const uint32_t ui32ExtendedBit = ui32Token >> 31;
+    return ui32ExtendedBit;
+}
+
+// Kind of an extended opcode token, as produced by DecodeExtendedOpcodeType()
+// from the low 6 bits of the token.
+typedef enum EXTENDED_OPCODE_TYPE
+{
+    EXTENDED_OPCODE_EMPTY           = 0,
+    EXTENDED_OPCODE_SAMPLE_CONTROLS = 1,
+    EXTENDED_OPCODE_RESOURCE_DIM = 2,
+    EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3,
+} EXTENDED_OPCODE_TYPE;
+
+// Returns the low 6 bits of the token, converted to EXTENDED_OPCODE_TYPE.
+static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token)
+{
+    const uint32_t ui32Type = ui32Token & 0x3fu;
+    return (EXTENDED_OPCODE_TYPE)ui32Type;
+}
+
+
+// Returns the 4-bit field for component ui32Coord; field i starts at bit 4*i.
+static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
+{
+    const uint32_t ui32Shift = ui32Coord * 4;
+    const uint32_t ui32Field = (ui32Token >> ui32Shift) & 0xF;
+    return (RESOURCE_RETURN_TYPE)ui32Field;
+}
+
+// Returns the 4-bit field for component ui32Coord; field i starts at bit 4*i + 6.
+static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
+{
+    const uint32_t ui32Shift = ui32Coord * 4 + 6;
+    const uint32_t ui32Field = (ui32Token >> ui32Shift) & 0xF;
+    return (RESOURCE_RETURN_TYPE)ui32Field;
+}
+
+enum OPCODE_TYPE
+{
+    //For DX9
+	OPCODE_POW = -6,
+	OPCODE_DP2ADD = -5,
+	OPCODE_LRP = -4,
+	OPCODE_ENDREP = -3,
+	OPCODE_REP = -2,
+    OPCODE_SPECIAL_DCL_IMMCONST = -1,
+
+    OPCODE_ADD,
+    OPCODE_AND,
+    OPCODE_BREAK,
+    OPCODE_BREAKC,
+    OPCODE_CALL,
+    OPCODE_CALLC,
+    OPCODE_CASE,
+    OPCODE_CONTINUE,
+    OPCODE_CONTINUEC,
+    OPCODE_CUT,
+    OPCODE_DEFAULT,
+    OPCODE_DERIV_RTX,
+    OPCODE_DERIV_RTY,
+    OPCODE_DISCARD,
+    OPCODE_DIV,
+    OPCODE_DP2,
+    OPCODE_DP3,
+    OPCODE_DP4,
+    OPCODE_ELSE,
+    OPCODE_EMIT,
+    OPCODE_EMITTHENCUT,
+    OPCODE_ENDIF,
+    OPCODE_ENDLOOP,
+    OPCODE_ENDSWITCH,
+    OPCODE_EQ,
+    OPCODE_EXP,
+    OPCODE_FRC,
+    OPCODE_FTOI,
+    OPCODE_FTOU,
+    OPCODE_GE,
+    OPCODE_IADD,
+    OPCODE_IF,
+    OPCODE_IEQ,
+    OPCODE_IGE,
+    OPCODE_ILT,
+    OPCODE_IMAD,
+    OPCODE_IMAX,
+    OPCODE_IMIN,
+    OPCODE_IMUL,
+    OPCODE_INE,
+    OPCODE_INEG,
+    OPCODE_ISHL,
+    OPCODE_ISHR,
+    OPCODE_ITOF,
+    OPCODE_LABEL,
+    OPCODE_LD,
+    OPCODE_LD_MS,
+    OPCODE_LOG,
+    OPCODE_LOOP,
+    OPCODE_LT,
+    OPCODE_MAD,
+    OPCODE_MIN,
+    OPCODE_MAX,
+    OPCODE_CUSTOMDATA,
+    OPCODE_MOV,
+    OPCODE_MOVC,
+    OPCODE_MUL,
+    OPCODE_NE,
+    OPCODE_NOP,
+    OPCODE_NOT,
+    OPCODE_OR,
+    OPCODE_RESINFO,
+    OPCODE_RET,
+    OPCODE_RETC,
+    OPCODE_ROUND_NE,
+    OPCODE_ROUND_NI,
+    OPCODE_ROUND_PI,
+    OPCODE_ROUND_Z,
+    OPCODE_RSQ,
+    OPCODE_SAMPLE,
+    OPCODE_SAMPLE_C,
+    OPCODE_SAMPLE_C_LZ,
+    OPCODE_SAMPLE_L,
+    OPCODE_SAMPLE_D,
+    OPCODE_SAMPLE_B,
+    OPCODE_SQRT,
+    OPCODE_SWITCH,
+    OPCODE_SINCOS,
+    OPCODE_UDIV,
+    OPCODE_ULT,
+    OPCODE_UGE,
+    OPCODE_UMUL,
+    OPCODE_UMAD,
+    OPCODE_UMAX,
+    OPCODE_UMIN,
+    OPCODE_USHR,
+    OPCODE_UTOF,
+    OPCODE_XOR,
+    OPCODE_DCL_RESOURCE, // DCL* opcodes have
+    OPCODE_DCL_CONSTANT_BUFFER, // custom operand formats.
+    OPCODE_DCL_SAMPLER,
+    OPCODE_DCL_INDEX_RANGE,
+    OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY,
+    OPCODE_DCL_GS_INPUT_PRIMITIVE,
+    OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT,
+    OPCODE_DCL_INPUT,
+    OPCODE_DCL_INPUT_SGV,
+    OPCODE_DCL_INPUT_SIV,
+    OPCODE_DCL_INPUT_PS,
+    OPCODE_DCL_INPUT_PS_SGV,
+    OPCODE_DCL_INPUT_PS_SIV,
+    OPCODE_DCL_OUTPUT,
+    OPCODE_DCL_OUTPUT_SGV,
+    OPCODE_DCL_OUTPUT_SIV,
+    OPCODE_DCL_TEMPS,
+    OPCODE_DCL_INDEXABLE_TEMP,
+    OPCODE_DCL_GLOBAL_FLAGS,
+
+// -----------------------------------------------
+
+    OPCODE_RESERVED_10,
+    
+// ---------- DX 10.1 op codes---------------------
+
+    OPCODE_LOD,
+    OPCODE_GATHER4,
+    OPCODE_SAMPLE_POS,
+    OPCODE_SAMPLE_INFO,
+
+// -----------------------------------------------
+
+    // This should be 10.1's version of NUM_OPCODES
+    OPCODE_RESERVED_10_1,
+
+// ---------- DX 11 op codes---------------------
+    OPCODE_HS_DECLS, // token marks beginning of HS sub-shader
+    OPCODE_HS_CONTROL_POINT_PHASE, // token marks beginning of HS sub-shader
+    OPCODE_HS_FORK_PHASE, // token marks beginning of HS sub-shader
+    OPCODE_HS_JOIN_PHASE, // token marks beginning of HS sub-shader
+
+    OPCODE_EMIT_STREAM,
+    OPCODE_CUT_STREAM,
+    OPCODE_EMITTHENCUT_STREAM,
+    OPCODE_INTERFACE_CALL,
+
+    OPCODE_BUFINFO,
+    OPCODE_DERIV_RTX_COARSE,
+    OPCODE_DERIV_RTX_FINE,
+    OPCODE_DERIV_RTY_COARSE,
+    OPCODE_DERIV_RTY_FINE,
+    OPCODE_GATHER4_C,
+    OPCODE_GATHER4_PO,
+    OPCODE_GATHER4_PO_C,
+    OPCODE_RCP,
+    OPCODE_F32TOF16,
+    OPCODE_F16TOF32,
+    OPCODE_UADDC,
+    OPCODE_USUBB,
+    OPCODE_COUNTBITS,
+    OPCODE_FIRSTBIT_HI,
+    OPCODE_FIRSTBIT_LO,
+    OPCODE_FIRSTBIT_SHI,
+    OPCODE_UBFE,
+    OPCODE_IBFE,
+    OPCODE_BFI,
+    OPCODE_BFREV,
+    OPCODE_SWAPC,
+
+    OPCODE_DCL_STREAM,
+    OPCODE_DCL_FUNCTION_BODY,
+    OPCODE_DCL_FUNCTION_TABLE,
+    OPCODE_DCL_INTERFACE,
+    
+    OPCODE_DCL_INPUT_CONTROL_POINT_COUNT,
+    OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT,
+    OPCODE_DCL_TESS_DOMAIN,
+    OPCODE_DCL_TESS_PARTITIONING,
+    OPCODE_DCL_TESS_OUTPUT_PRIMITIVE,
+    OPCODE_DCL_HS_MAX_TESSFACTOR,
+    OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT,
+    OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT,
+
+    OPCODE_DCL_THREAD_GROUP,
+    OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED,
+    OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW,
+    OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
+    OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
+    OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
+    OPCODE_DCL_RESOURCE_RAW,
+    OPCODE_DCL_RESOURCE_STRUCTURED,
+    OPCODE_LD_UAV_TYPED,
+    OPCODE_STORE_UAV_TYPED,
+    OPCODE_LD_RAW,
+    OPCODE_STORE_RAW,
+    OPCODE_LD_STRUCTURED,
+    OPCODE_STORE_STRUCTURED,
+    OPCODE_ATOMIC_AND,
+    OPCODE_ATOMIC_OR,
+    OPCODE_ATOMIC_XOR,
+    OPCODE_ATOMIC_CMP_STORE,
+    OPCODE_ATOMIC_IADD,
+    OPCODE_ATOMIC_IMAX,
+    OPCODE_ATOMIC_IMIN,
+    OPCODE_ATOMIC_UMAX,
+    OPCODE_ATOMIC_UMIN,
+    OPCODE_IMM_ATOMIC_ALLOC,
+    OPCODE_IMM_ATOMIC_CONSUME,
+    OPCODE_IMM_ATOMIC_IADD,
+    OPCODE_IMM_ATOMIC_AND,
+    OPCODE_IMM_ATOMIC_OR,
+    OPCODE_IMM_ATOMIC_XOR,
+    OPCODE_IMM_ATOMIC_EXCH,
+    OPCODE_IMM_ATOMIC_CMP_EXCH,
+    OPCODE_IMM_ATOMIC_IMAX,
+    OPCODE_IMM_ATOMIC_IMIN,
+    OPCODE_IMM_ATOMIC_UMAX,
+    OPCODE_IMM_ATOMIC_UMIN,   
+    OPCODE_SYNC,
+    
+    OPCODE_DADD,
+    OPCODE_DMAX,
+    OPCODE_DMIN,
+    OPCODE_DMUL,
+    OPCODE_DEQ,
+    OPCODE_DGE,
+    OPCODE_DLT,
+    OPCODE_DNE,
+    OPCODE_DMOV,
+    OPCODE_DMOVC,
+    OPCODE_DTOF,
+    OPCODE_FTOD,
+
+    OPCODE_EVAL_SNAPPED,
+    OPCODE_EVAL_SAMPLE_INDEX,
+    OPCODE_EVAL_CENTROID,
+    
+    OPCODE_DCL_GS_INSTANCE_COUNT,
+
+    OPCODE_ABORT,
+    OPCODE_DEBUG_BREAK,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D11.0 opcodes
+    OPCODE_RESERVED_11,
+
+    OPCODE_DDIV,
+    OPCODE_DFMA,
+    OPCODE_DRCP,
+
+    OPCODE_MSAD,
+
+    OPCODE_DTOI,
+    OPCODE_DTOU,
+    OPCODE_ITOD,
+    OPCODE_UTOD,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D11.1 opcodes
+    OPCODE_RESERVED_11_1,
+
+    NUM_OPCODES,
+    OPCODE_INVALID = NUM_OPCODES,
+};
+
+// Returns the low 11 bits of the token, converted to OPCODE_TYPE.
+static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token)
+{
+    const uint32_t ui32Opcode = ui32Token & 0x7ffu;
+    return (OPCODE_TYPE)ui32Opcode;
+}
+
+// Index dimension of an operand, as produced by DecodeOperandIndexDimension()
+// from bits 20-21 of the operand token.
+typedef enum
+{
+    INDEX_0D,
+    INDEX_1D,
+    INDEX_2D,
+    INDEX_3D,
+} OPERAND_INDEX_DIMENSION;
+
+// Returns bits 20-21 of the token, converted to OPERAND_INDEX_DIMENSION.
+static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token)
+{
+	const uint32_t ui32Dimension = (ui32Token >> 20) & 0x3u;
+	return (OPERAND_INDEX_DIMENSION)ui32Dimension;
+}
+
+typedef enum OPERAND_TYPE
+{
+    OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10,
+	OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9,
+	OPERAND_TYPE_SPECIAL_TEXCOORD = -8,
+    OPERAND_TYPE_SPECIAL_POSITION = -7,
+    OPERAND_TYPE_SPECIAL_FOG = -6,
+    OPERAND_TYPE_SPECIAL_POINTSIZE = -5,
+    OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4,
+    OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3,
+    OPERAND_TYPE_SPECIAL_ADDRESS = -2,
+    OPERAND_TYPE_SPECIAL_IMMCONST = -1,
+    OPERAND_TYPE_TEMP           = 0,  // Temporary Register File
+    OPERAND_TYPE_INPUT          = 1,  // General Input Register File
+    OPERAND_TYPE_OUTPUT         = 2,  // General Output Register File
+    OPERAND_TYPE_INDEXABLE_TEMP = 3,  // Temporary Register File (indexable)
+    OPERAND_TYPE_IMMEDIATE32    = 4,  // 32bit/component immediate value(s)
+                                          // If for example, operand token bits
+                                          // [01:00]==OPERAND_4_COMPONENT,
+                                          // this means that the operand type:
+                                          // OPERAND_TYPE_IMMEDIATE32
+                                          // results in 4 additional 32bit
+                                          // DWORDS present for the operand.
+    OPERAND_TYPE_IMMEDIATE64    = 5,  // 64bit/comp.imm.val(s)HI:LO
+    OPERAND_TYPE_SAMPLER        = 6,  // Reference to sampler state
+    OPERAND_TYPE_RESOURCE       = 7,  // Reference to memory resource (e.g. texture)
+    OPERAND_TYPE_CONSTANT_BUFFER= 8,  // Reference to constant buffer
+    OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER= 9,  // Reference to immediate constant buffer
+    OPERAND_TYPE_LABEL          = 10, // Label
+    OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID
+    OPERAND_TYPE_OUTPUT_DEPTH   = 12, // Output Depth
+    OPERAND_TYPE_NULL           = 13, // Null register, used to discard results of operations
+                                               // Below Are operands new in DX 10.1
+    OPERAND_TYPE_RASTERIZER     = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources
+    OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar)
+                                               // Below Are operands new in DX 11
+    OPERAND_TYPE_STREAM         = 16, // Reference to GS stream output resource
+    OPERAND_TYPE_FUNCTION_BODY  = 17, // Reference to a function definition
+    OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class
+    OPERAND_TYPE_INTERFACE      = 19, // Reference to an interface
+    OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function
+    OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function
+    OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is
+    OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID
+    OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID
+    OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them)
+    OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them)
+    OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them)
+    OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point
+    OPERAND_TYPE_THIS_POINTER       = 29, // Reference to an interface this pointer
+    OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u#
+    OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g#
+    OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID
+    OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID
+    OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group
+    OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input
+    OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value.
+    OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID
+    OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth
+    OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL    = 39, // Output Depth, forced to be less than or equal to current depth
+    OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter
+} OPERAND_TYPE;
+
+// Returns bits 12-19 of the token, converted to OPERAND_TYPE.
+static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token)
+{
+	const uint32_t ui32Type = (ui32Token >> 12) & 0xffu;
+	return (OPERAND_TYPE)ui32Type;
+}
+
+// Extracts the SPECIAL_NAME value held in the low 16 bits of the token.
+static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token)
+{
+    const uint32_t ui32NameBits = ui32Token & 0xffff;
+    return (SPECIAL_NAME)ui32NameBits;
+}
+
+// Ways in which one operand index can be encoded. The note on each value
+// states what additional data accompanies that form.
+typedef enum OPERAND_INDEX_REPRESENTATION
+{
+    OPERAND_INDEX_IMMEDIATE32               = 0, // Extra DWORD
+    OPERAND_INDEX_IMMEDIATE64               = 1, // 2 Extra DWORDs
+                                                     //   (HI32:LO32)
+    OPERAND_INDEX_RELATIVE                  = 2, // Extra operand
+    OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by
+                                                     //   extra operand
+    OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS
+                                                     //   (HI32:LO32) followed
+                                                     //   by extra operand
+} OPERAND_INDEX_REPRESENTATION;
+
+// Extracts the index representation for index number ui32Dimension.
+// The per-index fields start at bit 22 and are spaced 3 bits apart; only the
+// low 2 bits of each field are read, so the result is always in the range 0..3.
+static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token)
+{
+    const uint32_t ui32FieldShift = 22 + 3 * (ui32Dimension & 3);
+    return (OPERAND_INDEX_REPRESENTATION)((ui32Token >> ui32FieldShift) & 0x3);
+}
+
+// Component-count selector of an operand (decoded from the low 2 bits of the
+// operand token by DecodeOperandNumComponents).
+typedef enum OPERAND_NUM_COMPONENTS
+{
+    OPERAND_0_COMPONENT = 0,
+    OPERAND_1_COMPONENT = 1,
+    OPERAND_4_COMPONENT = 2,
+    OPERAND_N_COMPONENT = 3 // unused for now
+} OPERAND_NUM_COMPONENTS;
+
+// Extracts the OPERAND_NUM_COMPONENTS selector from bits [1:0] of the token.
+static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token)
+{
+    const uint32_t ui32CountBits = ui32Token & 0x3;
+    return (OPERAND_NUM_COMPONENTS)ui32CountBits;
+}
+
+// How the components of a 4-component operand are selected
+// (decoded from bits [3:2] by DecodeOperand4CompSelMode).
+typedef enum OPERAND_4_COMPONENT_SELECTION_MODE
+{
+    OPERAND_4_COMPONENT_MASK_MODE    = 0,  // mask 4 components
+    OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,  // swizzle 4 components
+    OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components
+} OPERAND_4_COMPONENT_SELECTION_MODE;
+
+// Extracts the 4-component selection mode from bits [3:2] of the token.
+static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token)
+{
+    const uint32_t ui32ModeBits = (ui32Token >> 2) & 0x3;
+    return (OPERAND_4_COMPONENT_SELECTION_MODE)ui32ModeBits;
+}
+
+// One bit per component of a 4-component mask. The R/G/B/A names are aliases
+// of X/Y/Z/W, and MASK_ALL is the union of all four bits.
+#define OPERAND_4_COMPONENT_MASK_X      0x00000001
+#define OPERAND_4_COMPONENT_MASK_Y      0x00000002
+#define OPERAND_4_COMPONENT_MASK_Z      0x00000004
+#define OPERAND_4_COMPONENT_MASK_W      0x00000008
+#define OPERAND_4_COMPONENT_MASK_R      OPERAND_4_COMPONENT_MASK_X
+#define OPERAND_4_COMPONENT_MASK_G      OPERAND_4_COMPONENT_MASK_Y
+#define OPERAND_4_COMPONENT_MASK_B      OPERAND_4_COMPONENT_MASK_Z
+#define OPERAND_4_COMPONENT_MASK_A      OPERAND_4_COMPONENT_MASK_W
+#define OPERAND_4_COMPONENT_MASK_ALL    0x0000000f
+
+// Returns the 4-bit component mask stored in bits [7:4] of the token.
+static uint32_t DecodeOperand4CompMask(uint32_t ui32Token)
+{
+    const uint32_t ui32MaskBits = (ui32Token >> 4) & 0xf;
+    return ui32MaskBits;
+}
+
+// Returns the 8-bit swizzle (four 2-bit selectors) stored in bits [11:4].
+static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token)
+{
+    const uint32_t ui32SwizzleBits = (ui32Token >> 4) & 0xff;
+    return ui32SwizzleBits;
+}
+
+// Returns the 2-bit "select 1 component" index stored in bits [5:4].
+static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token)
+{
+    const uint32_t ui32SelectBits = (ui32Token >> 4) & 0x3;
+    return ui32SelectBits;
+}
+
+// Component indices (x=0 .. w=3); the swizzle constants in this header are
+// built from these, 2 bits per component.
+#define OPERAND_4_COMPONENT_X      0
+#define OPERAND_4_COMPONENT_Y      1
+#define OPERAND_4_COMPONENT_Z      2
+#define OPERAND_4_COMPONENT_W      3
+
+// Pre-built swizzles: 2 bits per destination component, component 0 in the
+// lowest bits. These are the un-shifted values (not moved to bit 4 of a token).
+// NO_SWIZZLE is the identity swizzle .xyzw (0xE4).
+static const uint32_t NO_SWIZZLE = (( (OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y<<2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6))/*<<4*/);
+
+// Replicated single-component swizzles: .xxxx (0x00), .yyyy (0x55), .zzzz (0xAA), .wwww (0xFF).
+static const uint32_t XXXX_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_X << 2) | (OPERAND_4_COMPONENT_X << 4) | (OPERAND_4_COMPONENT_X << 6)));
+static const uint32_t YYYY_SWIZZLE = (((OPERAND_4_COMPONENT_Y) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Y << 4) | (OPERAND_4_COMPONENT_Y << 6)));
+static const uint32_t ZZZZ_SWIZZLE = (((OPERAND_4_COMPONENT_Z) | (OPERAND_4_COMPONENT_Z << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_Z << 6)));
+static const uint32_t WWWW_SWIZZLE = (((OPERAND_4_COMPONENT_W) | (OPERAND_4_COMPONENT_W << 2) | (OPERAND_4_COMPONENT_W << 4) | (OPERAND_4_COMPONENT_W << 6)));
+
+// Returns the 2-bit source selector for destination component `comp`.
+// Selectors start at bit 4, 2 bits each; `comp` is wrapped into 0..3.
+static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp)
+{
+    const uint32_t ui32SelectorShift = 4 + 2 * (comp & 3);
+    return (ui32Token >> ui32SelectorShift) & 3;
+}
+
+// Resource dimension values returned by DecodeResourceDimension and
+// DecodeExtendedResourceDimension.
+typedef enum RESOURCE_DIMENSION
+{
+    RESOURCE_DIMENSION_UNKNOWN = 0,
+    RESOURCE_DIMENSION_BUFFER = 1,
+    RESOURCE_DIMENSION_TEXTURE1D = 2,
+    RESOURCE_DIMENSION_TEXTURE2D = 3,
+    RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+    RESOURCE_DIMENSION_TEXTURE3D = 5,
+    RESOURCE_DIMENSION_TEXTURECUBE = 6,
+    RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+    RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+    RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+    RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
+    RESOURCE_DIMENSION_RAW_BUFFER = 11,
+    RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12,
+} RESOURCE_DIMENSION;
+
+// Extracts the resource dimension from bits [15:11] of the token.
+static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token)
+{
+    const uint32_t ui32DimBits = (ui32Token >> 11) & 0x1f;
+    return (RESOURCE_DIMENSION)ui32DimBits;
+}
+
+// Extracts the resource dimension from bits [10:6] of an extended token.
+static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token)
+{
+    const uint32_t ui32DimBits = (ui32Token >> 6) & 0x1f;
+    return (RESOURCE_DIMENSION)ui32DimBits;
+}
+
+// Test polarity encoded in an instruction token (bit 18, see
+// DecodeInstrTestBool): test for zero or for non-zero.
+typedef enum INSTRUCTION_TEST_BOOLEAN
+{
+    INSTRUCTION_TEST_ZERO       = 0,
+    INSTRUCTION_TEST_NONZERO    = 1
+} INSTRUCTION_TEST_BOOLEAN;
+
+// Extracts the zero/non-zero test selector from bit 18 of the token.
+static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token)
+{
+    const uint32_t ui32TestBit = (ui32Token >> 18) & 0x1;
+    return (INSTRUCTION_TEST_BOOLEAN)ui32TestBit;
+}
+
+// Returns 1 when bit 31 (the "extended" flag) of the token is set, else 0.
+static uint32_t DecodeIsOperandExtended(uint32_t ui32Token)
+{
+    // Bit 31 is the top bit, so the shift alone isolates it.
+    return ui32Token >> 31;
+}
+
+// Kind of an extended operand token (low 6 bits, see DecodeExtendedOperandType).
+typedef enum EXTENDED_OPERAND_TYPE
+{
+    EXTENDED_OPERAND_EMPTY            = 0,
+    EXTENDED_OPERAND_MODIFIER         = 1,
+} EXTENDED_OPERAND_TYPE;
+
+// Extracts the extended-operand type from bits [5:0] of the token.
+static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token)
+{
+    const uint32_t ui32TypeBits = ui32Token & 0x3f;
+    return (EXTENDED_OPERAND_TYPE)ui32TypeBits;
+}
+
+// Operand modifier carried by an extended operand token
+// (bits [13:6], see DecodeExtendedOperandModifier).
+typedef enum OPERAND_MODIFIER
+{
+    OPERAND_MODIFIER_NONE     = 0,
+    OPERAND_MODIFIER_NEG      = 1,
+    OPERAND_MODIFIER_ABS      = 2,
+    OPERAND_MODIFIER_ABSNEG   = 3,
+} OPERAND_MODIFIER;
+
+// Extracts the operand modifier from bits [13:6] of an extended operand token.
+static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token)
+{
+    const uint32_t ui32ModifierBits = (ui32Token >> 6) & 0xff;
+    return (OPERAND_MODIFIER)ui32ModifierBits;
+}
+
+// Individual global-flag bits (bits 11..18). All of them lie inside the
+// 0x00fff800 field returned un-shifted by DecodeGlobalFlags, so they can be
+// tested directly against its result.
+static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED = (1<<11);
+static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = (1<<12);
+static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL = (1<<13);
+static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = (1<<14);
+static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION = (1<<15);
+static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION = (1<<16);
+static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS = (1<<17);
+static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS = (1<<18);
+
+// Returns the global-flags field (bits [23:11]) of the token, left in place
+// (not shifted down), with every other bit cleared.
+static uint32_t DecodeGlobalFlags(uint32_t ui32Token)
+{
+    const uint32_t ui32FlagFieldMask = 0x00fff800;
+    return ui32Token & ui32FlagFieldMask;
+}
+
+// Extracts the interpolation mode from bits [14:11] of the token.
+static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token)
+{
+    const uint32_t ui32ModeBits = (ui32Token >> 11) & 0xf;
+    return (INTERPOLATION_MODE)ui32ModeBits;
+}
+
+
+// Primitive topology values (see DecodeGSOutputPrimitiveTopology).
+typedef enum PRIMITIVE_TOPOLOGY
+{
+    PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
+    PRIMITIVE_TOPOLOGY_POINTLIST = 1,
+    PRIMITIVE_TOPOLOGY_LINELIST = 2,
+    PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
+    PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
+    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
+    // 6 is reserved for legacy triangle fans
+    // Adjacency values are equal to (0x8 | non-adjacency),
+    // e.g. LINELIST (2) -> LINELIST_ADJ (10):
+    PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
+    PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
+    PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
+    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
+} PRIMITIVE_TOPOLOGY;
+
+// Extracts the primitive topology from bits [16:11] of the token.
+static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token)
+{
+    const uint32_t ui32TopologyBits = (ui32Token >> 11) & 0x3f;
+    return (PRIMITIVE_TOPOLOGY)ui32TopologyBits;
+}
+
+// Input primitive values (see DecodeGSInputPrimitive). Values 8..39 are the
+// N-control-point patches, with value = N + 7.
+typedef enum PRIMITIVE
+{
+    PRIMITIVE_UNDEFINED = 0,
+    PRIMITIVE_POINT = 1,
+    PRIMITIVE_LINE = 2,
+    PRIMITIVE_TRIANGLE = 3,
+    // Adjacency values are equal to (0x4 | non-adjacency),
+    // e.g. LINE (2) -> LINE_ADJ (6):
+    PRIMITIVE_LINE_ADJ = 6,
+    PRIMITIVE_TRIANGLE_ADJ = 7,
+    PRIMITIVE_1_CONTROL_POINT_PATCH = 8,
+    PRIMITIVE_2_CONTROL_POINT_PATCH = 9,
+    PRIMITIVE_3_CONTROL_POINT_PATCH = 10,
+    PRIMITIVE_4_CONTROL_POINT_PATCH = 11,
+    PRIMITIVE_5_CONTROL_POINT_PATCH = 12,
+    PRIMITIVE_6_CONTROL_POINT_PATCH = 13,
+    PRIMITIVE_7_CONTROL_POINT_PATCH = 14,
+    PRIMITIVE_8_CONTROL_POINT_PATCH = 15,
+    PRIMITIVE_9_CONTROL_POINT_PATCH = 16,
+    PRIMITIVE_10_CONTROL_POINT_PATCH = 17,
+    PRIMITIVE_11_CONTROL_POINT_PATCH = 18,
+    PRIMITIVE_12_CONTROL_POINT_PATCH = 19,
+    PRIMITIVE_13_CONTROL_POINT_PATCH = 20,
+    PRIMITIVE_14_CONTROL_POINT_PATCH = 21,
+    PRIMITIVE_15_CONTROL_POINT_PATCH = 22,
+    PRIMITIVE_16_CONTROL_POINT_PATCH = 23,
+    PRIMITIVE_17_CONTROL_POINT_PATCH = 24,
+    PRIMITIVE_18_CONTROL_POINT_PATCH = 25,
+    PRIMITIVE_19_CONTROL_POINT_PATCH = 26,
+    PRIMITIVE_20_CONTROL_POINT_PATCH = 27,
+    PRIMITIVE_21_CONTROL_POINT_PATCH = 28,
+    PRIMITIVE_22_CONTROL_POINT_PATCH = 29,
+    PRIMITIVE_23_CONTROL_POINT_PATCH = 30,
+    PRIMITIVE_24_CONTROL_POINT_PATCH = 31,
+    PRIMITIVE_25_CONTROL_POINT_PATCH = 32,
+    PRIMITIVE_26_CONTROL_POINT_PATCH = 33,
+    PRIMITIVE_27_CONTROL_POINT_PATCH = 34,
+    PRIMITIVE_28_CONTROL_POINT_PATCH = 35,
+    PRIMITIVE_29_CONTROL_POINT_PATCH = 36,
+    PRIMITIVE_30_CONTROL_POINT_PATCH = 37,
+    PRIMITIVE_31_CONTROL_POINT_PATCH = 38,
+    PRIMITIVE_32_CONTROL_POINT_PATCH = 39,
+} PRIMITIVE;
+
+// Extracts the input primitive type from bits [16:11] of the token.
+static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token)
+{
+    const uint32_t ui32PrimitiveBits = (ui32Token >> 11) & 0x3f;
+    return (PRIMITIVE)ui32PrimitiveBits;
+}
+
+// Extracts the tessellator partitioning mode from bits [13:11] of the token.
+static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token)
+{
+    const uint32_t ui32PartitioningBits = (ui32Token >> 11) & 0x7;
+    return (TESSELLATOR_PARTITIONING)ui32PartitioningBits;
+}
+
+// Tessellator domain values (bits [12:11], see DecodeTessDomain).
+typedef enum TESSELLATOR_DOMAIN
+{
+    TESSELLATOR_DOMAIN_UNDEFINED = 0,
+    TESSELLATOR_DOMAIN_ISOLINE   = 1,
+    TESSELLATOR_DOMAIN_TRI       = 2,
+    TESSELLATOR_DOMAIN_QUAD      = 3
+} TESSELLATOR_DOMAIN;
+
+// Extracts the tessellator domain from bits [12:11] of the token.
+static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token)
+{
+    const uint32_t ui32DomainBits = (ui32Token >> 11) & 0x3;
+    return (TESSELLATOR_DOMAIN)ui32DomainBits;
+}
+
+// Extracts the tessellator output primitive from bits [13:11] of the token.
+static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token)
+{
+    const uint32_t ui32OutPrimBits = (ui32Token >> 11) & 0x7;
+    return (TESSELLATOR_OUTPUT_PRIMITIVE)ui32OutPrimBits;
+}
+
+// Individual sync flag bits (bits 11..14). Together they make up the
+// 0x00007800 field that DecodeSyncFlags returns un-shifted.
+static const uint32_t SYNC_THREADS_IN_GROUP = 0x00000800;
+static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY = 0x00001000;
+static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP = 0x00002000;
+static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 0x00004000;
+
+// Returns the sync flag field (bits [14:11]) of the token, left in place
+// (not shifted down), with every other bit cleared.
+static uint32_t DecodeSyncFlags(uint32_t ui32Token)
+{
+    const uint32_t ui32SyncFieldMask = 0x00007800;
+    return ui32Token & ui32SyncFieldMask;
+}
+
+// The number of types that implement this interface
+// (the low 16 bits of the token).
+static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token)
+{
+    const uint32_t ui32LowHalf = ui32Token & 0xffff;
+    return ui32LowHalf;
+}
+
+// The number of interfaces that are defined in this array
+// (the high 16 bits of the token).
+static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token)
+{
+    // The field occupies the top half, so the shift alone isolates it.
+    return ui32Token >> 16;
+}
+
+// Class of a custom-data block (see DecodeCustomDataClass). Values are
+// consecutive starting at 0.
+typedef enum CUSTOMDATA_CLASS
+{
+    CUSTOMDATA_COMMENT = 0,
+    CUSTOMDATA_DEBUGINFO,                      // 1
+    CUSTOMDATA_OPAQUE,                         // 2
+    CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER,  // 3
+    CUSTOMDATA_SHADER_MESSAGE,                 // 4
+} CUSTOMDATA_CLASS;
+
+// Extracts the custom-data class from bits [31:11] of the token.
+static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token)
+{
+    // The field runs up to the top bit, so the shift alone isolates it.
+    const uint32_t ui32ClassBits = ui32Token >> 11;
+    return (CUSTOMDATA_CLASS)ui32ClassBits;
+}
+
+// Returns 1 when the saturate bit (bit 13) of the token is set, else 0.
+static uint32_t DecodeInstructionSaturate(uint32_t ui32Token)
+{
+    return (ui32Token >> 13) & 0x1;
+}
+
+// Minimum-precision values for an operand (bits [16:14] of the token, see
+// DecodeOperandMinPrecision). Value 3 is not assigned.
+typedef enum OPERAND_MIN_PRECISION
+{
+    OPERAND_MIN_PRECISION_DEFAULT    = 0, // Default precision
+                                            // for the shader model
+    OPERAND_MIN_PRECISION_FLOAT_16   = 1, // Min 16 bit/component float
+    OPERAND_MIN_PRECISION_FLOAT_2_8  = 2, // Min 10(2.8)bit/comp. float
+    OPERAND_MIN_PRECISION_SINT_16    = 4, // Min 16 bit/comp. signed integer
+    OPERAND_MIN_PRECISION_UINT_16    = 5, // Min 16 bit/comp. unsigned integer
+} OPERAND_MIN_PRECISION;
+
+// Returns the 3-bit minimum-precision field stored in bits [16:14] of the
+// token, as a raw integer.
+static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token)
+{
+    const uint32_t ui32PrecisionBits = (ui32Token >> 14) & 0x7;
+    return ui32PrecisionBits;
+}
+
+// Returns the 6-bit output control point count stored in bits [16:11].
+static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token)
+{
+    const uint32_t ui32CountBits = (ui32Token >> 11) & 0x3f;
+    return ui32CountBits;
+}
+
+// Selects which coordinate (U, V or W) of an immediate address offset to
+// decode; used as the argument of the IMMEDIATE_ADDRESS_OFFSET_* macros.
+typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD
+{
+    IMMEDIATE_ADDRESS_OFFSET_U        = 0,
+    IMMEDIATE_ADDRESS_OFFSET_V        = 1,
+    IMMEDIATE_ADDRESS_OFFSET_W        = 2,
+} IMMEDIATE_ADDRESS_OFFSET_COORD;
+
+
+// Each coordinate's offset is a 4-bit field; the fields start at bit 9 and
+// are 4 bits apart (U at 9, V at 13, W at 17). Coord is wrapped into 0..3.
+#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&3))
+#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<<IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))
+
+// Returns the raw 4-bit immediate address offset for coordinate eCoord.
+// The value is returned as stored (0..15); no sign extension is applied here.
+static uint32_t DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_COORD eCoord, uint32_t ui32Token)
+{
+    const uint32_t ui32FieldShift = IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord);
+    return (ui32Token >> ui32FieldShift) & 0xf;
+}
+
+// UAV access scope flags
+static const uint32_t GLOBALLY_COHERENT_ACCESS = 0x00010000;
+
+// Returns the coherency flag (bit 16) of the token, left in place, so the
+// result is either 0 or GLOBALLY_COHERENT_ACCESS.
+static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token)
+{
+    const uint32_t ui32CoherencyMask = 0x00010000;
+    return ui32Token & ui32CoherencyMask;
+}
+
+
+// Return-type selector of a resinfo instruction
+// (bits [12:11], see DecodeResInfoReturnType).
+typedef enum RESINFO_RETURN_TYPE
+{
+    RESINFO_INSTRUCTION_RETURN_FLOAT      = 0,
+    RESINFO_INSTRUCTION_RETURN_RCPFLOAT   = 1,
+    RESINFO_INSTRUCTION_RETURN_UINT       = 2
+} RESINFO_RETURN_TYPE;
+
+// Extracts the resinfo return type from bits [12:11] of the token.
+static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token)
+{
+    const uint32_t ui32ReturnTypeBits = (ui32Token >> 11) & 0x3;
+    return (RESINFO_RETURN_TYPE)ui32ReturnTypeBits;
+}
+
+#endif
--- a/src/reflect.cpp
+++ b/src/reflect.cpp
@ -0,0 +1,600 @@
+
+#include "internal_includes/reflect.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/decode.h"
+#include "bstrlib.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+// Makes a reflected name usable as a GLSL identifier: a leading '$' is
+// replaced by '_', except for the names $Element, $Globals and $ThisPointer,
+// which are kept verbatim. Names not starting with '$' are left untouched.
+static void FormatVariableName(std::string & Name)
+{
+    /* Background (MSDN http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx):
+       uniform function parameters appear in the constant table prepended with
+       a dollar sign ($), unlike global variables, to avoid name collisions
+       between local uniform inputs and globals of the same name.
+       '$' is not a valid character in GLSL variable names. */
+    if(Name[0] != '$')
+        return;
+
+    const char* const apszKeptNames[] = { "$Element", "$Globals", "$ThisPointer" };
+    const char* const pszName = Name.c_str();
+
+    for(size_t idx = 0; idx < sizeof(apszKeptNames) / sizeof(apszKeptNames[0]); ++idx)
+    {
+        if(strcmp(pszName, apszKeptNames[idx]) == 0)
+            return;
+    }
+
+    Name[0] = '_';
+}
+
+// Builds a std::string from the NUL-terminated character data that starts at
+// `tokens` (the token pointer is simply reinterpreted as a char pointer).
+static std::string ReadStringFromTokenStream(const uint32_t* tokens)
+{
+    const char* const pszText = reinterpret_cast<const char*>(tokens);
+    return std::string(pszText);
+}
+
+// Returns the index of the lowest set bit in `mask`, i.e. how many low
+// components are unused before the first one the mask covers
+// (e.g. 0x1 -> 0, 0x6 -> 1, 0x8 -> 3).
+// An empty mask returns 0: the bit scan below would otherwise never
+// terminate, because shifting zero never produces a set bit.
+static int MaskToRebaseOffset(const uint32_t mask)
+{
+	if (mask == 0)
+		return 0;
+
+	int res = 0;
+	for (uint32_t m = mask; (m & 1) == 0; m >>= 1)
+	{
+		res++;
+	}
+	return res;
+}
+
+// Parses an input signature chunk and replaces psShaderInfo->psInputSignatures
+// with its elements.
+//   pui32Tokens - first token of the chunk (element count, key, then records)
+//   extended    - non-zero when every record additionally starts with a
+//                 stream token and ends with a minimum-precision token
+static void ReadInputSignatures(const uint32_t* pui32Tokens,
+                        ShaderInfo* psShaderInfo,
+						const int extended)
+{
+    // Semantic-name offsets are byte offsets from the first token of the chunk.
+    const char* const pcChunkStart = (const char*)pui32Tokens;
+    const uint32_t* pui32Cursor = pui32Tokens;
+
+    const uint32_t ui32NumElements = *pui32Cursor++;
+    ++pui32Cursor; // second header token (key) is not used
+
+    psShaderInfo->psInputSignatures.clear();
+    psShaderInfo->psInputSignatures.resize(ui32NumElements);
+
+    for(uint32_t idx = 0; idx != ui32NumElements; ++idx)
+    {
+        ShaderInfo::InOutSignature* const psSig = &psShaderInfo->psInputSignatures[idx];
+
+        // Stream defaults to 0 when the record has no stream token.
+        if(extended)
+            psSig->ui32Stream = *pui32Cursor++;
+        else
+            psSig->ui32Stream = 0;
+
+        const uint32_t ui32NameOffset = *pui32Cursor++;
+        psSig->ui32SemanticIndex = *pui32Cursor++;
+        psSig->eSystemValueType = (SPECIAL_NAME) *pui32Cursor++;
+        psSig->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Cursor++;
+        psSig->ui32Register = *pui32Cursor++;
+
+        // One token packs the component mask (low byte) and, in the next
+        // byte, the mask of components that are read.
+        const uint32_t ui32PackedMasks = *pui32Cursor++;
+        psSig->ui32Mask = ui32PackedMasks & 0x7F;
+        psSig->ui32ReadWriteMask = (ui32PackedMasks >> 8) & 0x7F;
+        psSig->iRebase = MaskToRebaseOffset(psSig->ui32Mask);
+
+        if(extended)
+            psSig->eMinPrec = (MIN_PRECISION) *pui32Cursor++;
+        else
+            psSig->eMinPrec = MIN_PRECISION_DEFAULT;
+
+        psSig->semanticName = ReadStringFromTokenStream((const uint32_t*)(pcChunkStart + ui32NameOffset));
+    }
+}
+
+// Parses an output signature chunk and replaces
+// psShaderInfo->psOutputSignatures with its elements.
+//   pui32Tokens - first token of the chunk (element count, key, then records)
+//   minPrec     - non-zero when every record ends with a minimum-precision token
+//   streams     - non-zero when every record starts with a stream token
+static void ReadOutputSignatures(const uint32_t* pui32Tokens,
+                        ShaderInfo* psShaderInfo,
+						const int minPrec,
+						const int streams)
+{
+    // Semantic-name offsets are byte offsets from the first token of the chunk.
+    const char* const pcChunkStart = (const char*)pui32Tokens;
+    const uint32_t* pui32Cursor = pui32Tokens;
+
+    const uint32_t ui32NumElements = *pui32Cursor++;
+    ++pui32Cursor; // second header token (key) is not used
+
+    psShaderInfo->psOutputSignatures.clear();
+    psShaderInfo->psOutputSignatures.resize(ui32NumElements);
+
+    for(uint32_t idx = 0; idx != ui32NumElements; ++idx)
+    {
+        ShaderInfo::InOutSignature* const psSig = &psShaderInfo->psOutputSignatures[idx];
+
+        // Stream defaults to 0 when the record has no stream token.
+        if(streams)
+            psSig->ui32Stream = *pui32Cursor++;
+        else
+            psSig->ui32Stream = 0;
+
+        const uint32_t ui32NameOffset = *pui32Cursor++;
+        psSig->ui32SemanticIndex = *pui32Cursor++;
+        psSig->eSystemValueType = (SPECIAL_NAME)*pui32Cursor++;
+        psSig->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Cursor++;
+        psSig->ui32Register = *pui32Cursor++;
+
+        // Massage some special inputs/outputs to match the types of GLSL counterparts
+        if (psSig->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
+            psSig->eComponentType = INOUT_COMPONENT_SINT32;
+
+        // One token packs the component mask (low byte) and, in the next
+        // byte, the mask of components that are NEVER written.
+        const uint32_t ui32PackedMasks = *pui32Cursor++;
+        psSig->ui32Mask = ui32PackedMasks & 0x7F;
+        psSig->ui32ReadWriteMask = (ui32PackedMasks >> 8) & 0x7F;
+        psSig->iRebase = MaskToRebaseOffset(psSig->ui32Mask);
+
+        if(minPrec)
+            psSig->eMinPrec = (MIN_PRECISION)*pui32Cursor++;
+        else
+            psSig->eMinPrec = MIN_PRECISION_DEFAULT;
+
+        psSig->semanticName = ReadStringFromTokenStream((const uint32_t*)(pcChunkStart + ui32NameOffset));
+    }
+}
+
+// Parses a patch-constant signature chunk and replaces
+// psShaderInfo->psPatchConstantSignatures with its elements.
+//   pui32Tokens - first token of the chunk (element count, key, then records)
+//   minPrec     - non-zero when every record ends with a minimum-precision token
+//   streams     - non-zero when every record starts with a stream token
+static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens,
+                    ShaderInfo* psShaderInfo,
+					const int minPrec,
+					const int streams)
+{
+    // Semantic-name offsets are byte offsets from the first token of the chunk.
+    const char* const pcChunkStart = (const char*)pui32Tokens;
+    const uint32_t* pui32Cursor = pui32Tokens;
+
+    const uint32_t ui32NumElements = *pui32Cursor++;
+    ++pui32Cursor; // second header token (key) is not used
+
+    psShaderInfo->psPatchConstantSignatures.clear();
+    psShaderInfo->psPatchConstantSignatures.resize(ui32NumElements);
+
+    for(uint32_t idx = 0; idx != ui32NumElements; ++idx)
+    {
+        ShaderInfo::InOutSignature* const psSig = &psShaderInfo->psPatchConstantSignatures[idx];
+
+        // Stream defaults to 0 when the record has no stream token.
+        if(streams)
+            psSig->ui32Stream = *pui32Cursor++;
+        else
+            psSig->ui32Stream = 0;
+
+        const uint32_t ui32NameOffset = *pui32Cursor++;
+        psSig->ui32SemanticIndex = *pui32Cursor++;
+        psSig->eSystemValueType = (SPECIAL_NAME)*pui32Cursor++;
+        psSig->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Cursor++;
+        psSig->ui32Register = *pui32Cursor++;
+
+        // Massage some special inputs/outputs to match the types of GLSL counterparts
+        if (psSig->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
+            psSig->eComponentType = INOUT_COMPONENT_SINT32;
+
+        // One token packs the component mask (low byte) and, in the next
+        // byte, the mask of components that are NEVER written.
+        const uint32_t ui32PackedMasks = *pui32Cursor++;
+        psSig->ui32Mask = ui32PackedMasks & 0x7F;
+        psSig->ui32ReadWriteMask = (ui32PackedMasks >> 8) & 0x7F;
+        psSig->iRebase = MaskToRebaseOffset(psSig->ui32Mask);
+
+        if(minPrec)
+            psSig->eMinPrec = (MIN_PRECISION)*pui32Cursor++;
+        else
+            psSig->eMinPrec = MIN_PRECISION_DEFAULT;
+
+        psSig->semanticName = ReadStringFromTokenStream((const uint32_t*)(pcChunkStart + ui32NameOffset));
+    }
+}
+
+// Removes `suffix` from the end of `name` when `name` ends with it.
+// Returns true if the suffix was present (and has been removed).
+static bool StripResourceNameSuffix(std::string& name, const std::string& suffix)
+{
+	// A name shorter than the suffix cannot end with it. Checking this first
+	// avoids the unsigned wrap-around of (length - suffix length).
+	if (name.length() < suffix.length())
+		return false;
+
+	const size_t suffixStart = name.length() - suffix.length();
+	if (name.compare(suffixStart, suffix.length(), suffix) != 0)
+		return false;
+
+	name.resize(suffixStart);
+	return true;
+}
+
+// Reads one resource binding record.
+//   pui32FirstResourceToken - start of the chunk; the name offset stored in
+//                             the record is a byte offset from this pointer
+//   pui32Tokens             - first token of the record (8 tokens are consumed)
+//   psBinding               - receives the decoded binding
+//   decodeFlags             - when HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME
+//                             is set, a trailing "_highp"/"_mediump"/"_lowp"
+//                             is stripped from the name and stored in ePrecision
+// Returns the token following the record.
+//
+// The suffix test used to compare rfind() with (length - N). For names
+// shorter than the suffix that subtraction wraps around and can equal the
+// npos returned by rfind(), e.g. a 5-character name "matched" "_highp" and
+// was then resized to npos.
+static const uint32_t* ReadResourceBinding(const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags)
+{
+    uint32_t ui32NameOffset = *pui32Tokens++;
+
+	psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken+ui32NameOffset));
+    FormatVariableName(psBinding->name);
+
+    psBinding->eType = (ResourceType)*pui32Tokens++;
+    psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++;
+    psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++;
+    psBinding->ui32NumSamples = *pui32Tokens++;
+    psBinding->ui32BindPoint = *pui32Tokens++;
+    psBinding->ui32BindCount = *pui32Tokens++;
+    psBinding->ui32Flags = *pui32Tokens++;
+	psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN;
+
+	if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME)
+	{
+		// Only the first matching suffix is consumed, in this priority order.
+		if (StripResourceNameSuffix(psBinding->name, "_highp"))
+			psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP;
+		else if (StripResourceNameSuffix(psBinding->name, "_mediump"))
+			psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP;
+		else if (StripResourceNameSuffix(psBinding->name, "_lowp"))
+			psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP;
+	}
+
+    return pui32Tokens;
+}
+
+// Reads a D3D11_SHADER_TYPE_DESC record into *varType and recurses into its
+// struct members.
+//   ui32MajorVersion        - only forwarded to the recursive calls
+//   pui32FirstConstBufToken - start of the chunk; all stored offsets are byte
+//                             offsets from this pointer
+//   pui32tokens             - start of the type record
+//   varType                 - in: name, Parent, ParentCount already set by the
+//                             caller; out: remaining fields, fullName, Members
+static void ReadShaderVariableType(const uint32_t ui32MajorVersion,
+								   const uint32_t* pui32FirstConstBufToken,
+								   const uint32_t* pui32tokens, ShaderVarType* varType)
+{
+    const char* const pcChunkStart = (const char*)pui32FirstConstBufToken;
+    // The first six fields of the record are 16-bit values.
+    const uint16_t* const pui16Fields = (const uint16_t*) pui32tokens;
+
+    varType->Class = (SHADER_VARIABLE_CLASS)pui16Fields[0];
+    varType->Type = (SHADER_VARIABLE_TYPE)pui16Fields[1];
+    varType->Rows = pui16Fields[2];
+    varType->Columns = pui16Fields[3];
+    varType->Elements = pui16Fields[4];
+
+    const uint16_t ui16NumMembers = pui16Fields[5];
+    varType->MemberCount = ui16NumMembers;
+    varType->Members.clear();
+
+    if(varType->ParentCount)
+    {
+        // Add empty brackets for array parents. Indices are filled in later in the printing codes.
+        const char* const pszJoin = (varType->Parent->Elements > 1) ? "[]." : ".";
+        varType->fullName = varType->Parent->fullName + pszJoin + varType->name;
+    }
+
+    if(ui16NumMembers == 0)
+        return;
+
+    varType->Members.resize(ui16NumMembers);
+
+    // The 32-bit token at index 3 holds the byte offset of the member table;
+    // each member entry is three tokens: name offset, type offset, offset.
+    const uint32_t* pui32MemberEntry = (const uint32_t*)(pcChunkStart + pui32tokens[3]);
+
+    for(uint32_t m = 0; m < ui16NumMembers; ++m, pui32MemberEntry += 3)
+    {
+        const uint32_t ui32MemberNameOffset = pui32MemberEntry[0];
+        const uint32_t ui32MemberTypeOffset = pui32MemberEntry[1];
+
+        auto& rMember = varType->Members[m];
+        rMember.Parent = varType;
+        rMember.ParentCount = varType->ParentCount + 1;
+        rMember.Offset = pui32MemberEntry[2];
+        rMember.name = ReadStringFromTokenStream((const uint32_t*)(pcChunkStart + ui32MemberNameOffset));
+
+        // Name and parent links must be in place before recursing: the
+        // recursive call builds the member's fullName from them.
+        ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken,
+            (const uint32_t*)(pcChunkStart + ui32MemberTypeOffset), &rMember);
+    }
+}
+
+// Reads one constant buffer description and all of its variables.
+//   psShaderInfo            - supplies ui32MajorVersion (the variable record
+//                             has four extra tokens when it is >= 5)
+//   pui32FirstConstBufToken - start of the chunk; all stored offsets are byte
+//                             offsets from this pointer
+//   pui32Tokens             - first token of the buffer record (6 tokens)
+//   psBuffer                - receives name, variables and total size
+// Returns the token following the buffer record.
+static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo,
+    const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer)
+{
+    const char* const pcChunkStart = (const char*)pui32FirstConstBufToken;
+
+    // Buffer record: name offset, variable count, variable table offset,
+    // total size, flags, buffer type (the last two are not stored).
+    const uint32_t ui32BufferNameOffset = pui32Tokens[0];
+    const uint32_t ui32NumVars = pui32Tokens[1];
+    const uint32_t ui32VarTableOffset = pui32Tokens[2];
+
+    psBuffer->name = ReadStringFromTokenStream((const uint32_t*)(pcChunkStart + ui32BufferNameOffset));
+    FormatVariableName(psBuffer->name);
+
+    psBuffer->asVars.clear();
+    psBuffer->asVars.resize(ui32NumVars);
+
+    const uint32_t* pui32VarCursor = (const uint32_t*)(pcChunkStart + ui32VarTableOffset);
+
+    for(uint32_t v = 0; v < ui32NumVars; ++v)
+    {
+        //D3D11_SHADER_VARIABLE_DESC
+        ShaderVar * const psVar = &psBuffer->asVars[v];
+
+        const uint32_t ui32VarNameOffset = *pui32VarCursor++;
+        psVar->name = ReadStringFromTokenStream((const uint32_t*)(pcChunkStart + ui32VarNameOffset));
+        FormatVariableName(psVar->name);
+
+        psVar->ui32StartOffset = *pui32VarCursor++;
+        psVar->ui32Size = *pui32VarCursor++;
+        ++pui32VarCursor; // variable flags are not stored
+        const uint32_t ui32TypeOffset = *pui32VarCursor++;
+
+        // Seed the root type: it has no parent and shares the variable's name.
+        psVar->sType.name = psVar->name;
+        psVar->sType.fullName = psVar->name;
+        psVar->sType.Parent = 0;
+        psVar->sType.ParentCount = 0;
+        psVar->sType.Offset = 0;
+        psVar->sType.m_IsUsed = false;
+
+        ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken,
+            (const uint32_t*)(pcChunkStart + ui32TypeOffset), &psVar->sType);
+
+        const uint32_t ui32DefaultValueOffset = *pui32VarCursor++;
+
+        if (psShaderInfo->ui32MajorVersion >= 5)
+        {
+            // StartTexture, TextureSize, StartSampler, SamplerSize: skipped.
+            pui32VarCursor += 4;
+        }
+
+        psVar->haveDefaultValue = 0;
+
+        if(ui32DefaultValueOffset == 0)
+            continue;
+
+        const uint32_t ui32NumDefaultValues = psVar->ui32Size / 4;
+        const uint32_t* const pui32DefaultValues = (const uint32_t*)(pcChunkStart + ui32DefaultValueOffset);
+
+        //Always a sequence of 4-bytes at the moment.
+        //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes.
+        ASSERT(psVar->ui32Size%4 == 0);
+
+        psVar->haveDefaultValue = 1;
+
+        psVar->pui32DefaultValues.clear();
+        psVar->pui32DefaultValues.resize(psVar->ui32Size / 4);
+
+        for(uint32_t k = 0; k != ui32NumDefaultValues; ++k)
+        {
+            psVar->pui32DefaultValues[k] = pui32DefaultValues[k];
+        }
+    }
+
+    psBuffer->ui32TotalSizeInBytes = pui32Tokens[3];
+
+    // Skip past all six tokens of the buffer record.
+    return pui32Tokens + 6;
+}
+
+// Parses the resource chunk: fills psShaderInfo->psResourceBindings and
+// psShaderInfo->psConstantBuffers, then records in aui32ResourceMap, for every
+// binding whose name equals a constant buffer's name, the index of that buffer.
+//   pui32Tokens  - first token of the chunk; all offsets in the chunk header
+//                  are byte offsets from this pointer
+//   psShaderInfo - receives the parsed data
+//   decodeFlags  - forwarded to ReadResourceBinding
+//
+// The element pointers are obtained with data() rather than &v[0]: a shader
+// may have no bindings or no constant buffers, and indexing an empty vector
+// is undefined behaviour.
+static void ReadResources(const uint32_t* pui32Tokens,//in
+                   ShaderInfo* psShaderInfo, //out
+				   uint32_t decodeFlags)
+{
+    ResourceBinding* psResBindings;
+    ConstantBuffer* psConstantBuffers;
+    const uint32_t* pui32ConstantBuffers;
+    const uint32_t* pui32ResourceBindings;
+    const uint32_t* pui32FirstToken = pui32Tokens;
+    uint32_t i;
+
+    // Chunk header. Two further tokens (shader model and compile flags)
+    // follow these four and are not needed here.
+	const uint32_t ui32NumConstantBuffers = *pui32Tokens++;
+    const uint32_t ui32ConstantBufferOffset = *pui32Tokens++;
+    const uint32_t ui32NumResourceBindings = *pui32Tokens++;
+    const uint32_t ui32ResourceBindingOffset = *pui32Tokens++;
+
+    //Resources
+    pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset);
+
+	psShaderInfo->psResourceBindings.clear();
+	psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings);
+	psResBindings = psShaderInfo->psResourceBindings.data();
+
+    for(i=0; i < ui32NumResourceBindings; ++i)
+    {
+        pui32ResourceBindings = ReadResourceBinding(pui32FirstToken, pui32ResourceBindings, psResBindings+i, decodeFlags);
+		ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS);
+	}
+
+    //Constant buffers
+    pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset);
+
+	psShaderInfo->psConstantBuffers.clear();
+	psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers);
+	psConstantBuffers = psShaderInfo->psConstantBuffers.data();
+
+    for(i=0; i < ui32NumConstantBuffers; ++i)
+    {
+        pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers+i);
+    }
+
+
+	//Map resource bindings to constant buffers
+	if(psShaderInfo->psConstantBuffers.size())
+	{
+		for(i=0; i < ui32NumResourceBindings; ++i)
+		{
+			ResourceGroup eRGroup;
+			uint32_t cbufIndex = 0;
+
+			eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType);
+
+			//Find the constant buffer whose name matches the resource at the given resource binding point.
+			//The scan does not stop at the first hit, so the last matching buffer wins.
+			//NOTE(review): the bind point is only range-checked by the ASSERT above; confirm that
+			//aui32ResourceMap is large enough in builds where ASSERT compiles to nothing.
+			for(cbufIndex=0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++)
+			{
+				if(psConstantBuffers[cbufIndex].name == psResBindings[i].name)
+				{
+					psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex;
+				}
+			}
+		}
+	}
+}
+
+// Reads one class type record: a 32-bit name offset followed by four 16-bit
+// fields (ID, constant buffer stride, texture, sampler).
+//   pui32FirstInterfaceToken - start of the chunk; the name offset is a byte
+//                              offset from this pointer
+// Returns the 16-bit token following the record.
+static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType)
+{
+    // The name offset occupies the first two 16-bit slots and is read as one
+    // 32-bit value.
+    const uint32_t ui32NameOffset = *(const uint32_t*)pui16Tokens;
+    const uint16_t* const pui16Fields = pui16Tokens + 2;
+
+    psClassType->ui16ID = pui16Fields[0];
+    psClassType->ui16ConstBufStride = pui16Fields[1];
+    psClassType->ui16Texture = pui16Fields[2];
+    psClassType->ui16Sampler = pui16Fields[3];
+
+    psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));
+
+    return pui16Fields + 4;
+}
+
+// Reads one class instance record: a name offset assembled from two 16-bit
+// tokens, followed by five 16-bit fields (ID, constant buffer, constant
+// buffer offset, texture, sampler).
+//   pui32FirstInterfaceToken - start of the chunk; the name offset is a byte
+//                              offset from this pointer
+// Returns the 16-bit token following the record.
+static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance)
+{
+    // NOTE(review): the first 16-bit token is used as the HIGH half of the
+    // offset, whereas ReadClassType reads its name offset as one native
+    // 32-bit value. On a little-endian host the two disagree; confirm which
+    // layout the class instance table really uses.
+    const uint32_t ui32HighHalf = pui16Tokens[0];
+    const uint32_t ui32LowHalf = pui16Tokens[1];
+    const uint32_t ui32NameOffset = (ui32HighHalf << 16) | ui32LowHalf;
+
+    const uint16_t* const pui16Fields = pui16Tokens + 2;
+
+    psClassInstance->ui16ID = pui16Fields[0];
+    psClassInstance->ui16ConstBuf = pui16Fields[1];
+    psClassInstance->ui16ConstBufOffset = pui16Fields[2];
+    psClassInstance->ui16Texture = pui16Fields[3];
+    psClassInstance->ui16Sampler = pui16Fields[4];
+
+    psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));
+
+    return pui16Fields + 5;
+}
+
+
+// Parses the interfaces chunk: fills psShaderInfo->psClassTypes and
+// psShaderInfo->psClassInstances, and populates aui32TableIDToTypeID from the
+// interface slot records.
+//   pui32Tokens - first token of the chunk; all offsets stored in the chunk
+//                 are byte offsets from this pointer
+//
+// The element pointers are obtained with data() rather than &v[0], because
+// either list may be empty and indexing an empty vector is undefined behaviour.
+static void ReadInterfaces(const uint32_t* pui32Tokens,
+                        ShaderInfo* psShaderInfo)
+{
+    uint32_t i;
+    const uint32_t* pui32FirstInterfaceToken = pui32Tokens;
+    const uint32_t ui32ClassInstanceCount = *pui32Tokens++;
+    const uint32_t ui32ClassTypeCount = *pui32Tokens++;
+    const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++;
+    /*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++;
+    const uint32_t ui32ClassInstanceOffset = *pui32Tokens++;
+    const uint32_t ui32ClassTypeOffset = *pui32Tokens++;
+    const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++;
+
+    const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset);
+    const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset);
+    const uint32_t* pui32InterfaceSlotTokens = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset);
+
+    ClassType* psClassTypes;
+    ClassInstance* psClassInstances;
+
+	psShaderInfo->psClassTypes.clear();
+	psShaderInfo->psClassTypes.resize(ui32ClassTypeCount);
+	psClassTypes = psShaderInfo->psClassTypes.data();
+
+    for(i=0; i<ui32ClassTypeCount; ++i)
+    {
+        pui16ClassTypes = ReadClassType(pui32FirstInterfaceToken, pui16ClassTypes, psClassTypes+i);
+        // The ID read from the record is replaced by the position in the list.
+        psClassTypes[i].ui16ID = (uint16_t)i;
+    }
+
+	psShaderInfo->psClassInstances.clear();
+	psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount);
+	psClassInstances = psShaderInfo->psClassInstances.data();
+
+	for(i=0; i<ui32ClassInstanceCount; ++i)
+    {
+        pui16ClassInstances = ReadClassInstance(pui32FirstInterfaceToken, pui16ClassInstances, psClassInstances+i);
+    }
+
+    //Slots map function table to $ThisPointer cbuffer variable index
+    for(i=0; i<ui32InterfaceSlotRecordCount;++i)
+    {
+        uint32_t k;
+
+        // Slot record: slot span (not used), entry count, type ID list offset,
+        // table ID list offset.
+        /*const uint32_t ui32SlotSpan = * */ pui32InterfaceSlotTokens++;
+        const uint32_t ui32Count = *pui32InterfaceSlotTokens++;
+        const uint32_t ui32TypeIDOffset = *pui32InterfaceSlotTokens++;
+        const uint32_t ui32TableIDOffset = *pui32InterfaceSlotTokens++;
+
+        const uint16_t* pui16TypeID = (const uint16_t*)((const char*)pui32FirstInterfaceToken+ui32TypeIDOffset);
+        const uint32_t* pui32TableID = (const uint32_t*)((const char*)pui32FirstInterfaceToken+ui32TableIDOffset);
+
+        // NOTE(review): table IDs come straight from the chunk and index
+        // aui32TableIDToTypeID without a range check here; confirm the array
+        // bound against its declaration.
+        for(k=0; k < ui32Count; ++k)
+        {
+            psShaderInfo->aui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++;
+        }
+    }
+
+}
+
+// Fills *psInfo from the reflection chunks found in a shader.
+//   ui32MajorVersion / ui32MinorVersion - stored in psInfo
+//   psChunks    - pointers to the individual chunks; a null pointer means
+//                 the chunk is absent and is skipped
+//   psInfo      - receives signatures, resources, interfaces, ...
+//   decodeFlags - forwarded to ReadResources
+// Several chunk variants fill the same list (e.g. both input chunk pointers
+// fill psInputSignatures); each reader replaces the list, so when more than
+// one variant is present the one processed last wins.
+void LoadShaderInfo(const uint32_t ui32MajorVersion,
+    const uint32_t ui32MinorVersion,
+    const ReflectionChunks* psChunks,
+    ShaderInfo* psInfo,
+	uint32_t decodeFlags)
+{
+    psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
+    psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
+    psInfo->ui32MajorVersion = ui32MajorVersion;
+    psInfo->ui32MinorVersion = ui32MinorVersion;
+
+    // Input signatures (plain, then the variant with stream/precision tokens).
+    if(psChunks->pui32Inputs)
+        ReadInputSignatures(psChunks->pui32Inputs, psInfo, 0);
+    if(psChunks->pui32Inputs11)
+        ReadInputSignatures(psChunks->pui32Inputs11, psInfo, 1);
+
+    if(psChunks->pui32Resources)
+        ReadResources(psChunks->pui32Resources, psInfo, decodeFlags);
+    if(psChunks->pui32Interfaces)
+        ReadInterfaces(psChunks->pui32Interfaces, psInfo);
+
+    // Output signatures: arguments are (minPrec, streams).
+    if(psChunks->pui32Outputs)
+        ReadOutputSignatures(psChunks->pui32Outputs, psInfo, 0, 0);
+    if(psChunks->pui32Outputs11)
+        ReadOutputSignatures(psChunks->pui32Outputs11, psInfo, 1, 1);
+    if(psChunks->pui32OutputsWithStreams)
+        ReadOutputSignatures(psChunks->pui32OutputsWithStreams, psInfo, 0, 1);
+
+    // Patch constant signatures: arguments are (minPrec, streams).
+    if(psChunks->pui32PatchConstants)
+        ReadPatchConstantSignatures(psChunks->pui32PatchConstants, psInfo, 0, 0);
+    if(psChunks->pui32PatchConstants11)
+        ReadPatchConstantSignatures(psChunks->pui32PatchConstants11, psInfo, 1, 1);
+
+    // Remember the constant buffer named "$ThisPointer", if there is one.
+    for(auto& rConstBuffer : psInfo->psConstantBuffers)
+    {
+        if (rConstBuffer.name == "$ThisPointer")
+            psInfo->psThisPointerConstBuffer = &rConstBuffer;
+    }
+}
+
--- a/src/toGLSL.cpp
+++ b/src/toGLSL.cpp
@ -0,0 +1,806 @@
+#include <memory>
+
+#include "internal_includes/tokens.h"
+#include "internal_includes/decode.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "bstrlib.h"
+#include "internal_includes/toGLSL.h"
+#include "internal_includes/toGLSLOperand.h"
+#include "internal_includes/Declaration.h"
+#include "internal_includes/languages.h"
+#include "internal_includes/debug.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include "internal_includes/UseDefineChains.h"
+#include "internal_includes/DataTypeAnalysis.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Instruction.h"
+#include "internal_includes/LoopTransform.h"
+#include <algorithm>
+#include <sstream>
+
+// In GLSL, the input and output names cannot clash.
+// Also, the output name of previous stage must match the input name of the next stage.
+// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
+//
+void ToGLSL::SetIOPrefixes()
+{
+	// Which upstream stages are present in the program. Both are false when
+	// no dependency data has been supplied.
+	const bool hasGeometryStage = psContext->psDependencies &&
+		(psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER);
+	const bool hasDomainStage = psContext->psDependencies &&
+		(psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER);
+
+	// Compute shaders and unknown shader types keep the empty prefixes.
+	const char* inPrefix = "";
+	const char* outPrefix = "";
+
+	switch (psContext->psShader->eShaderType)
+	{
+		case VERTEX_SHADER:
+			inPrefix = "in_";
+			outPrefix = "vs_";
+			break;
+
+		case HULL_SHADER:
+			// A hull shader is always fed by the vertex shader.
+			inPrefix = "vs_";
+			outPrefix = "hs_";
+			break;
+
+		case DOMAIN_SHADER:
+			// A domain shader never exists without a hull shader.
+			inPrefix = "hs_";
+			outPrefix = "ds_";
+			break;
+
+		case GEOMETRY_SHADER:
+			// Fed by the domain shader when tessellation is active, otherwise by the vertex shader.
+			inPrefix = hasDomainStage ? "ds_" : "vs_";
+			outPrefix = "gs_";
+			break;
+
+		case PIXEL_SHADER:
+			// Closest preceding stage wins: geometry, then domain, then vertex.
+			if (hasGeometryStage)
+				inPrefix = "gs_";
+			else if (hasDomainStage)
+				inPrefix = "ds_";
+			else
+				inPrefix = "vs_";
+			outPrefix = "";
+			break;
+
+		case COMPUTE_SHADER:
+		default:
+			break;
+	}
+
+	psContext->inputPrefix = inPrefix;
+	psContext->outputPrefix = outPrefix;
+}
+
+
+// Appends the "#extension" directives required by the opcodes / shader type in use
+// to psContext->extensions, and the default-precision and gl_FragCoord layout
+// declarations to the string currently pointed to by psContext->currentGLSLString.
+// Directives are emitted in a fixed order; some may be emitted more than once.
+static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
+{
+	bstring glsl = *psContext->currentGLSLString;
+	bstring extensions = psContext->extensions;
+
+	Shader* const psShader = psContext->psShader;
+	const auto eLang = psShader->eTargetLanguage;
+	const auto& used = psShader->aiOpcodeUsed;
+
+	const bool isES = (eLang >= LANG_ES_100 && eLang <= LANG_ES_310);
+	const bool isES3 = (eLang == LANG_ES_300 || eLang == LANG_ES_310);
+	const bool isPixelShader = (psShader->eShaderType == PIXEL_SHADER);
+
+	// Set from several places below; the directive itself is written once, near the end.
+	bool GL_ARB_shader_image_load_store = false;
+
+	if (psShader->ui32MajorVersion > 3 && !isES3 && eLang < LANG_330)
+	{
+		bcatcstr(extensions, "#extension GL_ARB_shader_bit_encoding : enable\n");
+	}
+
+	if (!HaveCompute(eLang))
+	{
+		if (psShader->eShaderType == COMPUTE_SHADER)
+		{
+			bcatcstr(extensions, "#extension GL_ARB_compute_shader : enable\n");
+		}
+
+		const bool usesBufferObjects =
+			used[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] ||
+			used[OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW] ||
+			used[OPCODE_DCL_RESOURCE_STRUCTURED] ||
+			used[OPCODE_DCL_RESOURCE_RAW];
+		if (usesBufferObjects)
+		{
+			bcatcstr(extensions, "#extension GL_ARB_shader_storage_buffer_object : enable\n");
+		}
+	}
+
+	if ((!HaveAtomicMem(eLang) || !HaveAtomicCounter(eLang)) &&
+		(used[OPCODE_IMM_ATOMIC_ALLOC] ||
+		 used[OPCODE_IMM_ATOMIC_CONSUME] ||
+		 used[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]))
+	{
+		bcatcstr(extensions, "#extension GL_ARB_shader_atomic_counters : enable\n");
+	}
+
+	if (!HaveImageAtomics(eLang) &&
+		(used[OPCODE_ATOMIC_CMP_STORE] ||
+		 used[OPCODE_IMM_ATOMIC_AND] ||
+		 used[OPCODE_ATOMIC_AND] ||
+		 used[OPCODE_IMM_ATOMIC_IADD] ||
+		 used[OPCODE_ATOMIC_IADD] ||
+		 used[OPCODE_ATOMIC_OR] ||
+		 used[OPCODE_ATOMIC_XOR] ||
+		 used[OPCODE_ATOMIC_IMIN] ||
+		 used[OPCODE_ATOMIC_UMIN] ||
+		 used[OPCODE_IMM_ATOMIC_IMAX] ||
+		 used[OPCODE_IMM_ATOMIC_IMIN] ||
+		 used[OPCODE_IMM_ATOMIC_UMAX] ||
+		 used[OPCODE_IMM_ATOMIC_UMIN] ||
+		 used[OPCODE_IMM_ATOMIC_OR] ||
+		 used[OPCODE_IMM_ATOMIC_XOR] ||
+		 used[OPCODE_IMM_ATOMIC_EXCH] ||
+		 used[OPCODE_IMM_ATOMIC_CMP_EXCH]))
+	{
+		if (isES)
+			bcatcstr(extensions, "#extension GL_OES_shader_image_atomic : enable\n");
+		else
+			GL_ARB_shader_image_load_store = true;
+	}
+
+	if (!HaveGather(eLang) &&
+		(used[OPCODE_GATHER4] ||
+		 used[OPCODE_GATHER4_PO_C] ||
+		 used[OPCODE_GATHER4_PO] ||
+		 used[OPCODE_GATHER4_C]))
+	{
+		bcatcstr(extensions, "#extension GL_ARB_texture_gather : enable\n");
+	}
+
+	if (!HaveGatherNonConstOffset(eLang) &&
+		(used[OPCODE_GATHER4_PO_C] || used[OPCODE_GATHER4_PO]))
+	{
+		bcatcstr(extensions, "#extension GL_ARB_gpu_shader5 : enable\n");
+	}
+
+	if (!HaveQueryLod(eLang) && used[OPCODE_LOD])
+	{
+		bcatcstr(extensions, "#extension GL_ARB_texture_query_lod : enable\n");
+	}
+
+	if (!HaveQueryLevels(eLang) && used[OPCODE_RESINFO])
+	{
+		bcatcstr(extensions, "#extension GL_ARB_texture_query_levels : enable\n");
+	}
+
+	if (!HaveImageLoadStore(eLang))
+	{
+		if (used[OPCODE_STORE_UAV_TYPED] ||
+			used[OPCODE_STORE_RAW] ||
+			used[OPCODE_STORE_STRUCTURED])
+		{
+			// Stores additionally request the bit-encoding extension.
+			GL_ARB_shader_image_load_store = true;
+			bcatcstr(extensions, "#extension GL_ARB_shader_bit_encoding : enable\n");
+		}
+		else if (used[OPCODE_LD_UAV_TYPED] ||
+			used[OPCODE_LD_RAW] ||
+			used[OPCODE_LD_STRUCTURED])
+		{
+			GL_ARB_shader_image_load_store = true;
+		}
+	}
+
+	if (!HaveGeometryShaderARB(eLang) && psShader->eShaderType == GEOMETRY_SHADER)
+	{
+		bcatcstr(extensions, "#extension GL_ARB_geometry_shader : enable\n");
+	}
+
+	if (isES3 && psShader->eShaderType == GEOMETRY_SHADER)
+	{
+		bcatcstr(extensions, "#extension GL_OES_geometry_shader : enable\n");
+		bcatcstr(extensions, "#extension GL_EXT_geometry_shader : enable\n");
+	}
+
+	if (isES3 && (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER))
+	{
+		bcatcstr(extensions, "#extension GL_OES_tessellation_shader : enable\n");
+		bcatcstr(extensions, "#extension GL_EXT_tessellation_shader : enable\n");
+	}
+
+	if (GL_ARB_shader_image_load_store)
+		bcatcstr(extensions, "#extension GL_ARB_shader_image_load_store : enable\n");
+
+	// Fragment shader default precision (ES targets only).
+	if (isPixelShader && (eLang == LANG_ES_100 || isES3))
+	{
+		// Float default precision is patched during runtime in GlslGpuProgramGLES.cpp:PatchupFragmentShaderText()
+		// Except on Vulkan
+		if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
+			bcatcstr(glsl, "precision highp float;\n");
+
+		// Default int precision is forced to highp to avoid issues on platforms that actually implement mediump.
+		bcatcstr(glsl, "precision highp int;\n");
+	}
+
+	if (isPixelShader && eLang >= LANG_120 && !HaveFragmentCoordConventions(eLang))
+	{
+		bcatcstr(extensions, "#extension GL_ARB_fragment_coord_conventions : require\n");
+	}
+
+	if (isPixelShader && eLang >= LANG_150)
+	{
+		if (psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT)
+			bcatcstr(glsl, "layout(origin_upper_left) in vec4 gl_FragCoord;\n");
+
+		if (psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER)
+			bcatcstr(glsl, "layout(pixel_center_integer) in vec4 gl_FragCoord;\n");
+	}
+
+	/*
+		OpenGL 4.1 API spec:
+		To use any built-in input or output in the gl_PerVertex block in separable
+		program objects, shader code must redeclare that block prior to use.
+	*/
+	/* DISABLED FOR NOW */
+/*	if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410)
+    {
+        bcatcstr(glsl, "out gl_PerVertex {\n");
+        bcatcstr(glsl, "vec4 gl_Position;\n");
+        bcatcstr(glsl, "float gl_PointSize;\n");
+        bcatcstr(glsl, "float gl_ClipDistance[];");
+        bcatcstr(glsl, "};\n");
+    }*/
+}
+
+// Picks a default GLSL version from the HLSL shader model major version found
+// in the bytecode: SM5 -> GLSL 4.30, SM4 -> GLSL 3.30, anything else -> GLSL 1.20.
+GLLang ChooseLanguage(Shader* psShader)
+{
+    const uint32_t shaderModelMajor = psShader->ui32MajorVersion;
+
+    if (shaderModelMajor == 5)
+        return LANG_430;
+
+    if (shaderModelMajor == 4)
+        return LANG_330;
+
+    return LANG_120;
+}
+
+// Returns the "#version ..." directive line (newline-terminated) for the given
+// target language, or an empty string for any language not listed here.
+const char* GetVersionString(GLLang language)
+{
+    switch(language)
+    {
+        // GLSL ES
+        case LANG_ES_100: return "#version 100\n";
+        case LANG_ES_300: return "#version 300 es\n";
+        case LANG_ES_310: return "#version 310 es\n";
+
+        // Desktop GLSL
+        case LANG_120:    return "#version 120\n";
+        case LANG_130:    return "#version 130\n";
+        case LANG_140:    return "#version 140\n";
+        case LANG_150:    return "#version 150\n";
+        case LANG_330:    return "#version 330\n";
+        case LANG_400:    return "#version 400\n";
+        case LANG_410:    return "#version 410\n";
+        case LANG_420:    return "#version 420\n";
+        case LANG_430:    return "#version 430\n";
+        case LANG_440:    return "#version 440\n";
+
+        default:          return "";
+    }
+}
+
+// Maps a shader phase type to the base name of the GLSL function generated for
+// it. The main phase and any unrecognised phase type map to an empty name.
+static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType)
+{
+	switch (eType)
+	{
+	case HS_GLOBAL_DECL_PHASE:
+		return "hs_global_decls";
+	case HS_FORK_PHASE:
+		return "fork_phase";
+	case HS_CTRL_POINT_PHASE:
+		return "control_point_phase";
+	case HS_JOIN_PHASE:
+		return "join_phase";
+	case MAIN_PHASE:
+	default:
+		return "";
+	}
+}
+
+// Emits the GLSL needed for a hull shader that has no control point phase:
+//  1. an "in ...[]" / "out ...[]" array declaration pair for every input
+//     signature except the position (system value NAME_POSITION or semantic
+//     "POS", semantic index 0), which goes through gl_in / gl_out instead;
+//  2. a passthrough_ctrl_points() function copying each input to the matching
+//     output for the current gl_InvocationID.
+// All text is appended to psContext->glsl.
+static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
+{
+	uint32_t i;
+	bstring glsl = psContext->glsl;
+
+	// The precision qualifier depends only on the target language, so compute it once.
+	const char * prec = HavePrecisionQualifers(psContext->psShader->eTargetLanguage) ? "highp ": "";
+
+	for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++)
+	{
+		const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i];
+
+		// Position is passed via the built-in gl_Position; no user varying is declared for it.
+		if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0)
+			continue;
+
+		const char *Type;
+		const uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
+		switch (psSig->eComponentType)
+		{
+		default:
+		case INOUT_COMPONENT_FLOAT32:
+			Type = ui32NumComponents > 1 ? "vec" : "float";
+			break;
+		case INOUT_COMPONENT_SINT32:
+			Type = ui32NumComponents > 1 ? "ivec" : "int";
+			break;
+		case INOUT_COMPONENT_UINT32:
+			Type = ui32NumComponents > 1 ? "uvec" : "uint";
+			break;
+		}
+
+		// Vector types carry their component count as a suffix (vec3, ivec2, ...); scalars do not.
+		psContext->AddIndentation();
+		if (ui32NumComponents > 1) // TODO Precision
+			bformata(glsl, "in %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
+		else
+			bformata(glsl, "in %s%s %s%s%d[];\n", prec, Type, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
+
+		psContext->AddIndentation();
+		if (ui32NumComponents > 1) // TODO Precision
+			bformata(glsl, "out %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
+		else
+			bformata(glsl, "out %s%s %s%s%d[];\n", prec, Type, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
+	}
+
+	psContext->AddIndentation();
+	bcatcstr(glsl, "void passthrough_ctrl_points()\n");
+	psContext->AddIndentation();
+	bcatcstr(glsl, "{\n");
+	psContext->indent++;
+
+	for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++)
+	{
+		const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i];
+
+		psContext->AddIndentation();
+
+		if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0)
+			bcatcstr(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n");
+		else
+			bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
+	}
+
+	psContext->indent--;
+	psContext->AddIndentation();
+	bcatcstr(glsl, "}\n");
+}
+
+// Stores the target language for this translator and returns it.
+// LANG_DEFAULT is resolved through ChooseLanguage() based on the shader.
+GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage)
+{
+	if (suggestedLanguage == LANG_DEFAULT)
+		language = ChooseLanguage(psContext->psShader);
+	else
+		language = suggestedLanguage;
+
+	return language;
+}
+
+// Translates the decoded shader held by psContext into GLSL source text.
+//
+// Two strings are built: 'extensions' (the #version line followed by #extension
+// directives) and 'glsl' (everything else). At the end 'glsl' is appended to
+// 'extensions', destroyed, and psContext->glsl is pointed at the combined string.
+//
+// Hull shaders take a dedicated path (one GLSL function per phase plus a
+// generated main()) and return early; every other shader type emits the
+// declarations and instructions of phase 0 into a single main().
+//
+// Every path visible here returns true.
+bool ToGLSL::Translate()
+{
+    bstring glsl;
+    uint32_t i;
+    Shader* psShader = psContext->psShader;
+	uint32_t ui32Phase;
+
+	psContext->psTranslator = this;
+
+	// Resolve the target language if the caller never picked one.
+	if (language == LANG_DEFAULT)
+		SetLanguage(LANG_DEFAULT);
+
+	// Shader-wide preparation passes.
+	SetIOPrefixes();
+	psShader->ExpandSWAPCs();
+	psShader->ForcePositionToHighp();
+	psShader->AnalyzeIOOverlap();
+	psShader->FindUnusedGlobals(psContext->flags);
+
+    psContext->indent = 0;
+
+    // 'glsl' starts with a newline; 'extensions' starts with the #version directive.
+    glsl = bfromcstralloc (1024 * 10, "\n");
+    bstring extensions = bfromcstralloc (1024 * 10, GetVersionString(language));
+    psContext->extensions = extensions;
+
+    psContext->glsl = glsl;
+    // Allocate the per-phase side buffers (both start out empty).
+    for(i=0; i<psShader->asPhases.size();++i)
+    {
+        psShader->asPhases[i].postShaderCode = bfromcstralloc (1024 * 5, "");
+		psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
+	}
+    psContext->currentGLSLString = &glsl;
+    psShader->eTargetLanguage = language;
+    psContext->currentPhase = MAIN_PHASE;
+
+	// Extensions explicitly requested through psShader->extensions are marked "require".
+	if (psShader->extensions)
+	{
+		if (psShader->extensions->ARB_explicit_attrib_location)
+			bcatcstr(extensions, "#extension GL_ARB_explicit_attrib_location : require\n");
+		if (psShader->extensions->ARB_explicit_uniform_location)
+			bcatcstr(extensions, "#extension GL_ARB_explicit_uniform_location : require\n");
+		if (psShader->extensions->ARB_shading_language_420pack)
+			bcatcstr(extensions, "#extension GL_ARB_shading_language_420pack : require\n");
+	}
+
+    psContext->ClearDependencyData();
+
+    AddVersionDependentCode(psContext);
+
+	psShader->PrepareStructuredBufferBindingSlots();
+
+	// Per-phase analysis passes, run on every phase before temp registers are pruned.
+	for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
+	{
+		ShaderPhase &phase = psShader->asPhases[ui32Phase];
+		phase.UnvectorizeImmMoves();
+		psContext->DoDataTypeAnalysis(&phase);
+		phase.ResolveUAVProperties();
+		psShader->ResolveStructuredBufferBindingSlots(&phase);
+		phase.PruneConstArrays();
+	}
+
+	psShader->PruneTempRegisters();
+
+	for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
+	{
+		// Loop transform can only be done after the temps have been pruned
+		ShaderPhase &phase = psShader->asPhases[ui32Phase];
+		HLSLcc::DoLoopTransform(phase);
+	}
+
+	//Special case. Can have multiple phases.
+    if(psShader->eShaderType == HULL_SHADER)
+    {
+		// Order in which the phase functions are called from the generated main().
+		const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE };
+		uint32_t ui32PhaseCallIndex;
+		// Non-zero once an "if (gl_InvocationID == 0) {" block has been opened in main().
+		int perPatchSectionAdded = 0;
+		int hasControlPointPhase = 0;
+
+		psShader->ConsolidateHullTempVars();
+
+		// Find out if we have a passthrough hull shader
+		// (only phases from index 2 onwards are inspected).
+		for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
+		{
+			if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
+				hasControlPointPhase = 1;
+		}
+
+		// Phase 1 is always the global decls phase, no instructions
+		// NOTE(review): asPhases[1] is accessed without a size check; assumes a hull shader always has at least two phases - confirm.
+		for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i)
+        {
+			TranslateDeclaration(&psShader->asPhases[1].psDecl[i]);
+        }
+
+		// Without a control point phase, generate declarations and a function that copy inputs to outputs.
+		if (hasControlPointPhase == 0)
+		{
+			DoHullShaderPassthrough(psContext);
+		}
+
+		// Emit each remaining phase as "void <phaseName><phaseIndex>(int phaseInstanceID)".
+		for(ui32Phase=2; ui32Phase<psShader->asPhases.size(); ui32Phase++)
+		{
+			ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
+			psContext->currentPhase = ui32Phase;
+
+#ifdef _DEBUG
+			bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
+#endif
+			for (i = 0; i < psPhase->psDecl.size(); ++i)
+			{
+				TranslateDeclaration(&psPhase->psDecl[i]);
+			}
+
+			bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase);
+			psContext->indent++;
+
+			if (psPhase->psInst.size() > 0)
+			{
+				//The minus one here is remove the return statement at end of phases.
+				//We don't want to translate that, we'll just end the function body.
+				ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET);
+				for (i = 0; i < psPhase->psInst.size() - 1; ++i)
+				{
+					TranslateInstruction(&psPhase->psInst[i]);
+				}
+			}
+
+
+			psContext->indent--;
+			bcatcstr(glsl, "}\n");
+		}
+
+        // Generated entry point that calls the phase functions.
+        bcatcstr(glsl, "void main()\n{\n");
+
+        psContext->indent++;
+
+		// There are cases when there are no control point phases and we have to do passthrough
+		if (hasControlPointPhase == 0)
+		{
+			// Passthrough control point phase, run the rest only once per patch
+			psContext->AddIndentation();
+			bcatcstr(glsl, "passthrough_ctrl_points();\n");
+			psContext->AddIndentation();
+			bcatcstr(glsl, "barrier();\n");
+			psContext->AddIndentation();
+			bcatcstr(glsl, "if (gl_InvocationID == 0)\n");
+			psContext->AddIndentation();
+			bcatcstr(glsl, "{\n");
+			psContext->indent++;
+			perPatchSectionAdded = 1;
+		}
+
+		// Outer loop walks the call order (control point, fork, join); the inner loop
+		// emits calls for every phase of the current type.
+		for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++)
+		{
+			for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
+			{
+				// Note: this 'i' shadows the function-level 'i'.
+				uint32_t i;
+				ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
+				if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
+					continue;
+
+				// earlyMain is only emitted when it holds more than one character.
+				if (psPhase->earlyMain->slen > 1)
+				{
+#ifdef _DEBUG
+					psContext->AddIndentation();
+					bcatcstr(glsl, "//--- Start Early Main ---\n");
+#endif
+					bconcat(glsl, psPhase->earlyMain);
+#ifdef _DEBUG
+					psContext->AddIndentation();
+					bcatcstr(glsl, "//--- End Early Main ---\n");
+#endif
+				}
+
+				// One call per phase instance, passing the instance index as phaseInstanceID.
+				for (i = 0; i < psPhase->ui32InstanceCount; i++)
+				{
+
+					psContext->AddIndentation();
+					bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i);
+				}
+
+				if (psPhase->hasPostShaderCode)
+				{
+#ifdef _DEBUG
+					psContext->AddIndentation();
+					bcatcstr(glsl, "//--- Post shader code ---\n");
+#endif
+					bconcat(glsl, psPhase->postShaderCode);
+#ifdef _DEBUG
+					psContext->AddIndentation();
+					bcatcstr(glsl, "//--- End post shader code ---\n");
+#endif
+				}
+
+
+				if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
+				{
+					// We're done printing control point phase, run the rest only once per patch
+					psContext->AddIndentation();
+					bcatcstr(glsl, "barrier();\n");
+					psContext->AddIndentation();
+					bcatcstr(glsl, "if (gl_InvocationID == 0)\n");
+					psContext->AddIndentation();
+					bcatcstr(glsl, "{\n");
+					psContext->indent++;
+					perPatchSectionAdded = 1;
+				}
+			}
+		}
+
+		// Close the "if (gl_InvocationID == 0)" block if one was opened.
+		if (perPatchSectionAdded != 0)
+		{
+			psContext->indent--;
+			psContext->AddIndentation();
+			bcatcstr(glsl, "}\n");
+		}
+
+		psContext->indent--;
+
+        bcatcstr(glsl, "}\n");
+
+        // Concat extensions and glsl for the final shader code.
+        // 'extensions' becomes the final string; the body buffer is released.
+        bconcat(extensions, glsl);
+        bdestroy(glsl);
+        psContext->glsl = extensions;
+        glsl = NULL;
+
+        if(psContext->psDependencies)
+        {
+            //Save partitioning and primitive type for use by domain shader.
+            psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim;
+
+            psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning;
+        }
+
+        return true;
+	}
+
+    // Domain shaders emit input layout qualifiers from the values stored in the dependency data.
+    if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies)
+    {
+        //Load partitioning and primitive type from hull shader.
+        switch(psContext->psDependencies->eTessOutPrim)
+        {
+			case TESSELLATOR_OUTPUT_TRIANGLE_CCW:
+			{
+				bcatcstr(glsl, "layout(ccw) in;\n");
+				break;
+			}
+			case TESSELLATOR_OUTPUT_TRIANGLE_CW:
+            {
+                bcatcstr(glsl, "layout(cw) in;\n");
+                break;
+            }
+            case TESSELLATOR_OUTPUT_POINT:
+            {
+                bcatcstr(glsl, "layout(point_mode) in;\n");
+                break;
+            }
+            default:
+            {
+                break;
+            }
+        }
+
+        switch(psContext->psDependencies->eTessPartitioning)
+        {
+            case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
+            {
+                bcatcstr(glsl, "layout(fractional_odd_spacing) in;\n");
+                break;
+            }
+            case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
+            {
+                bcatcstr(glsl, "layout(fractional_even_spacing) in;\n");
+                break;
+            }
+            default:
+            {
+                break;
+            }
+        }
+    }
+
+	// Non-hull path: all declarations come from phase 0.
+	for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
+	{
+		TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
+	}
+
+    bcatcstr(glsl, "void main()\n{\n");
+
+    psContext->indent++;
+
+	// earlyMain is only emitted when it holds more than one character.
+	if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
+	{
+#ifdef _DEBUG
+		psContext->AddIndentation();
+		bcatcstr(glsl, "//--- Start Early Main ---\n");
+#endif
+		bconcat(glsl, psContext->psShader->asPhases[0].earlyMain);
+#ifdef _DEBUG
+		psContext->AddIndentation();
+		bcatcstr(glsl, "//--- End Early Main ---\n");
+#endif
+	}
+
+    // Unlike the hull shader phases, every instruction of phase 0 is translated.
+    for(i=0; i < psShader->asPhases[0].psInst.size(); ++i)
+    {
+		TranslateInstruction(&psShader->asPhases[0].psInst[i]);
+    }
+
+    psContext->indent--;
+
+    bcatcstr(glsl, "}\n");
+
+    // Concat extensions and glsl for the final shader code.
+    // 'extensions' becomes the final string; the body buffer is released.
+    bconcat(extensions, glsl);
+    bdestroy(glsl);
+    psContext->glsl = extensions;
+    glsl = NULL;
+
+    return true;
+}
+
+
--- a/src/toGLSLDeclaration.cpp
+++ b/src/toGLSLDeclaration.cpp
--- a/src/toGLSLInstruction.cpp
+++ b/src/toGLSLInstruction.cpp
--- a/src/toGLSLOperand.cpp
+++ b/src/toGLSLOperand.cpp
--- a/src/toMetal.cpp
+++ b/src/toMetal.cpp
@ -0,0 +1,265 @@
+
+#include "internal_includes/toMetal.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/debug.h"
+
+#include "internal_includes/Declaration.h"
+#include "internal_includes/toGLSL.h"
+#include "internal_includes/LoopTransform.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include <algorithm>
+
+// Writes the definition of struct 'sname' to 'glsl', first recursively writing
+// every struct it depends on. Each struct is written at most once: the
+// m_IsPrinted flag is set before recursing.
+static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs)
+{
+	StructDefinition &def = defs[sname];
+	if (def.m_IsPrinted)
+		return;
+	def.m_IsPrinted = true;
+
+	// Dependencies must be declared ahead of the struct that uses them.
+	for (std::string &depName : def.m_Dependencies)
+	{
+		PrintStructDeclaration(psContext, glsl, depName, defs);
+	}
+
+	bformata(glsl, "struct %s\n{\n", sname.c_str());
+	psContext->indent++;
+
+	// One indented "<member>;" line per member.
+	for (std::string &member : def.m_Members)
+	{
+		psContext->AddIndentation();
+		bcatcstr(glsl, member.c_str());
+		bcatcstr(glsl, ";\n");
+	}
+
+	psContext->indent--;
+	bcatcstr(glsl, "};\n\n");
+}
+
+// Writes every struct listed as a dependency of the "" entry in 'defs' (and,
+// transitively, the structs those depend on) to the current output string.
+void ToMetal::PrintStructDeclarations(StructDefinitions &defs)
+{
+	bstring out = *psContext->currentGLSLString;
+	StructDefinition &root = defs[""];
+
+	for (std::string &structName : root.m_Dependencies)
+	{
+		PrintStructDeclaration(psContext, out, structName, defs);
+	}
+}
+
+// Translates the decoded shader held by psContext into Metal Shading Language.
+//
+// Output is assembled in psContext->glsl in this order: the Metal includes,
+// whatever TranslateDeclaration() writes, the struct definitions, the extra
+// global definitions, the generated helper functions, and finally the entry
+// point. The entry point is built in a separate buffer ('bodyglsl') and
+// appended last. Only phase 0 of the shader is translated.
+//
+// Returns true on success. Returns false for shader types other than vertex,
+// pixel and compute; in that case the temporary body buffer is released and
+// psContext->currentGLSLString is pointed back at the main output string.
+bool ToMetal::Translate()
+{
+	bstring glsl;
+	uint32_t i;
+	Shader* psShader = psContext->psShader;
+	psContext->psTranslator = this;
+
+	// Shader-wide preparation passes.
+	SetIOPrefixes();
+	psShader->ExpandSWAPCs();
+	psShader->ForcePositionToHighp();
+	psShader->AnalyzeIOOverlap();
+	psShader->FindUnusedGlobals(psContext->flags);
+
+	psContext->indent = 0;
+
+	glsl = bfromcstralloc(1024 * 10, "");
+	bstring bodyglsl = bfromcstralloc(1024 * 10, "");
+
+	psContext->glsl = glsl;
+	// Allocate the per-phase side buffers (both start out empty).
+	for (i = 0; i < psShader->asPhases.size(); ++i)
+	{
+		psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, "");
+		psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
+	}
+
+	psContext->currentGLSLString = &glsl;
+	psShader->eTargetLanguage = LANG_METAL;
+	psShader->extensions = NULL;
+	psContext->currentPhase = MAIN_PHASE;
+
+	psContext->ClearDependencyData();
+
+	ClampPartialPrecisions();
+
+	psShader->PrepareStructuredBufferBindingSlots();
+
+	// Analysis passes on the single phase that is translated.
+	ShaderPhase &phase = psShader->asPhases[0];
+	phase.UnvectorizeImmMoves();
+	psContext->DoDataTypeAnalysis(&phase);
+	phase.ResolveUAVProperties();
+	psShader->ResolveStructuredBufferBindingSlots(&phase);
+	phase.PruneConstArrays();
+	HLSLcc::DoLoopTransform(phase);
+
+	psShader->PruneTempRegisters();
+
+	bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
+
+
+	for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
+	{
+		TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
+	}
+
+	// The "" entry of m_StructDefinitions collects the entry point's parameters
+	// (m_Members) and the structs that have to be printed (m_Dependencies).
+	if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
+	{
+		m_StructDefinitions[""].m_Members.push_back(GetInputStructName() + " input [[ stage_in ]]");
+		m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName());
+	}
+
+	// Compute shaders have no output struct.
+	if (psShader->eShaderType != COMPUTE_SHADER)
+	{
+		if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0)
+		{
+			m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName());
+		}
+	}
+
+	PrintStructDeclarations(m_StructDefinitions);
+
+	// From here on, output goes to the entry point body buffer.
+	psContext->currentGLSLString = &bodyglsl;
+
+	switch (psShader->eShaderType)
+	{
+		case VERTEX_SHADER:
+			bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n");
+			break;
+		case PIXEL_SHADER:
+			bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n");
+			break;
+		case COMPUTE_SHADER:
+			bcatcstr(bodyglsl, "kernel void computeMain(\n");
+			break;
+		default:
+			// Not supported
+			ASSERT(0);
+			// Do not leave the context pointing at a buffer that is about to
+			// be destroyed, and do not leak that buffer.
+			psContext->currentGLSLString = &glsl;
+			bdestroy(bodyglsl);
+			return false;
+	}
+	psContext->indent++;
+	// Entry point parameter list: one parameter per line, comma separated.
+	for (auto itr = m_StructDefinitions[""].m_Members.begin(); itr != m_StructDefinitions[""].m_Members.end(); itr++)
+	{
+		psContext->AddIndentation();
+		bcatcstr(bodyglsl, itr->c_str());
+		if (itr + 1 != m_StructDefinitions[""].m_Members.end())
+			bcatcstr(bodyglsl, ",\n");
+	}
+
+	bcatcstr(bodyglsl, ")\n{\n");
+	if (psShader->eShaderType != COMPUTE_SHADER)
+	{
+		// Local instance of the output struct.
+		psContext->AddIndentation();
+		bcatcstr(bodyglsl, GetOutputStructName().c_str());
+		bcatcstr(bodyglsl, " output;\n");
+	}
+
+	// earlyMain is only emitted when it holds more than one character.
+	if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
+	{
+#ifdef _DEBUG
+		psContext->AddIndentation();
+		bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
+#endif
+		bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
+#ifdef _DEBUG
+		psContext->AddIndentation();
+		bcatcstr(bodyglsl, "//--- End Early Main ---\n");
+#endif
+	}
+
+	for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
+	{
+		TranslateInstruction(&psShader->asPhases[0].psInst[i]);
+	}
+
+	psContext->indent--;
+
+	bcatcstr(bodyglsl, "}\n");
+
+	// Switch back to the main output string for the remaining pieces.
+	psContext->currentGLSLString = &glsl;
+
+	bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str());
+
+	// Print out extra functions we generated
+	std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p)
+	{
+		bcatcstr(glsl, p.second.c_str());
+		bcatcstr(glsl, "\n");
+	});
+
+	// And then the actual function body
+	bconcat(glsl, bodyglsl);
+	bdestroy(bodyglsl);
+
+	return true;
+}
+
+// Registers a generated helper function under 'name'. If a function with that
+// name is already registered, the call does nothing and the first body is kept.
+void ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body)
+{
+	const bool alreadyDeclared = (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end());
+	if (!alreadyDeclared)
+	{
+		m_FunctionDefinitions.insert(std::make_pair(name, body));
+	}
+}
+
+
+// Name of the Metal struct holding the shader outputs. Defined for vertex and
+// pixel shaders only; any other shader type asserts and yields an empty string.
+std::string ToMetal::GetOutputStructName() const
+{
+	const auto eType = psContext->psShader->eShaderType;
+
+	if (eType == VERTEX_SHADER)
+		return "Mtl_VertexOut";
+
+	if (eType == PIXEL_SHADER)
+		return "Mtl_FragmentOut";
+
+	ASSERT(0);
+	return "";
+}
+
+// Name of the Metal struct holding the shader inputs. Defined for vertex, pixel
+// and compute shaders; any other shader type asserts and yields an empty string.
+std::string ToMetal::GetInputStructName() const
+{
+	const auto eType = psContext->psShader->eShaderType;
+
+	if (eType == VERTEX_SHADER)
+		return "Mtl_VertexIn";
+
+	if (eType == PIXEL_SHADER)
+		return "Mtl_FragmentIn";
+
+	if (eType == COMPUTE_SHADER)
+		return "Mtl_KernelIn";
+
+	ASSERT(0);
+	return "";
+}
+
+// Sets the prefixes put in front of input/output names. Vertex and pixel
+// shaders use "input." / "output."; compute shaders use no prefix. Any other
+// shader type asserts and leaves the prefixes untouched.
+void ToMetal::SetIOPrefixes()
+{
+	switch (psContext->psShader->eShaderType)
+	{
+		case VERTEX_SHADER:
+		case PIXEL_SHADER:
+			psContext->inputPrefix = "input.";
+			psContext->outputPrefix = "output.";
+			return;
+
+		case COMPUTE_SHADER:
+			psContext->inputPrefix = "";
+			psContext->outputPrefix = "";
+			return;
+
+		default:
+			ASSERT(0);
+			return;
+	}
+}
+
+// Visits every operand of the phase 0 instructions and raises any operand whose
+// minimum precision is OPERAND_MIN_PRECISION_FLOAT_2_8 to OPERAND_MIN_PRECISION_FLOAT_16.
+void ToMetal::ClampPartialPrecisions()
+{
+	auto &instructions = psContext->psShader->asPhases[0].psInst;
+
+	HLSLcc::ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL,
+		[](std::vector<Instruction>::iterator &, Operand *psOperand, uint32_t)
+	{
+		if (psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8)
+			return;
+
+		psOperand->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16;
+	});
+}
--- a/src/toMetalDeclaration.cpp
+++ b/src/toMetalDeclaration.cpp
--- a/src/toMetalInstruction.cpp
+++ b/src/toMetalInstruction.cpp
--- a/src/toMetalOperand.cpp
+++ b/src/toMetalOperand.cpp
				`@ -0,0 +1,2 @@`

				`#include "internal_includes/Declaration.h"`