diff --git a/Makefile b/Makefile
index 742419f7..456f3a7c 100644
--- a/Makefile
+++ b/Makefile
@@ -13,9 +13,9 @@ DEPS := $(OBJECTS:.o=.d) $(CLI_OBJECTS:.o=.d)
 CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow
 
 ifeq ($(DEBUG), 1)
-	CXXFLAGS += -O0
+	CXXFLAGS += -O0 -g
 else
-	CXXFLAGS += -O2
+	CXXFLAGS += -O2 -g
 endif
 
 all: $(TARGET)
diff --git a/README.md b/README.md
index 6a154c55..d0580a5c 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,6 @@ The goal is to emit GLSL or MSL that looks like it was written by a human and no
 NOTE: Individual features are expected to be mostly complete, but it is possible that certain obscure GLSL features are not yet supported.
 However, most missing features are expected to be "trivial" improvements at this stage.
 
-Occasionally, missing features is due to glslangValidator's lack of proper support for that feature making testing hard.
-
 ## Building
 
 SPIRV-Cross has been tested on Linux, OSX and Windows.
@@ -125,7 +123,7 @@ glslangValidator then back through SPIRV-Cross again. The reference files are st
 All pull requests should ensure that test output does not change unexpectedly. This can be tested with `./test_shaders.py shaders`.
 However, when improving SPIRV-Cross there are of course legitimate cases where reference output should change.
 In these cases, run `./test_shaders.py shaders --update` to update the reference files and include these changes as part of the pull request.
-Always make sure you are running up to date glslangValidator when updating reference files.
+Always make sure you are running an up-to-date glslangValidator as well as SPIRV-Tools when updating reference files.
 
 In short, the master branch should always be able to run `./test_shaders.py shaders` without failure.
 
diff --git a/reference/shaders/asm/comp/bitcast_iadd.asm.comp b/reference/shaders/asm/comp/bitcast_iadd.asm.comp
new file mode 100644
index 00000000..acc39eb2
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_iadd.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = (_5._1 + uvec4(_5._0));
+    _6._0 = (uvec4(_5._0) + _5._1);
+    _6._0 = (_5._1 + _5._1);
+    _6._0 = uvec4(_5._0 + _5._0);
+    _6._1 = ivec4(_5._1 + _5._1);
+    _6._1 = (_5._0 + _5._0);
+    _6._1 = (ivec4(_5._1) + _5._0);
+    _6._1 = (_5._0 + ivec4(_5._1));
+}
+
diff --git a/reference/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/shaders/asm/comp/bitcast_iequal.asm.comp
new file mode 100644
index 00000000..2663a70a
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_iequal.asm.comp
@@ -0,0 +1,31 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    bvec4 _34 = equal(ivec4(_5._1), _5._0);
+    bvec4 _35 = equal(_5._0, ivec4(_5._1));
+    bvec4 _36 = equal(_5._1, _5._1);
+    bvec4 _37 = equal(_5._0, _5._0);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _34);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _35);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _36);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _37);
+    _6._1 = mix(ivec4(0), ivec4(1), _34);
+    _6._1 = mix(ivec4(0), ivec4(1), _35);
+    _6._1 = mix(ivec4(0), ivec4(1), _36);
+    _6._1 = mix(ivec4(0), ivec4(1), _37);
+}
+
diff --git a/reference/shaders/asm/comp/bitcast_sar.asm.comp b/reference/shaders/asm/comp/bitcast_sar.asm.comp
new file mode 100644
index 00000000..3530ed6f
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_sar.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = uvec4(ivec4(_5._1) >> _5._0);
+    _6._0 = uvec4(_5._0 >> ivec4(_5._1));
+    _6._0 = uvec4(ivec4(_5._1) >> ivec4(_5._1));
+    _6._0 = uvec4(_5._0 >> _5._0);
+    _6._1 = (ivec4(_5._1) >> ivec4(_5._1));
+    _6._1 = (_5._0 >> _5._0);
+    _6._1 = (ivec4(_5._1) >> _5._0);
+    _6._1 = (_5._0 >> ivec4(_5._1));
+}
+
diff --git a/reference/shaders/asm/comp/bitcast_sdiv.asm.comp b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
new file mode 100644
index 00000000..4fd7769d
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = uvec4(ivec4(_5._1) / _5._0);
+    _6._0 = uvec4(_5._0 / ivec4(_5._1));
+    _6._0 = uvec4(ivec4(_5._1) / ivec4(_5._1));
+    _6._0 = uvec4(_5._0 / _5._0);
+    _6._1 = (ivec4(_5._1) / ivec4(_5._1));
+    _6._1 = (_5._0 / _5._0);
+    _6._1 = (ivec4(_5._1) / _5._0);
+    _6._1 = (_5._0 / ivec4(_5._1));
+}
+
diff --git a/reference/shaders/asm/comp/bitcast_slr.asm.comp b/reference/shaders/asm/comp/bitcast_slr.asm.comp
new file mode 100644
index 00000000..c50fc0ba
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_slr.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = (_5._1 >> uvec4(_5._0));
+    _6._0 = (uvec4(_5._0) >> _5._1);
+    _6._0 = (_5._1 >> _5._1);
+    _6._0 = (uvec4(_5._0) >> uvec4(_5._0));
+    _6._1 = ivec4(_5._1 >> _5._1);
+    _6._1 = ivec4(uvec4(_5._0) >> uvec4(_5._0));
+    _6._1 = ivec4(_5._1 >> uvec4(_5._0));
+    _6._1 = ivec4(uvec4(_5._0) >> _5._1);
+}
+
diff --git a/reference/shaders/asm/comp/bitcast_udiv.asm.comp b/reference/shaders/asm/comp/bitcast_udiv.asm.comp
new file mode 100644
index 00000000..1d384de4
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_udiv.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = (_5._1 / uvec4(_5._0));
+    _6._0 = (uvec4(_5._0) / _5._1);
+    _6._0 = (_5._1 / _5._1);
+    _6._0 = (uvec4(_5._0) / uvec4(_5._0));
+    _6._1 = ivec4(_5._1 / _5._1);
+    _6._1 = ivec4(uvec4(_5._0) / uvec4(_5._0));
+    _6._1 = ivec4(_5._1 / uvec4(_5._0));
+    _6._1 = ivec4(uvec4(_5._0) / _5._1);
+}
+
diff --git a/reference/shaders/comp/casts.comp b/reference/shaders/comp/casts.comp
new file mode 100644
index 00000000..97366867
--- /dev/null
+++ b/reference/shaders/comp/casts.comp
@@ -0,0 +1,19 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+    ivec4 outputs[];
+} _21;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+    ivec4 inputs[];
+} _27;
+
+void main()
+{
+    uint ident = gl_GlobalInvocationID.x;
+    _21.outputs[ident] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u))));
+}
+
diff --git a/shaders/asm/comp/bitcast_iadd.asm.comp b/shaders/asm/comp/bitcast_iadd.asm.comp
new file mode 100644
index 00000000..62622ce3
--- /dev/null
+++ b/shaders/asm/comp/bitcast_iadd.asm.comp
@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of IAdd
+         %result_iadd_0 = OpIAdd %uvec4 %input0 %input1
+         %result_iadd_1 = OpIAdd %uvec4 %input1 %input0
+         %result_iadd_2 = OpIAdd %uvec4 %input0 %input0
+         %result_iadd_3 = OpIAdd %uvec4 %input1 %input1
+         %result_iadd_4 = OpIAdd %ivec4 %input0 %input0
+         %result_iadd_5 = OpIAdd %ivec4 %input1 %input1
+         %result_iadd_6 = OpIAdd %ivec4 %input0 %input1
+         %result_iadd_7 = OpIAdd %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/bitcast_iequal.asm.comp b/shaders/asm/comp/bitcast_iequal.asm.comp
new file mode 100644
index 00000000..c98f52c5
--- /dev/null
+++ b/shaders/asm/comp/bitcast_iequal.asm.comp
@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+		 %bool = OpTypeBool
+		 %bvec4 = OpTypeVector %bool 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+		 %uone = OpConstant %uint 1
+		 %uzero = OpConstant %uint 0
+		 %uvec41 = OpConstantComposite %uvec4 %uone %uone %uone %uone
+		 %ivec41 = OpConstantComposite %ivec4 %one %one %one %one
+		 %uvec40 = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero
+		 %ivec40 = OpConstantComposite %ivec4 %zero %zero %zero %zero
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of IEqual
+         %result_iequal0 = OpIEqual %bvec4 %input0 %input1
+         %result_iequal1 = OpIEqual %bvec4 %input1 %input0
+         %result_iequal2 = OpIEqual %bvec4 %input0 %input0
+         %result_iequal3 = OpIEqual %bvec4 %input1 %input1
+		 %result_0       = OpSelect %uvec4 %result_iequal0 %uvec41 %uvec40
+		 %result_1       = OpSelect %uvec4 %result_iequal1 %uvec41 %uvec40
+		 %result_2       = OpSelect %uvec4 %result_iequal2 %uvec41 %uvec40
+		 %result_3       = OpSelect %uvec4 %result_iequal3 %uvec41 %uvec40
+		 %result_4       = OpSelect %ivec4 %result_iequal0 %ivec41 %ivec40
+		 %result_5       = OpSelect %ivec4 %result_iequal1 %ivec41 %ivec40
+		 %result_6       = OpSelect %ivec4 %result_iequal2 %ivec41 %ivec40
+		 %result_7       = OpSelect %ivec4 %result_iequal3 %ivec41 %ivec40
+
+			   OpStore %output_ptr_uvec4 %result_0
+			   OpStore %output_ptr_uvec4 %result_1
+			   OpStore %output_ptr_uvec4 %result_2
+			   OpStore %output_ptr_uvec4 %result_3
+			   OpStore %output_ptr_ivec4 %result_4
+			   OpStore %output_ptr_ivec4 %result_5
+			   OpStore %output_ptr_ivec4 %result_6
+			   OpStore %output_ptr_ivec4 %result_7
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/bitcast_sar.asm.comp b/shaders/asm/comp/bitcast_sar.asm.comp
new file mode 100644
index 00000000..64f19fc3
--- /dev/null
+++ b/shaders/asm/comp/bitcast_sar.asm.comp
@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of ShiftRightArithmetic
+         %result_iadd_0 = OpShiftRightArithmetic %uvec4 %input0 %input1
+         %result_iadd_1 = OpShiftRightArithmetic %uvec4 %input1 %input0
+         %result_iadd_2 = OpShiftRightArithmetic %uvec4 %input0 %input0
+         %result_iadd_3 = OpShiftRightArithmetic %uvec4 %input1 %input1
+         %result_iadd_4 = OpShiftRightArithmetic %ivec4 %input0 %input0
+         %result_iadd_5 = OpShiftRightArithmetic %ivec4 %input1 %input1
+         %result_iadd_6 = OpShiftRightArithmetic %ivec4 %input0 %input1
+         %result_iadd_7 = OpShiftRightArithmetic %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/bitcast_sdiv.asm.comp b/shaders/asm/comp/bitcast_sdiv.asm.comp
new file mode 100644
index 00000000..ab73ec83
--- /dev/null
+++ b/shaders/asm/comp/bitcast_sdiv.asm.comp
@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of SDiv
+         %result_iadd_0 = OpSDiv %uvec4 %input0 %input1
+         %result_iadd_1 = OpSDiv %uvec4 %input1 %input0
+         %result_iadd_2 = OpSDiv %uvec4 %input0 %input0
+         %result_iadd_3 = OpSDiv %uvec4 %input1 %input1
+         %result_iadd_4 = OpSDiv %ivec4 %input0 %input0
+         %result_iadd_5 = OpSDiv %ivec4 %input1 %input1
+         %result_iadd_6 = OpSDiv %ivec4 %input0 %input1
+         %result_iadd_7 = OpSDiv %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/bitcast_slr.asm.comp b/shaders/asm/comp/bitcast_slr.asm.comp
new file mode 100644
index 00000000..6741f5cb
--- /dev/null
+++ b/shaders/asm/comp/bitcast_slr.asm.comp
@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of ShiftRightLogical
+         %result_iadd_0 = OpShiftRightLogical %uvec4 %input0 %input1
+         %result_iadd_1 = OpShiftRightLogical %uvec4 %input1 %input0
+         %result_iadd_2 = OpShiftRightLogical %uvec4 %input0 %input0
+         %result_iadd_3 = OpShiftRightLogical %uvec4 %input1 %input1
+         %result_iadd_4 = OpShiftRightLogical %ivec4 %input0 %input0
+         %result_iadd_5 = OpShiftRightLogical %ivec4 %input1 %input1
+         %result_iadd_6 = OpShiftRightLogical %ivec4 %input0 %input1
+         %result_iadd_7 = OpShiftRightLogical %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/bitcast_udiv.asm.comp b/shaders/asm/comp/bitcast_udiv.asm.comp
new file mode 100644
index 00000000..090f37e8
--- /dev/null
+++ b/shaders/asm/comp/bitcast_udiv.asm.comp
@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of UDiv
+         %result_iadd_0 = OpUDiv %uvec4 %input0 %input1
+         %result_iadd_1 = OpUDiv %uvec4 %input1 %input0
+         %result_iadd_2 = OpUDiv %uvec4 %input0 %input0
+         %result_iadd_3 = OpUDiv %uvec4 %input1 %input1
+         %result_iadd_4 = OpUDiv %ivec4 %input0 %input0
+         %result_iadd_5 = OpUDiv %ivec4 %input1 %input1
+         %result_iadd_6 = OpUDiv %ivec4 %input0 %input1
+         %result_iadd_7 = OpUDiv %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/comp/casts.comp b/shaders/comp/casts.comp
new file mode 100644
index 00000000..6be539d7
--- /dev/null
+++ b/shaders/comp/casts.comp
@@ -0,0 +1,18 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+   ivec4 inputs[];
+};
+
+layout(binding = 1, std430) buffer SSBO1
+{
+   ivec4 outputs[];
+};
+
+void main()
+{
+   uint ident = gl_GlobalInvocationID.x;
+   outputs[ident] = ivec4(bvec4(inputs[ident] & 0x3));
+}
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index 790181e4..43e146c1 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -487,6 +487,21 @@ static string extract_string(const vector<uint32_t> &spirv, uint32_t offset)
 	throw CompilerError("String was not terminated before EOF");
 }
 
+static bool is_valid_spirv_version(uint32_t version) // True if Compiler::parse() accepts this header version word.
+{
+	switch (version)
+	{
+	// Allow v99 (the plain value 99, unlike the 0x1xx00 encodings below) since it tends to just work.
+	case 99:
+	case 0x10000: // SPIR-V 1.0
+	case 0x10100: // SPIR-V 1.1
+		return true;
+
+	default: // Any other version word is rejected.
+		return false;
+	}
+}
+
 void Compiler::parse()
 {
 	auto len = spirv.size();
@@ -502,17 +517,9 @@ void Compiler::parse()
 			          return swap_endian(c);
 			      });
 
-	// Allow v99 since it tends to just work, but warn about this.
-	if (s[0] != MagicNumber || (s[1] != Version && s[1] != 99))
+	if (s[0] != MagicNumber || !is_valid_spirv_version(s[1]))
 		throw CompilerError("Invalid SPIRV format.");
 
-	if (s[1] != Version)
-	{
-		fprintf(stderr, "SPIRV-Cross was compiled against SPIR-V version %d, but SPIR-V uses version %u. Buggy "
-		                "behavior due to ABI incompatibility might occur.\n",
-		        Version, s[1]);
-	}
-
 	uint32_t bound = s[3];
 	ids.resize(bound);
 	meta.resize(bound);
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 3f7d5744..3b2f6794 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -1330,10 +1330,74 @@ void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint3
 
 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
 {
-	emit_op(result_type, result_id, join(bitcast_glsl(result_type, op0), " ", op, " ", bitcast_glsl(result_type, op1)),
+	emit_op(result_type, result_id, join(to_expression(op0), " ", op, " ", to_expression(op1)),
 	        should_forward(op0) && should_forward(op1), true);
 }
 
+SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
+                                                uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
+{
+	auto &type0 = expression_type(op0);
+	auto &type1 = expression_type(op1);
+
+	// We have to bitcast if our inputs are of different types, or if their type is not the expected input type.
+	// For some opcodes like OpIEqual and OpINotEqual, we don't care if inputs are of different types than expected
+	// since the equality test is exactly the same.
+	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
+
+	// Create a fake type so we can bitcast to it; vecsize, columns and width are copied from the first operand.
+	// We only deal with regular arithmetic types here, such as int and uint.
+	SPIRType expected_type;
+	expected_type.basetype = input_type;
+	expected_type.vecsize = type0.vecsize;
+	expected_type.columns = type0.columns;
+	expected_type.width = type0.width;
+
+	if (cast)
+	{
+		cast_op0 = bitcast_glsl(expected_type, op0);
+		cast_op1 = bitcast_glsl(expected_type, op1);
+	}
+	else
+	{
+		// No cast: both operands share a base type here, so report that type back to the caller via input_type.
+		cast_op0 = to_expression(op0);
+		cast_op1 = to_expression(op1);
+		input_type = type0.basetype;
+	}
+
+	return expected_type;
+}
+
+void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                       const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
+{
+	string cast_op0, cast_op1;
+	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
+	auto &out_type = get<SPIRType>(result_type);
+
+	// We might have cast away from the result type, so bitcast the result back to it.
+	// For example, arithmetic right shift with uint inputs.
+	// Boolean outputs are special-cased since relational opcodes output booleans instead of int/uint.
+	bool extra_parens = true;
+	string expr;
+	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Bool)
+	{
+		expected_type.basetype = input_type;
+		expr = bitcast_glsl_op(out_type, expected_type);
+		expr += '(';
+		expr += join(cast_op0, " ", op, " ", cast_op1);
+		expr += ')';
+		extra_parens = false;
+	}
+	else
+	{
+		expr += join(cast_op0, " ", op, " ", cast_op1);
+	}
+
+	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1), extra_parens);
+}
+
 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
 {
 	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), should_forward(op0), false);
@@ -1346,6 +1410,31 @@ void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id,
 	        should_forward(op0) && should_forward(op1), false);
 }
 
+void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                            const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
+{
+	string cast_op0, cast_op1;
+	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
+	auto &out_type = get<SPIRType>(result_type);
+
+	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
+	string expr;
+	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Bool)
+	{
+		expected_type.basetype = input_type;
+		expr = bitcast_glsl_op(out_type, expected_type);
+		expr += '(';
+		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
+		expr += ')';
+	}
+	else
+	{
+		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
+	}
+
+	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1), false);
+}
+
 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                         uint32_t op2, const char *op)
 {
@@ -1955,11 +2044,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 }
 
-string CompilerGLSL::bitcast_glsl_op(uint32_t result_type, uint32_t argument)
+string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
-	auto &out_type = get<SPIRType>(result_type);
-	auto &in_type = expression_type(argument);
-
 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
 		return type_to_glsl(out_type);
 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
@@ -1976,9 +2062,9 @@ string CompilerGLSL::bitcast_glsl_op(uint32_t result_type, uint32_t argument)
 		return "";
 }
 
-string CompilerGLSL::bitcast_glsl(uint32_t result_type, uint32_t argument)
+string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
 {
-	auto op = bitcast_glsl_op(result_type, argument);
+	auto op = bitcast_glsl_op(result_type, expression_type(argument));
 	if (op.empty())
 		return to_expression(argument);
 	else
@@ -2438,10 +2524,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	uint32_t length = instruction.length;
 
 #define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define BOP_CAST(op, type, skip_cast) emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, skip_cast)
 #define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
 #define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
 #define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
 #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
+// Same as BFOP, but goes through emit_binary_func_op_cast so the operands can be bitcast to `type` first.
+#define BFOP_CAST(op, type, skip_cast) emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, skip_cast)
 #define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
 
 	switch (opcode)
@@ -2809,16 +2898,35 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpIAdd:
+	{
+		// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(+, type, true);
+		break;
+	}
+
 	case OpFAdd:
 		BOP(+);
 		break;
 
 	case OpISub:
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(-, type, true);
+		break;
+	}
+
 	case OpFSub:
 		BOP(-);
 		break;
 
 	case OpIMul:
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(*, type, true);
+		break;
+	}
+
 	case OpFMul:
 	case OpMatrixTimesVector:
 	case OpMatrixTimesScalar:
@@ -2841,40 +2949,63 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpSDiv:
+		BOP_CAST(/, SPIRType::Int, false);
+		break;
+
 	case OpUDiv:
+		BOP_CAST(/, SPIRType::UInt, false);
+		break;
+
 	case OpFDiv:
 		BOP(/ );
 		break;
 
-	// Might need workaround if RightLocal can be used on signed types ...
 	case OpShiftRightLogical:
+		BOP_CAST(>>, SPIRType::UInt, false);
+		break;
+
 	case OpShiftRightArithmetic:
-		BOP(>> );
+		BOP_CAST(>>, SPIRType::Int, false);
 		break;
 
 	case OpShiftLeftLogical:
-		BOP(<< );
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(<<, type, true);
 		break;
+	}
 
 	case OpBitwiseOr:
-		BOP(| );
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(|, type, true);
 		break;
+	}
 
 	case OpBitwiseXor:
-		BOP (^);
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST (^, type, true);
 		break;
+	}
 
 	case OpBitwiseAnd:
-		BOP(&);
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(&, type, true);
 		break;
+	}
 
 	case OpNot:
 		UOP(~);
 		break;
 
 	case OpUMod:
+		BOP_CAST(%, SPIRType::UInt, false);
+		break;
+
 	case OpSMod:
-		BOP(% );
+		BOP_CAST(%, SPIRType::Int, false);
 		break;
 
 	case OpFMod:
@@ -2906,8 +3037,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		UOP(!);
 		break;
 
-	case OpLogicalEqual:
 	case OpIEqual:
+	{
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(equal, SPIRType::Int, true);
+		else
+			BOP_CAST(==, SPIRType::Int, true);
+		break;
+	}
+
+	case OpLogicalEqual:
 	case OpFOrdEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@@ -2917,8 +3056,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
-	case OpLogicalNotEqual:
 	case OpINotEqual:
+	{
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(notEqual, SPIRType::Int, true);
+		else
+			BOP_CAST(!=, SPIRType::Int, true);
+		break;
+	}
+
+	case OpLogicalNotEqual:
 	case OpFOrdNotEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@@ -2930,6 +3077,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	case OpUGreaterThan:
 	case OpSGreaterThan:
+	{
+		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(greaterThan, type, false);
+		else
+			BOP_CAST(>, type, false);
+		break;
+	}
+
 	case OpFOrdGreaterThan:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@@ -2941,6 +3097,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	case OpUGreaterThanEqual:
 	case OpSGreaterThanEqual:
+	{
+		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(greaterThanEqual, type, false);
+		else
+			BOP_CAST(>=, type, false);
+		break;
+	}
+
 	case OpFOrdGreaterThanEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@@ -2952,6 +3117,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	case OpULessThan:
 	case OpSLessThan:
+	{
+		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(lessThan, type, false);
+		else
+			BOP_CAST(<, type, false);
+		break;
+	}
+
 	case OpFOrdLessThan:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@@ -2963,6 +3137,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	case OpULessThanEqual:
 	case OpSLessThanEqual:
+	{
+		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(lessThanEqual, type, false);
+		else
+			BOP_CAST(<=, type, false);
+		break;
+	}
+
 	case OpFOrdLessThanEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@@ -2995,7 +3178,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t arg = ops[2];
 
-		auto op = bitcast_glsl_op(result_type, arg);
+		auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
 		emit_unary_func_op(result_type, id, arg, op.c_str());
 		break;
 	}
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 7c8657ef..e744e54c 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -236,8 +236,16 @@ protected:
 	void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
 	                          const char *op);
 	void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+	                              SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
 	void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+	                         SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
+
+	SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type,
+	                                  uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type);
+
 	void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	bool expression_is_forwarded(uint32_t id);
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
@@ -265,8 +273,8 @@ protected:
 	uint32_t type_to_std430_array_stride(const SPIRType &type, uint64_t flags);
 	uint32_t type_to_std430_size(const SPIRType &type, uint64_t flags);
 
-	std::string bitcast_glsl(uint32_t result_type, uint32_t arg);
-	std::string bitcast_glsl_op(uint32_t result_type, uint32_t arg);
+	std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg);
+	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type);
 	std::string build_composite_combiner(const uint32_t *elems, uint32_t length);
 	bool remove_duplicate_swizzle(std::string &op);
 	bool remove_unity_swizzle(uint32_t base, std::string &op);
diff --git a/test_shaders.py b/test_shaders.py
index 099b0373..68785eee 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -66,26 +66,32 @@ def validate_shader(shader, vulkan):
     else:
         subprocess.check_call(['glslangValidator', shader])
 
-def cross_compile(shader, vulkan):
+def cross_compile(shader, vulkan, spirv):
     spirv_f, spirv_path = tempfile.mkstemp()
     glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
     os.close(spirv_f)
     os.close(glsl_f)
 
-    if vulkan:
+    if vulkan or spirv:
         vulkan_glsl_f, vulkan_glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
         os.close(vulkan_glsl_f)
 
-    subprocess.check_call(['glslangValidator', '-V' if vulkan else '-G', '-o', spirv_path, shader])
+    if spirv:
+        subprocess.check_call(['spirv-as', '-o', spirv_path, shader])
+    else:
+        subprocess.check_call(['glslangValidator', '-V' if vulkan else '-G', '-o', spirv_path, shader])
+
+    if spirv:
+        subprocess.check_call(['spirv-val', spirv_path])
 
     spirv_cross_path = './spirv-cross'
     subprocess.check_call([spirv_cross_path, '--output', glsl_path, spirv_path])
 
     # A shader might not be possible to make valid GLSL from, skip validation for this case.
-    if not ('nocompat' in glsl_path):
+    if (not ('nocompat' in glsl_path)) and (not spirv):
         validate_shader(glsl_path, False)
 
-    if vulkan:
+    if vulkan or spirv:
         subprocess.check_call([spirv_cross_path, '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path])
         validate_shader(vulkan_glsl_path, vulkan)
 
@@ -139,15 +145,19 @@ def regression_check(shader, glsl, update, keep):
 def shader_is_vulkan(shader):
     return '.vk.' in shader
 
+def shader_is_spirv(shader):
+    return '.asm.' in shader
+
 def test_shader(stats, shader, update, keep):
     joined_path = os.path.join(shader[0], shader[1])
     vulkan = shader_is_vulkan(shader[1])
+    is_spirv = shader_is_spirv(shader[1])  # Flag only; 'spirv' below is bound to cross_compile's first result.
 
     print('Testing shader:', joined_path)
-    spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan)
+    spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, is_spirv)
 
     # Only test GLSL stats if we have a shader following GL semantics.
-    if stats and (not vulkan):
+    if stats and (not vulkan) and (not is_spirv):
         cross_stats = get_shader_stats(glsl)
 
     regression_check(shader, glsl, update, keep)