Implement more correct integer op handling.

In some cases we need to bitcast when dealing with int vs. uint. SPIR-V allows inputs to be of different integer signedness, so we need to deal with this somehow. Add a testing system to test SPIR-V assembly. For now, test all possible combinations for all major cases. - IAdd (which doesn't care about input type as long as they're equal) - SDiv/UDiv operations which care about input type. - Arith/Logical right shifts. - IEqual to test outputs to bvec, which shouldn't get output cast. Also tests casting in function-like calls.
2016-05-10 23:39:41 +02:00 · 2016-05-10 23:39:41 +02:00 · 45ad58a903
commit 45ad58a903
parent 8869a167d6
19 changed files with 920 additions and 37 deletions
--- a/4
+++ b/4
@ -13,9 +13,9 @@ DEPS := $(OBJECTS:.o=.d) $(CLI_OBJECTS:.o=.d)
 CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow

 ifeq ($(DEBUG), 1)
-	CXXFLAGS += -O0
+	CXXFLAGS += -O0 -g
 else
-	CXXFLAGS += -O2
+	CXXFLAGS += -O2 -g
 endif

 all: $(TARGET)
--- a/reference/shaders/asm/comp/bitcast_iadd.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_iadd.asm.comp
@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = (_5._1 + uvec4(_5._0));
+    _6._0 = (uvec4(_5._0) + _5._1);
+    _6._0 = (_5._1 + _5._1);
+    _6._0 = uvec4(_5._0 + _5._0);
+    _6._1 = ivec4(_5._1 + _5._1);
+    _6._1 = (_5._0 + _5._0);
+    _6._1 = (ivec4(_5._1) + _5._0);
+    _6._1 = (_5._0 + ivec4(_5._1));
+}
+
--- a/reference/shaders/asm/comp/bitcast_iequal.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_iequal.asm.comp
@ -0,0 +1,31 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    bvec4 _34 = equal(ivec4(_5._1), _5._0);
+    bvec4 _35 = equal(_5._0, ivec4(_5._1));
+    bvec4 _36 = equal(_5._1, _5._1);
+    bvec4 _37 = equal(_5._0, _5._0);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _34);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _35);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _36);
+    _6._0 = mix(uvec4(0u), uvec4(1u), _37);
+    _6._1 = mix(ivec4(0), ivec4(1), _34);
+    _6._1 = mix(ivec4(0), ivec4(1), _35);
+    _6._1 = mix(ivec4(0), ivec4(1), _36);
+    _6._1 = mix(ivec4(0), ivec4(1), _37);
+}
+
--- a/reference/shaders/asm/comp/bitcast_sar.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_sar.asm.comp
@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = uvec4(ivec4(_5._1) >> _5._0);
+    _6._0 = uvec4(_5._0 >> ivec4(_5._1));
+    _6._0 = uvec4(ivec4(_5._1) >> ivec4(_5._1));
+    _6._0 = uvec4(_5._0 >> _5._0);
+    _6._1 = (ivec4(_5._1) >> ivec4(_5._1));
+    _6._1 = (_5._0 >> _5._0);
+    _6._1 = (ivec4(_5._1) >> _5._0);
+    _6._1 = (_5._0 >> ivec4(_5._1));
+}
+
--- a/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = uvec4(ivec4(_5._1) / _5._0);
+    _6._0 = uvec4(_5._0 / ivec4(_5._1));
+    _6._0 = uvec4(ivec4(_5._1) / ivec4(_5._1));
+    _6._0 = uvec4(_5._0 / _5._0);
+    _6._1 = (ivec4(_5._1) / ivec4(_5._1));
+    _6._1 = (_5._0 / _5._0);
+    _6._1 = (ivec4(_5._1) / _5._0);
+    _6._1 = (_5._0 / ivec4(_5._1));
+}
+
--- a/reference/shaders/asm/comp/bitcast_slr.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_slr.asm.comp
@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = (_5._1 >> uvec4(_5._0));
+    _6._0 = (uvec4(_5._0) >> _5._1);
+    _6._0 = (_5._1 >> _5._1);
+    _6._0 = (uvec4(_5._0) >> uvec4(_5._0));
+    _6._1 = ivec4(_5._1 >> _5._1);
+    _6._1 = ivec4(uvec4(_5._0) >> uvec4(_5._0));
+    _6._1 = ivec4(_5._1 >> uvec4(_5._0));
+    _6._1 = ivec4(uvec4(_5._0) >> _5._1);
+}
+
--- a/reference/shaders/asm/comp/bitcast_udiv.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_udiv.asm.comp
@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _3
+{
+    ivec4 _0;
+    uvec4 _1;
+} _5;
+
+layout(binding = 1, std430) buffer _4
+{
+    uvec4 _0;
+    ivec4 _1;
+} _6;
+
+void main()
+{
+    _6._0 = (_5._1 / uvec4(_5._0));
+    _6._0 = (uvec4(_5._0) / _5._1);
+    _6._0 = (_5._1 / _5._1);
+    _6._0 = (uvec4(_5._0) / uvec4(_5._0));
+    _6._1 = ivec4(_5._1 / _5._1);
+    _6._1 = ivec4(uvec4(_5._0) / uvec4(_5._0));
+    _6._1 = ivec4(_5._1 / uvec4(_5._0));
+    _6._1 = ivec4(uvec4(_5._0) / _5._1);
+}
+
--- a/reference/shaders/comp/casts.comp
+++ b/reference/shaders/comp/casts.comp
@ -0,0 +1,19 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+    ivec4 outputs[];
+} _21;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+    ivec4 inputs[];
+} _27;
+
+void main()
+{
+    uint ident = gl_GlobalInvocationID.x;
+    _21.outputs[ident] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u))));
+}
+
--- a/shaders/asm/comp/bitcast_iadd.asm.comp
+++ b/shaders/asm/comp/bitcast_iadd.asm.comp
@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of IAdd
+         %result_iadd_0 = OpIAdd %uvec4 %input0 %input1
+         %result_iadd_1 = OpIAdd %uvec4 %input1 %input0
+         %result_iadd_2 = OpIAdd %uvec4 %input0 %input0
+         %result_iadd_3 = OpIAdd %uvec4 %input1 %input1
+         %result_iadd_4 = OpIAdd %ivec4 %input0 %input0
+         %result_iadd_5 = OpIAdd %ivec4 %input1 %input1
+         %result_iadd_6 = OpIAdd %ivec4 %input0 %input1
+         %result_iadd_7 = OpIAdd %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/comp/bitcast_iequal.asm.comp
+++ b/shaders/asm/comp/bitcast_iequal.asm.comp
@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+		 %bool = OpTypeBool
+		 %bvec4 = OpTypeVector %bool 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+		 %uone = OpConstant %uint 1
+		 %uzero = OpConstant %uint 0
+		 %uvec41 = OpConstantComposite %uvec4 %uone %uone %uone %uone
+		 %ivec41 = OpConstantComposite %ivec4 %one %one %one %one
+		 %uvec40 = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero
+		 %ivec40 = OpConstantComposite %ivec4 %zero %zero %zero %zero
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of IEqual
+         %result_iequal0 = OpIEqual %bvec4 %input0 %input1
+         %result_iequal1 = OpIEqual %bvec4 %input1 %input0
+         %result_iequal2 = OpIEqual %bvec4 %input0 %input0
+         %result_iequal3 = OpIEqual %bvec4 %input1 %input1
+		 %result_0       = OpSelect %uvec4 %result_iequal0 %uvec41 %uvec40
+		 %result_1       = OpSelect %uvec4 %result_iequal1 %uvec41 %uvec40
+		 %result_2       = OpSelect %uvec4 %result_iequal2 %uvec41 %uvec40
+		 %result_3       = OpSelect %uvec4 %result_iequal3 %uvec41 %uvec40
+		 %result_4       = OpSelect %ivec4 %result_iequal0 %ivec41 %ivec40
+		 %result_5       = OpSelect %ivec4 %result_iequal1 %ivec41 %ivec40
+		 %result_6       = OpSelect %ivec4 %result_iequal2 %ivec41 %ivec40
+		 %result_7       = OpSelect %ivec4 %result_iequal3 %ivec41 %ivec40
+
+			   OpStore %output_ptr_uvec4 %result_0
+			   OpStore %output_ptr_uvec4 %result_1
+			   OpStore %output_ptr_uvec4 %result_2
+			   OpStore %output_ptr_uvec4 %result_3
+			   OpStore %output_ptr_ivec4 %result_4
+			   OpStore %output_ptr_ivec4 %result_5
+			   OpStore %output_ptr_ivec4 %result_6
+			   OpStore %output_ptr_ivec4 %result_7
+
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/comp/bitcast_sar.asm.comp
+++ b/shaders/asm/comp/bitcast_sar.asm.comp
@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of ShiftRightArithmetic
+         %result_iadd_0 = OpShiftRightArithmetic %uvec4 %input0 %input1
+         %result_iadd_1 = OpShiftRightArithmetic %uvec4 %input1 %input0
+         %result_iadd_2 = OpShiftRightArithmetic %uvec4 %input0 %input0
+         %result_iadd_3 = OpShiftRightArithmetic %uvec4 %input1 %input1
+         %result_iadd_4 = OpShiftRightArithmetic %ivec4 %input0 %input0
+         %result_iadd_5 = OpShiftRightArithmetic %ivec4 %input1 %input1
+         %result_iadd_6 = OpShiftRightArithmetic %ivec4 %input0 %input1
+         %result_iadd_7 = OpShiftRightArithmetic %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/comp/bitcast_sdiv.asm.comp
+++ b/shaders/asm/comp/bitcast_sdiv.asm.comp
@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of SDiv
+         %result_iadd_0 = OpSDiv %uvec4 %input0 %input1
+         %result_iadd_1 = OpSDiv %uvec4 %input1 %input0
+         %result_iadd_2 = OpSDiv %uvec4 %input0 %input0
+         %result_iadd_3 = OpSDiv %uvec4 %input1 %input1
+         %result_iadd_4 = OpSDiv %ivec4 %input0 %input0
+         %result_iadd_5 = OpSDiv %ivec4 %input1 %input1
+         %result_iadd_6 = OpSDiv %ivec4 %input0 %input1
+         %result_iadd_7 = OpSDiv %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/comp/bitcast_slr.asm.comp
+++ b/shaders/asm/comp/bitcast_slr.asm.comp
@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of ShiftRightLogical
+         %result_iadd_0 = OpShiftRightLogical %uvec4 %input0 %input1
+         %result_iadd_1 = OpShiftRightLogical %uvec4 %input1 %input0
+         %result_iadd_2 = OpShiftRightLogical %uvec4 %input0 %input0
+         %result_iadd_3 = OpShiftRightLogical %uvec4 %input1 %input1
+         %result_iadd_4 = OpShiftRightLogical %ivec4 %input0 %input0
+         %result_iadd_5 = OpShiftRightLogical %ivec4 %input1 %input1
+         %result_iadd_6 = OpShiftRightLogical %ivec4 %input0 %input1
+         %result_iadd_7 = OpShiftRightLogical %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/comp/bitcast_udiv.asm.comp
+++ b/shaders/asm/comp/bitcast_udiv.asm.comp
@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+         %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
+
+; Test all variants of UDiv
+         %result_iadd_0 = OpUDiv %uvec4 %input0 %input1
+         %result_iadd_1 = OpUDiv %uvec4 %input1 %input0
+         %result_iadd_2 = OpUDiv %uvec4 %input0 %input0
+         %result_iadd_3 = OpUDiv %uvec4 %input1 %input1
+         %result_iadd_4 = OpUDiv %ivec4 %input0 %input0
+         %result_iadd_5 = OpUDiv %ivec4 %input1 %input1
+         %result_iadd_6 = OpUDiv %ivec4 %input0 %input1
+         %result_iadd_7 = OpUDiv %ivec4 %input1 %input0
+			   OpStore %output_ptr_uvec4 %result_iadd_0
+			   OpStore %output_ptr_uvec4 %result_iadd_1
+			   OpStore %output_ptr_uvec4 %result_iadd_2
+			   OpStore %output_ptr_uvec4 %result_iadd_3
+			   OpStore %output_ptr_ivec4 %result_iadd_4
+			   OpStore %output_ptr_ivec4 %result_iadd_5
+			   OpStore %output_ptr_ivec4 %result_iadd_6
+			   OpStore %output_ptr_ivec4 %result_iadd_7
+
+               OpReturn
+               OpFunctionEnd
--- a/shaders/comp/casts.comp
+++ b/shaders/comp/casts.comp
@ -0,0 +1,18 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+   ivec4 inputs[];
+};
+
+layout(binding = 1, std430) buffer SSBO1
+{
+   ivec4 outputs[];
+};
+
+void main()
+{
+   uint ident = gl_GlobalInvocationID.x;
+   outputs[ident] = ivec4(bvec4(inputs[ident] & 0x3));
+}
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@ -487,6 +487,21 @@ static string extract_string(const vector<uint32_t> &spirv, uint32_t offset)
 	throw CompilerError("String was not terminated before EOF");
 }

+static bool is_valid_spirv_version(uint32_t version)
+{
+	switch (version)
+	{
+	// Allow v99 since it tends to just work.
+	case 99:
+	case 0x10000: // SPIR-V 1.0
+	case 0x10100: // SPIR-V 1.1
+		return true;
+
+	default:
+		return false;
+	}
+}
+
 void Compiler::parse()
 {
 	auto len = spirv.size();
@ -502,17 +517,9 @@ void Compiler::parse()
 			          return swap_endian(c);
 			      });

-	// Allow v99 since it tends to just work, but warn about this.
-	if (s[0] != MagicNumber || (s[1] != Version && s[1] != 99))
+	if (s[0] != MagicNumber || !is_valid_spirv_version(s[1]))
 		throw CompilerError("Invalid SPIRV format.");

-	if (s[1] != Version)
-	{
-		fprintf(stderr, "SPIRV-Cross was compiled against SPIR-V version %d, but SPIR-V uses version %u. Buggy "
-		                "behavior due to ABI incompatibility might occur.\n",
-		        Version, s[1]);
-	}
-
 	uint32_t bound = s[3];
 	ids.resize(bound);
 	meta.resize(bound);
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -1330,10 +1330,74 @@ void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint3

 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
 {
-	emit_op(result_type, result_id, join(bitcast_glsl(result_type, op0), " ", op, " ", bitcast_glsl(result_type, op1)),
+	emit_op(result_type, result_id, join(to_expression(op0), " ", op, " ", to_expression(op1)),
 	        should_forward(op0) && should_forward(op1), true);
 }

+SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
+                                                uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
+{
+	auto &type0 = expression_type(op0);
+	auto &type1 = expression_type(op1);
+
+	// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
+	// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
+	// since equality test is exactly the same.
+	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
+
+	// Create a fake type so we can bitcast to it.
+	// We only deal with regular arithmetic types here like int, uints and so on.
+	SPIRType expected_type;
+	expected_type.basetype = input_type;
+	expected_type.vecsize = type0.vecsize;
+	expected_type.columns = type0.columns;
+	expected_type.width = type0.width;
+
+	if (cast)
+	{
+		cast_op0 = bitcast_glsl(expected_type, op0);
+		cast_op1 = bitcast_glsl(expected_type, op1);
+	}
+	else
+	{
+		// If we don't cast, our actual input type is that of the first (or second) argument.
+		cast_op0 = to_expression(op0);
+		cast_op1 = to_expression(op1);
+		input_type = type0.basetype;
+	}
+
+	return expected_type;
+}
+
+void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                       const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
+{
+	string cast_op0, cast_op1;
+	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
+	auto &out_type = get<SPIRType>(result_type);
+
+	// We might have casted away from the result type, so bitcast again.
+	// For example, arithmetic right shift with uint inputs.
+	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
+	bool extra_parens = true;
+	string expr;
+	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Bool)
+	{
+		expected_type.basetype = input_type;
+		expr = bitcast_glsl_op(out_type, expected_type);
+		expr += '(';
+		expr += join(cast_op0, " ", op, " ", cast_op1);
+		expr += ')';
+		extra_parens = false;
+	}
+	else
+	{
+		expr += join(cast_op0, " ", op, " ", cast_op1);
+	}
+
+	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1), extra_parens);
+}
+
 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
 {
 	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), should_forward(op0), false);
@ -1346,6 +1410,31 @@ void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id,
 	        should_forward(op0) && should_forward(op1), false);
 }

+// Function-call flavour of the casting binary op: emits "op(op0, op1)", bitcasting operands/result as needed.
+void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                            const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
+{
+	// The helper produces both argument strings and may rewrite input_type to the operands' real base type.
+	string arg0, arg1;
+	auto operand_type = binary_op_bitcast_helper(arg0, arg1, input_type, op0, op1, skip_cast_if_equal_type);
+	auto &result = get<SPIRType>(result_type);
+
+	const string call = join(op, "(", arg0, ", ", arg1, ")");
+	string expr;
+
+	// Bool results are never cast back: relational opcodes yield booleans rather than int/uint.
+	if (result.basetype == input_type || result.basetype == SPIRType::Bool)
+		expr = call;
+	else
+	{
+		// Evaluated in a different base type than the result, so wrap the call in a cast to the result type.
+		operand_type.basetype = input_type;
+		expr = join(bitcast_glsl_op(result, operand_type), "(", call, ")");
+	}
+
+	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1), false);
+}
+
 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                        uint32_t op2, const char *op)
 {
@ -1955,11 +2044,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 }

-string CompilerGLSL::bitcast_glsl_op(uint32_t result_type, uint32_t argument)
+string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
-	auto &out_type = get<SPIRType>(result_type);
-	auto &in_type = expression_type(argument);
-
 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
 		return type_to_glsl(out_type);
 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
@ -1976,9 +2062,9 @@ string CompilerGLSL::bitcast_glsl_op(uint32_t result_type, uint32_t argument)
 		return "";
 }

-string CompilerGLSL::bitcast_glsl(uint32_t result_type, uint32_t argument)
+string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
 {
-	auto op = bitcast_glsl_op(result_type, argument);
+	auto op = bitcast_glsl_op(result_type, expression_type(argument));
 	if (op.empty())
 		return to_expression(argument);
 	else
@ -2438,10 +2524,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	uint32_t length = instruction.length;

 #define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define BOP_CAST(op, type, skip_cast) emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, skip_cast)
 #define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
 #define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
 #define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
 #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define BFOP_CAST(op, type, skip_cast) emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, skip_cast)
+#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) // NOTE(review): token-identical to the BFOP definition two lines up; this added line looks redundant.
 #define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)

 	switch (opcode)
@ -2809,16 +2898,35 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;

 	case OpIAdd:
+	{
+		// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(+, type, true);
+		break;
+	}
+
 	case OpFAdd:
 		BOP(+);
 		break;

 	case OpISub:
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(-, type, true);
+		break;
+	}
+
 	case OpFSub:
 		BOP(-);
 		break;

 	case OpIMul:
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(*, type, true);
+		break;
+	}
+
 	case OpFMul:
 	case OpMatrixTimesVector:
 	case OpMatrixTimesScalar:
@ -2841,40 +2949,60 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;

 	case OpSDiv:
+		BOP_CAST(/, SPIRType::Int, false);
+		break;
+
 	case OpUDiv:
+		BOP_CAST(/, SPIRType::UInt, false);
+		break;
+
 	case OpFDiv:
 		BOP(/ );
 		break;

-	// Might need workaround if RightLocal can be used on signed types ...
 	case OpShiftRightLogical:
+		BOP_CAST(>>, SPIRType::UInt, false);
+		break;
+
 	case OpShiftRightArithmetic:
-		BOP(>> );
+		BOP_CAST(>>, SPIRType::Int, false);
 		break;

 	case OpShiftLeftLogical:
-		BOP(<< );
+		BOP_CAST(<<, SPIRType::UInt, true);
 		break;

 	case OpBitwiseOr:
-		BOP(| );
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(|, type, true);
 		break;
+	}

 	case OpBitwiseXor:
-		BOP (^);
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST (^, type, true);
 		break;
+	}

 	case OpBitwiseAnd:
-		BOP(&);
+	{
+		auto type = get<SPIRType>(ops[0]).basetype;
+		BOP_CAST(&, type, true);
 		break;
+	}

 	case OpNot:
 		UOP(~);
 		break;

 	case OpUMod:
+		BOP_CAST(%, SPIRType::UInt, false);
+		break;
+
 	case OpSMod:
-		BOP(% );
+		BOP_CAST(%, SPIRType::Int, false);
 		break;

 	case OpFMod:
@ -2906,8 +3034,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		UOP(!);
 		break;

-	case OpLogicalEqual:
 	case OpIEqual:
+	{
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(equal, SPIRType::Int, true);
+		else
+			BOP_CAST(==, SPIRType::Int, true);
+		break;
+	}
+
+	case OpLogicalEqual:
 	case OpFOrdEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@ -2917,8 +3053,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}

-	case OpLogicalNotEqual:
 	case OpINotEqual:
+	{
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(notEqual, SPIRType::Int, true);
+		else
+			BOP_CAST(!=, SPIRType::Int, true);
+		break;
+	}
+
+	case OpLogicalNotEqual:
 	case OpFOrdNotEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@ -2930,6 +3074,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)

 	case OpUGreaterThan:
 	case OpSGreaterThan:
+	{
+		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(greaterThan, type, false);
+		else
+			BOP_CAST(>, type, false);
+		break;
+	}
+
 	case OpFOrdGreaterThan:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@ -2941,6 +3094,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)

 	case OpUGreaterThanEqual:
 	case OpSGreaterThanEqual:
+	{
+		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(greaterThanEqual, type, false);
+		else
+			BOP_CAST(>=, type, false);
+		break;
+	}
+
 	case OpFOrdGreaterThanEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@ -2952,6 +3114,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)

 	case OpULessThan:
 	case OpSLessThan:
+	{
+		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(lessThan, type, false);
+		else
+			BOP_CAST(<, type, false);
+		break;
+	}
+
 	case OpFOrdLessThan:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@ -2963,6 +3134,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)

 	case OpULessThanEqual:
 	case OpSLessThanEqual:
+	{
+		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+		if (expression_type(ops[2]).vecsize > 1)
+			BFOP_CAST(lessThanEqual, type, false);
+		else
+			BOP_CAST(<=, type, false);
+		break;
+	}
+
 	case OpFOrdLessThanEqual:
 	{
 		if (expression_type(ops[2]).vecsize > 1)
@ -2995,7 +3175,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t arg = ops[2];

-		auto op = bitcast_glsl_op(result_type, arg);
+		auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
 		emit_unary_func_op(result_type, id, arg, op.c_str());
 		break;
 	}
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -236,8 +236,16 @@ protected:
 	void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
 	                          const char *op);
 	void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+	                              SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
 	void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+	                         SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
+
+	SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type,
+	                                  uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type);
+
 	void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	bool expression_is_forwarded(uint32_t id);
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
@ -265,8 +273,8 @@ protected:
 	uint32_t type_to_std430_array_stride(const SPIRType &type, uint64_t flags);
 	uint32_t type_to_std430_size(const SPIRType &type, uint64_t flags);

-	std::string bitcast_glsl(uint32_t result_type, uint32_t arg);
-	std::string bitcast_glsl_op(uint32_t result_type, uint32_t arg);
+	std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg);
+	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type);
 	std::string build_composite_combiner(const uint32_t *elems, uint32_t length);
 	bool remove_duplicate_swizzle(std::string &op);
 	bool remove_unity_swizzle(uint32_t base, std::string &op);
--- a/test_shaders.py
+++ b/test_shaders.py
@ -66,26 +66,32 @@ def validate_shader(shader, vulkan):
    else:
        subprocess.check_call(['glslangValidator', shader])

-def cross_compile(shader, vulkan):
+def cross_compile(shader, vulkan, spirv):
    spirv_f, spirv_path = tempfile.mkstemp()
    glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
    os.close(spirv_f)
    os.close(glsl_f)

-    if vulkan:
+    if vulkan or spirv:
        vulkan_glsl_f, vulkan_glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
        os.close(vulkan_glsl_f)

-    subprocess.check_call(['glslangValidator', '-V' if vulkan else '-G', '-o', spirv_path, shader])
+    if spirv:
+        subprocess.check_call(['spirv-as', '-o', spirv_path, shader])
+    else:
+        subprocess.check_call(['glslangValidator', '-V' if vulkan else '-G', '-o', spirv_path, shader])
+
+    if spirv:
+        subprocess.check_call(['spirv-val', spirv_path])

    spirv_cross_path = './spirv-cross'
    subprocess.check_call([spirv_cross_path, '--output', glsl_path, spirv_path])

    # A shader might not be possible to make valid GLSL from, skip validation for this case.
-    if not ('nocompat' in glsl_path):
+    if (not ('nocompat' in glsl_path)) and (not spirv):
        validate_shader(glsl_path, False)

-    if vulkan:
+    if vulkan or spirv:
        subprocess.check_call([spirv_cross_path, '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path])
        validate_shader(vulkan_glsl_path, vulkan)

@ -139,15 +145,19 @@ def regression_check(shader, glsl, update, keep):
 def shader_is_vulkan(shader):
    return '.vk.' in shader

+def shader_is_spirv(shader):  # True when the name contains '.asm.'; such inputs are assembled with spirv-as in cross_compile
+    return '.asm.' in shader
+
 def test_shader(stats, shader, update, keep):
    joined_path = os.path.join(shader[0], shader[1])
    vulkan = shader_is_vulkan(shader[1])
+    spirv = shader_is_spirv(shader[1])

    print('Testing shader:', joined_path)
-    spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan)
+    spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, spirv)

    # Only test GLSL stats if we have a shader following GL semantics.
-    if stats and (not vulkan):
+    if stats and (not vulkan) and (not spirv):
        cross_stats = get_shader_stats(glsl)

    regression_check(shader, glsl, update, keep)