From 14a4b087fbaa06d2780668816baa3fbdffd56e83 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Mon, 14 Oct 2019 13:48:22 +0200
Subject: [PATCH 1/4] GLSL: Support unordered floating point compare.

There is no direct way to express this, so invert boolean results to
force any NaN -> true. glslang emits Ordered compare instructions
everywhere, and the GLSL spec is not clear on this, so assume this is
fine.
---
 .../asm/frag/unordered-compare.asm.frag       |  33 ++++
 .../asm/frag/unordered-compare.asm.frag       | 177 ++++++++++++++++++
 spirv_glsl.cpp                                | 102 ++++++++--
 3 files changed, 296 insertions(+), 16 deletions(-)
 create mode 100644 reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
 create mode 100644 shaders-no-opt/asm/frag/unordered-compare.asm.frag

diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
new file mode 100644
index 00000000..2cc81295
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
@@ -0,0 +1,33 @@
+#version 450
+
+layout(location = 0) in vec4 A;
+layout(location = 1) in vec4 B;
+layout(location = 0) out vec4 FragColor;
+
+vec4 test_vector()
+{
+    bvec4 le = not(greaterThanEqual(A, B));
+    bvec4 leq = not(greaterThan(A, B));
+    bvec4 ge = not(lessThanEqual(A, B));
+    bvec4 geq = not(lessThan(A, B));
+    bvec4 eq = not(notEqual(A, B));
+    bvec4 neq = not(equal(A, B));
+    return ((((mix(vec4(0.0), vec4(1.0), le) + mix(vec4(0.0), vec4(1.0), leq)) + mix(vec4(0.0), vec4(1.0), ge)) + mix(vec4(0.0), vec4(1.0), geq)) + mix(vec4(0.0), vec4(1.0), eq)) + mix(vec4(0.0), vec4(1.0), neq);
+}
+
+float test_scalar()
+{
+    bool le = !(A.x >= B.x);
+    bool leq = !(A.x > B.x);
+    bool ge = !(A.x <= B.x);
+    bool geq = !(A.x < B.x);
+    bool eq = !(A.x != B.x);
+    bool neq = !(A.x == B.x);
+    return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
+}
+
+void main()
+{
+    FragColor = test_vector() + vec4(test_scalar());
+}
+
diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag
new file mode 100644
index 00000000..4ad8fc5a
--- /dev/null
+++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag
@@ -0,0 +1,177 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 132
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %A %B %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %test_vector_ "test_vector("
+               OpName %test_scalar_ "test_scalar("
+               OpName %le "le"
+               OpName %A "A"
+               OpName %B "B"
+               OpName %leq "leq"
+               OpName %ge "ge"
+               OpName %geq "geq"
+               OpName %eq "eq"
+               OpName %neq "neq"
+               OpName %le_0 "le"
+               OpName %leq_0 "leq"
+               OpName %ge_0 "ge"
+               OpName %geq_0 "geq"
+               OpName %eq_0 "eq"
+               OpName %neq_0 "neq"
+               OpName %FragColor "FragColor"
+               OpDecorate %A Location 0
+               OpDecorate %B Location 1
+               OpDecorate %FragColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %8 = OpTypeFunction %v4float
+         %11 = OpTypeFunction %float
+       %bool = OpTypeBool
+     %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+          %A = OpVariable %_ptr_Input_v4float Input
+          %B = OpVariable %_ptr_Input_v4float Input
+    %float_0 = OpConstant %float 0
+    %float_1 = OpConstant %float 1
+         %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+         %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_bool = OpTypePointer Function %bool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %128 = OpFunctionCall %v4float %test_vector_
+        %129 = OpFunctionCall %float %test_scalar_
+        %130 = OpCompositeConstruct %v4float %129 %129 %129 %129
+        %131 = OpFAdd %v4float %128 %130
+               OpStore %FragColor %131
+               OpReturn
+               OpFunctionEnd
+%test_vector_ = OpFunction %v4float None %8
+         %10 = OpLabel
+         %le = OpVariable %_ptr_Function_v4bool Function
+        %leq = OpVariable %_ptr_Function_v4bool Function
+         %ge = OpVariable %_ptr_Function_v4bool Function
+        %geq = OpVariable %_ptr_Function_v4bool Function
+         %eq = OpVariable %_ptr_Function_v4bool Function
+        %neq = OpVariable %_ptr_Function_v4bool Function
+         %20 = OpLoad %v4float %A
+         %22 = OpLoad %v4float %B
+         %23 = OpFUnordLessThan %v4bool %20 %22
+               OpStore %le %23
+         %25 = OpLoad %v4float %A
+         %26 = OpLoad %v4float %B
+         %27 = OpFUnordLessThanEqual %v4bool %25 %26
+               OpStore %leq %27
+         %29 = OpLoad %v4float %A
+         %30 = OpLoad %v4float %B
+         %31 = OpFUnordGreaterThan %v4bool %29 %30
+               OpStore %ge %31
+         %33 = OpLoad %v4float %A
+         %34 = OpLoad %v4float %B
+         %35 = OpFUnordGreaterThanEqual %v4bool %33 %34
+               OpStore %geq %35
+         %37 = OpLoad %v4float %A
+         %38 = OpLoad %v4float %B
+         %39 = OpFUnordEqual %v4bool %37 %38
+               OpStore %eq %39
+         %41 = OpLoad %v4float %A
+         %42 = OpLoad %v4float %B
+         %43 = OpFUnordNotEqual %v4bool %41 %42
+               OpStore %neq %43
+         %44 = OpLoad %v4bool %le
+         %49 = OpSelect %v4float %44 %48 %47
+         %50 = OpLoad %v4bool %leq
+         %51 = OpSelect %v4float %50 %48 %47
+         %52 = OpFAdd %v4float %49 %51
+         %53 = OpLoad %v4bool %ge
+         %54 = OpSelect %v4float %53 %48 %47
+         %55 = OpFAdd %v4float %52 %54
+         %56 = OpLoad %v4bool %geq
+         %57 = OpSelect %v4float %56 %48 %47
+         %58 = OpFAdd %v4float %55 %57
+         %59 = OpLoad %v4bool %eq
+         %60 = OpSelect %v4float %59 %48 %47
+         %61 = OpFAdd %v4float %58 %60
+         %62 = OpLoad %v4bool %neq
+         %63 = OpSelect %v4float %62 %48 %47
+         %64 = OpFAdd %v4float %61 %63
+               OpReturnValue %64
+               OpFunctionEnd
+%test_scalar_ = OpFunction %float None %11
+         %13 = OpLabel
+       %le_0 = OpVariable %_ptr_Function_bool Function
+      %leq_0 = OpVariable %_ptr_Function_bool Function
+       %ge_0 = OpVariable %_ptr_Function_bool Function
+      %geq_0 = OpVariable %_ptr_Function_bool Function
+       %eq_0 = OpVariable %_ptr_Function_bool Function
+      %neq_0 = OpVariable %_ptr_Function_bool Function
+         %72 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %73 = OpLoad %float %72
+         %74 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %75 = OpLoad %float %74
+         %76 = OpFUnordLessThan %bool %73 %75
+               OpStore %le_0 %76
+         %78 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %79 = OpLoad %float %78
+         %80 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %81 = OpLoad %float %80
+         %82 = OpFUnordLessThanEqual %bool %79 %81
+               OpStore %leq_0 %82
+         %84 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %87 = OpLoad %float %86
+         %88 = OpFUnordGreaterThan %bool %85 %87
+               OpStore %ge_0 %88
+         %90 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %91 = OpLoad %float %90
+         %92 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %93 = OpLoad %float %92
+         %94 = OpFUnordGreaterThanEqual %bool %91 %93
+               OpStore %geq_0 %94
+         %96 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %97 = OpLoad %float %96
+         %98 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %99 = OpLoad %float %98
+        %100 = OpFUnordEqual %bool %97 %99
+               OpStore %eq_0 %100
+        %102 = OpAccessChain %_ptr_Input_float %A %uint_0
+        %103 = OpLoad %float %102
+        %104 = OpAccessChain %_ptr_Input_float %B %uint_0
+        %105 = OpLoad %float %104
+        %106 = OpFUnordNotEqual %bool %103 %105
+               OpStore %neq_0 %106
+        %107 = OpLoad %bool %le_0
+        %108 = OpSelect %float %107 %float_1 %float_0
+        %109 = OpLoad %bool %leq_0
+        %110 = OpSelect %float %109 %float_1 %float_0
+        %111 = OpFAdd %float %108 %110
+        %112 = OpLoad %bool %ge_0
+        %113 = OpSelect %float %112 %float_1 %float_0
+        %114 = OpFAdd %float %111 %113
+        %115 = OpLoad %bool %geq_0
+        %116 = OpSelect %float %115 %float_1 %float_0
+        %117 = OpFAdd %float %114 %116
+        %118 = OpLoad %bool %eq_0
+        %119 = OpSelect %float %118 %float_1 %float_0
+        %120 = OpFAdd %float %117 %119
+        %121 = OpLoad %bool %neq_0
+        %122 = OpSelect %float %121 %float_1 %float_0
+        %123 = OpFAdd %float %120 %122
+               OpReturnValue %123
+               OpFunctionEnd
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 0dd234eb..34c95d87 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -10073,28 +10073,98 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFUnordEqual:
-		GLSL_BFOP(unsupported_FUnordEqual);
-		break;
-
 	case OpFUnordNotEqual:
-		GLSL_BFOP(unsupported_FUnordNotEqual);
-		break;
-
 	case OpFUnordLessThan:
-		GLSL_BFOP(unsupported_FUnordLessThan);
-		break;
-
 	case OpFUnordGreaterThan:
-		GLSL_BFOP(unsupported_FUnordGreaterThan);
-		break;
-
 	case OpFUnordLessThanEqual:
-		GLSL_BFOP(unsupported_FUnordLessThanEqual);
-		break;
-
 	case OpFUnordGreaterThanEqual:
-		GLSL_BFOP(unsupported_FUnordGreaterThanEqual);
+	{
+		// GLSL doesn't specify if floating point comparisons are ordered or unordered,
+		// but glslang always emits ordered floating point compares for GLSL.
+		// To get unordered compares, we can test the opposite thing and invert the result.
+		// This way, we force true when there is any NaN present.
+		uint32_t op0 = ops[2];
+		uint32_t op1 = ops[3];
+
+		string expr;
+		if (expression_type(op0).vecsize > 1)
+		{
+			const char *comp_op = nullptr;
+			switch (opcode)
+			{
+			case OpFUnordEqual:
+				comp_op = "notEqual";
+				break;
+
+			case OpFUnordNotEqual:
+				comp_op = "equal";
+				break;
+
+			case OpFUnordLessThan:
+				comp_op = "greaterThanEqual";
+				break;
+
+			case OpFUnordLessThanEqual:
+				comp_op = "greaterThan";
+				break;
+
+			case OpFUnordGreaterThan:
+				comp_op = "lessThanEqual";
+				break;
+
+			case OpFUnordGreaterThanEqual:
+				comp_op = "lessThan";
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+
+			expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
+		}
+		else
+		{
+			const char *comp_op = nullptr;
+			switch (opcode)
+			{
+			case OpFUnordEqual:
+				comp_op = " != ";
+				break;
+
+			case OpFUnordNotEqual:
+				comp_op = " == ";
+				break;
+
+			case OpFUnordLessThan:
+				comp_op = " >= ";
+				break;
+
+			case OpFUnordLessThanEqual:
+				comp_op = " > ";
+				break;
+
+			case OpFUnordGreaterThan:
+				comp_op = " <= ";
+				break;
+
+			case OpFUnordGreaterThanEqual:
+				comp_op = " < ";
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+
+			expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
+		}
+
+		emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
+		inherit_expression_dependencies(ops[1], op0);
+		inherit_expression_dependencies(ops[1], op1);
 		break;
+	}
 
 	case OpReportIntersectionNV:
 		statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");

From b960ae3b7089c216b1647ba07c7f7b2a62f11a90 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Mon, 14 Oct 2019 15:04:00 +0200
Subject: [PATCH 2/4] HLSL: Partially implement Unordered compare.

We cannot correctly implement unordered equal/ordered not equal without
a lot of extra instructions which slows normal code down.
---
 .../asm/frag/unordered-compare.asm.frag       |  51 +++++
 .../asm/frag/unordered-compare.asm.frag       | 177 ++++++++++++++++++
 spirv_glsl.cpp                                |  10 +-
 spirv_glsl.hpp                                |   2 +-
 spirv_hlsl.cpp                                |  90 +++++++--
 5 files changed, 314 insertions(+), 16 deletions(-)
 create mode 100644 reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
 create mode 100644 shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag

diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
new file mode 100644
index 00000000..1143ade7
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
@@ -0,0 +1,51 @@
+static float4 A;
+static float4 B;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 A : TEXCOORD0;
+    float4 B : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+float4 test_vector()
+{
+    bool4 le = bool4(!(A.x >= B.x), !(A.y >= B.y), !(A.z >= B.z), !(A.w >= B.w));
+    bool4 leq = bool4(!(A.x > B.x), !(A.y > B.y), !(A.z > B.z), !(A.w > B.w));
+    bool4 ge = bool4(!(A.x <= B.x), !(A.y <= B.y), !(A.z <= B.z), !(A.w <= B.w));
+    bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w));
+    bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w);
+    bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+    return ((((float4(le.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, le.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, le.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, le.w ? 1.0f.xxxx.w : 0.0f.xxxx.w) + float4(leq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, leq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, leq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, leq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(ge.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, ge.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, ge.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, ge.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(geq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, geq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, geq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, geq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(eq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, eq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, eq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, eq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(neq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, neq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, neq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, neq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w);
+}
+
+float test_scalar()
+{
+    bool le = !(A.x >= B.x);
+    bool leq = !(A.x > B.x);
+    bool ge = !(A.x <= B.x);
+    bool geq = !(A.x < B.x);
+    bool eq = A.x == B.x;
+    bool neq = A.x != B.x;
+    return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
+}
+
+void frag_main()
+{
+    FragColor = test_vector() + test_scalar().xxxx;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    A = stage_input.A;
+    B = stage_input.B;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
new file mode 100644
index 00000000..4ad8fc5a
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
@@ -0,0 +1,177 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 132
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %A %B %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %test_vector_ "test_vector("
+               OpName %test_scalar_ "test_scalar("
+               OpName %le "le"
+               OpName %A "A"
+               OpName %B "B"
+               OpName %leq "leq"
+               OpName %ge "ge"
+               OpName %geq "geq"
+               OpName %eq "eq"
+               OpName %neq "neq"
+               OpName %le_0 "le"
+               OpName %leq_0 "leq"
+               OpName %ge_0 "ge"
+               OpName %geq_0 "geq"
+               OpName %eq_0 "eq"
+               OpName %neq_0 "neq"
+               OpName %FragColor "FragColor"
+               OpDecorate %A Location 0
+               OpDecorate %B Location 1
+               OpDecorate %FragColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %8 = OpTypeFunction %v4float
+         %11 = OpTypeFunction %float
+       %bool = OpTypeBool
+     %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+          %A = OpVariable %_ptr_Input_v4float Input
+          %B = OpVariable %_ptr_Input_v4float Input
+    %float_0 = OpConstant %float 0
+    %float_1 = OpConstant %float 1
+         %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+         %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_bool = OpTypePointer Function %bool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %128 = OpFunctionCall %v4float %test_vector_
+        %129 = OpFunctionCall %float %test_scalar_
+        %130 = OpCompositeConstruct %v4float %129 %129 %129 %129
+        %131 = OpFAdd %v4float %128 %130
+               OpStore %FragColor %131
+               OpReturn
+               OpFunctionEnd
+%test_vector_ = OpFunction %v4float None %8
+         %10 = OpLabel
+         %le = OpVariable %_ptr_Function_v4bool Function
+        %leq = OpVariable %_ptr_Function_v4bool Function
+         %ge = OpVariable %_ptr_Function_v4bool Function
+        %geq = OpVariable %_ptr_Function_v4bool Function
+         %eq = OpVariable %_ptr_Function_v4bool Function
+        %neq = OpVariable %_ptr_Function_v4bool Function
+         %20 = OpLoad %v4float %A
+         %22 = OpLoad %v4float %B
+         %23 = OpFUnordLessThan %v4bool %20 %22
+               OpStore %le %23
+         %25 = OpLoad %v4float %A
+         %26 = OpLoad %v4float %B
+         %27 = OpFUnordLessThanEqual %v4bool %25 %26
+               OpStore %leq %27
+         %29 = OpLoad %v4float %A
+         %30 = OpLoad %v4float %B
+         %31 = OpFUnordGreaterThan %v4bool %29 %30
+               OpStore %ge %31
+         %33 = OpLoad %v4float %A
+         %34 = OpLoad %v4float %B
+         %35 = OpFUnordGreaterThanEqual %v4bool %33 %34
+               OpStore %geq %35
+         %37 = OpLoad %v4float %A
+         %38 = OpLoad %v4float %B
+         %39 = OpFUnordEqual %v4bool %37 %38
+               OpStore %eq %39
+         %41 = OpLoad %v4float %A
+         %42 = OpLoad %v4float %B
+         %43 = OpFUnordNotEqual %v4bool %41 %42
+               OpStore %neq %43
+         %44 = OpLoad %v4bool %le
+         %49 = OpSelect %v4float %44 %48 %47
+         %50 = OpLoad %v4bool %leq
+         %51 = OpSelect %v4float %50 %48 %47
+         %52 = OpFAdd %v4float %49 %51
+         %53 = OpLoad %v4bool %ge
+         %54 = OpSelect %v4float %53 %48 %47
+         %55 = OpFAdd %v4float %52 %54
+         %56 = OpLoad %v4bool %geq
+         %57 = OpSelect %v4float %56 %48 %47
+         %58 = OpFAdd %v4float %55 %57
+         %59 = OpLoad %v4bool %eq
+         %60 = OpSelect %v4float %59 %48 %47
+         %61 = OpFAdd %v4float %58 %60
+         %62 = OpLoad %v4bool %neq
+         %63 = OpSelect %v4float %62 %48 %47
+         %64 = OpFAdd %v4float %61 %63
+               OpReturnValue %64
+               OpFunctionEnd
+%test_scalar_ = OpFunction %float None %11
+         %13 = OpLabel
+       %le_0 = OpVariable %_ptr_Function_bool Function
+      %leq_0 = OpVariable %_ptr_Function_bool Function
+       %ge_0 = OpVariable %_ptr_Function_bool Function
+      %geq_0 = OpVariable %_ptr_Function_bool Function
+       %eq_0 = OpVariable %_ptr_Function_bool Function
+      %neq_0 = OpVariable %_ptr_Function_bool Function
+         %72 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %73 = OpLoad %float %72
+         %74 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %75 = OpLoad %float %74
+         %76 = OpFUnordLessThan %bool %73 %75
+               OpStore %le_0 %76
+         %78 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %79 = OpLoad %float %78
+         %80 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %81 = OpLoad %float %80
+         %82 = OpFUnordLessThanEqual %bool %79 %81
+               OpStore %leq_0 %82
+         %84 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %87 = OpLoad %float %86
+         %88 = OpFUnordGreaterThan %bool %85 %87
+               OpStore %ge_0 %88
+         %90 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %91 = OpLoad %float %90
+         %92 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %93 = OpLoad %float %92
+         %94 = OpFUnordGreaterThanEqual %bool %91 %93
+               OpStore %geq_0 %94
+         %96 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %97 = OpLoad %float %96
+         %98 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %99 = OpLoad %float %98
+        %100 = OpFUnordEqual %bool %97 %99
+               OpStore %eq_0 %100
+        %102 = OpAccessChain %_ptr_Input_float %A %uint_0
+        %103 = OpLoad %float %102
+        %104 = OpAccessChain %_ptr_Input_float %B %uint_0
+        %105 = OpLoad %float %104
+        %106 = OpFUnordNotEqual %bool %103 %105
+               OpStore %neq_0 %106
+        %107 = OpLoad %bool %le_0
+        %108 = OpSelect %float %107 %float_1 %float_0
+        %109 = OpLoad %bool %leq_0
+        %110 = OpSelect %float %109 %float_1 %float_0
+        %111 = OpFAdd %float %108 %110
+        %112 = OpLoad %bool %ge_0
+        %113 = OpSelect %float %112 %float_1 %float_0
+        %114 = OpFAdd %float %111 %113
+        %115 = OpLoad %bool %geq_0
+        %116 = OpSelect %float %115 %float_1 %float_0
+        %117 = OpFAdd %float %114 %116
+        %118 = OpLoad %bool %eq_0
+        %119 = OpSelect %float %118 %float_1 %float_0
+        %120 = OpFAdd %float %117 %119
+        %121 = OpLoad %bool %neq_0
+        %122 = OpSelect %float %121 %float_1 %float_0
+        %123 = OpFAdd %float %120 %122
+               OpReturnValue %123
+               OpFunctionEnd
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 34c95d87..e7fb3fe3 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -4159,7 +4159,7 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_
 }
 
 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
-                                           const char *op)
+                                           const char *op, bool negate)
 {
 	auto &type = get<SPIRType>(result_type);
 	auto expr = type_to_glsl_constructor(type);
@@ -4168,11 +4168,15 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result
 	{
 		// Make sure to call to_expression multiple times to ensure
 		// that these expressions are properly flushed to temporaries if needed.
+		if (negate)
+			expr += "!(";
 		expr += to_extract_component_expression(op0, i);
 		expr += ' ';
 		expr += op;
 		expr += ' ';
 		expr += to_extract_component_expression(op1, i);
+		if (negate)
+			expr += ")";
 
 		if (i + 1 < type.vecsize)
 			expr += ", ";
@@ -8838,7 +8842,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &type = get<SPIRType>(result_type);
 
 		if (type.vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false);
 		else
 			GLSL_BOP(||);
 		break;
@@ -8852,7 +8856,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &type = get<SPIRType>(result_type);
 
 		if (type.vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false);
 		else
 			GLSL_BOP(&&);
 		break;
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index e989a790..ddce638b 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -471,7 +471,7 @@ protected:
 	void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op);
 	void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
-	void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, bool negate);
 	void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
 	                         SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
 
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 1946096e..8241d3b6 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -4194,7 +4194,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false);
 		else
 			HLSL_BOP_CAST(==, int_type);
 		break;
@@ -4202,12 +4202,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpLogicalEqual:
 	case OpFOrdEqual:
+	case OpFUnordEqual:
 	{
+		// HLSL != operator is unordered.
+		// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
+		// isnan() is apparently implemented as x != x as well.
+		// We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual.
+		// HACK: FUnordEqual will be implemented as FOrdEqual.
+
 		auto result_type = ops[0];
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false);
 		else
 			HLSL_BOP(==);
 		break;
@@ -4219,7 +4226,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false);
 		else
 			HLSL_BOP_CAST(!=, int_type);
 		break;
@@ -4227,12 +4234,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpLogicalNotEqual:
 	case OpFOrdNotEqual:
+	case OpFUnordNotEqual:
 	{
+		// HLSL != operator is unordered.
+		// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
+		// isnan() is apparently implemented as x != x as well.
+
+		// FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here.
+		// We would need to do something like not(UnordEqual), but that cannot be expressed either.
+		// Adding a lot of NaN checks would be a breaking change from perspective of performance.
+		// SPIR-V will generally use isnan() checks when this even matters.
+		// HACK: FOrdNotEqual will be implemented as FUnordEqual.
+
 		auto result_type = ops[0];
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false);
 		else
 			HLSL_BOP(!=);
 		break;
@@ -4246,7 +4264,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false);
 		else
 			HLSL_BOP_CAST(>, type);
 		break;
@@ -4258,12 +4276,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false);
 		else
 			HLSL_BOP(>);
 		break;
 	}
 
+	case OpFUnordGreaterThan:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpUGreaterThanEqual:
 	case OpSGreaterThanEqual:
 	{
@@ -4272,7 +4302,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false);
 		else
 			HLSL_BOP_CAST(>=, type);
 		break;
@@ -4284,12 +4314,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false);
 		else
 			HLSL_BOP(>=);
 		break;
 	}
 
+	case OpFUnordGreaterThanEqual:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpULessThan:
 	case OpSLessThan:
 	{
@@ -4298,7 +4340,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false);
 		else
 			HLSL_BOP_CAST(<, type);
 		break;
@@ -4310,12 +4352,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false);
 		else
 			HLSL_BOP(<);
 		break;
 	}
 
+	case OpFUnordLessThan:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpULessThanEqual:
 	case OpSLessThanEqual:
 	{
@@ -4324,7 +4378,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false);
 		else
 			HLSL_BOP_CAST(<=, type);
 		break;
@@ -4336,12 +4390,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false);
 		else
 			HLSL_BOP(<=);
 		break;
 	}
 
+	case OpFUnordLessThanEqual:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpImageQueryLod:
 		emit_texture_op(instruction);
 		break;

From 3bf9fa7ed6b11fca301a60c0076d4caab4d4e782 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Mon, 14 Oct 2019 15:23:38 +0200
Subject: [PATCH 3/4] GLSL: Deal correctly with bitwidth on integer compares.

---
 spirv_glsl.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index e7fb3fe3..ca420d5c 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -8913,7 +8913,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpUGreaterThan:
 	case OpSGreaterThan:
 	{
-		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(greaterThan, type);
 		else
@@ -8933,7 +8933,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpUGreaterThanEqual:
 	case OpSGreaterThanEqual:
 	{
-		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(greaterThanEqual, type);
 		else
@@ -8953,7 +8953,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpULessThan:
 	case OpSLessThan:
 	{
-		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThan ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(lessThan, type);
 		else
@@ -8973,7 +8973,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpULessThanEqual:
 	case OpSLessThanEqual:
 	{
-		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(lessThanEqual, type);
 		else

From a9be92569f4c1f5ee813c04f220921583a7dcaf3 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Mon, 14 Oct 2019 16:08:39 +0200
Subject: [PATCH 4/4] HLSL: Fix unrolled S/G LE/LT/GE/GT opcodes.

Need to bitcast the unrolled expressions as well.
---
 .../asm/comp/bitcast_icmp.asm.comp            |  28 +++++
 .../asm/comp/bitcast_icmp.asm.comp            |  29 +++++
 .../shaders/asm/comp/bitcast_icmp.asm.comp    |  27 +++++
 .../asm/comp/bitcast_icmp.asm.comp            |  28 +++++
 .../asm/comp/bitcast_icmp.asm.comp            |  29 +++++
 .../shaders/asm/comp/bitcast_icmp.asm.comp    |  27 +++++
 shaders-hlsl/asm/comp/bitcast_icmp.asm.comp   | 101 ++++++++++++++++++
 shaders-msl/asm/comp/bitcast_icmp.asm.comp    | 101 ++++++++++++++++++
 shaders/asm/comp/bitcast_icmp.asm.comp        | 101 ++++++++++++++++++
 spirv_glsl.cpp                                |  34 +++++-
 spirv_glsl.hpp                                |   3 +-
 spirv_hlsl.cpp                                |  41 +++----
 12 files changed, 523 insertions(+), 26 deletions(-)
 create mode 100644 reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 reference/shaders/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 shaders-msl/asm/comp/bitcast_icmp.asm.comp
 create mode 100644 shaders/asm/comp/bitcast_icmp.asm.comp

diff --git a/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..35143a48
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,28 @@
+RWByteAddressBuffer _5 : register(u0);
+RWByteAddressBuffer _6 : register(u1);
+
+void comp_main()
+{
+    bool4 _31 = bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w);
+    bool4 _32 = bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w);
+    bool4 _33 = bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w));
+    bool4 _34 = bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w));
+    bool4 _35 = bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w);
+    bool4 _36 = bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w);
+    bool4 _37 = bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w));
+    bool4 _38 = bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w));
+    _6.Store4(0, uint4(_31.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _31.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _31.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _31.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_32.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _32.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _32.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _32.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_33.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _33.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _33.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _33.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_34.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _34.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _34.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _34.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_35.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _35.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _35.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _35.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_36.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _36.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _36.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _36.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_37.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _37.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _37.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _37.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_38.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _38.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _38.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _38.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..31c71daa
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct _3
+{
+    int4 _m0;
+    uint4 _m1;
+};
+
+struct _4
+{
+    uint4 _m0;
+    int4 _m1;
+};
+
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
+{
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) < _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) <= _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 < uint4(_5._m0));
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 <= uint4(_5._m0));
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) > _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) >= _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 > uint4(_5._m0));
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 >= uint4(_5._m0));
+}
+
diff --git a/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..bed3b90a
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) restrict buffer _3_5
+{
+    ivec4 _m0;
+    uvec4 _m1;
+} _5;
+
+layout(binding = 1, std430) restrict buffer _4_6
+{
+    uvec4 _m0;
+    ivec4 _m1;
+} _6;
+
+void main()
+{
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(_5._m1, uvec4(_5._m0)));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(_5._m1, uvec4(_5._m0)));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(_5._m1, uvec4(_5._m0)));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(_5._m1, uvec4(_5._m0)));
+}
+
diff --git a/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..35143a48
--- /dev/null
+++ b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,28 @@
+RWByteAddressBuffer _5 : register(u0);
+RWByteAddressBuffer _6 : register(u1);
+
+void comp_main()
+{
+    bool4 _31 = bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w);
+    bool4 _32 = bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w);
+    bool4 _33 = bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w));
+    bool4 _34 = bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w));
+    bool4 _35 = bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w);
+    bool4 _36 = bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w);
+    bool4 _37 = bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w));
+    bool4 _38 = bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w));
+    _6.Store4(0, uint4(_31.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _31.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _31.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _31.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_32.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _32.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _32.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _32.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_33.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _33.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _33.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _33.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_34.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _34.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _34.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _34.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_35.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _35.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _35.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _35.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_36.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _36.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _36.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _36.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_37.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _37.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _37.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _37.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+    _6.Store4(0, uint4(_38.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _38.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _38.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _38.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..31c71daa
--- /dev/null
+++ b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct _3
+{
+    int4 _m0;
+    uint4 _m1;
+};
+
+struct _4
+{
+    uint4 _m0;
+    int4 _m1;
+};
+
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
+{
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) < _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) <= _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 < uint4(_5._m0));
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 <= uint4(_5._m0));
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) > _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) >= _5._m0);
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 > uint4(_5._m0));
+    _6._m0 = select(uint4(0u), uint4(1u), _5._m1 >= uint4(_5._m0));
+}
+
diff --git a/reference/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/shaders/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..bed3b90a
--- /dev/null
+++ b/reference/shaders/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) restrict buffer _3_5
+{
+    ivec4 _m0;
+    uvec4 _m1;
+} _5;
+
+layout(binding = 1, std430) restrict buffer _4_6
+{
+    uvec4 _m0;
+    ivec4 _m1;
+} _6;
+
+void main()
+{
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(_5._m1, uvec4(_5._m0)));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(_5._m1, uvec4(_5._m0)));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(ivec4(_5._m1), _5._m0));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(_5._m1, uvec4(_5._m0)));
+    _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(_5._m1, uvec4(_5._m0)));
+}
+
diff --git a/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..b7b4e0b2
--- /dev/null
+++ b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,101 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+			   OpDecorate %inputs Restrict
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+			   OpDecorate %outputs Restrict
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+		  %bool = OpTypeBool
+		  %bvec4 = OpTypeVector %bool 4
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+		 %uzero = OpConstant %uint 0
+		 %uone = OpConstant %uint 1
+		 %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone
+		 %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+
+         %result_slt = OpSLessThan %bvec4 %input0 %input1
+         %result_sle = OpSLessThanEqual %bvec4 %input0 %input1
+         %result_ult = OpULessThan %bvec4 %input0 %input1
+         %result_ule = OpULessThanEqual %bvec4 %input0 %input1
+         %result_sgt = OpSGreaterThan %bvec4 %input0 %input1
+         %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1
+         %result_ugt = OpUGreaterThan %bvec4 %input0 %input1
+         %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1
+
+		 %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_slt
+
+		 %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sle
+
+		 %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ult
+
+		 %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ule
+
+		 %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sgt
+
+		 %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sge
+
+		 %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ugt
+
+		 %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_uge
+
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/shaders-msl/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..b7b4e0b2
--- /dev/null
+++ b/shaders-msl/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,101 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+			   OpDecorate %inputs Restrict
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+			   OpDecorate %outputs Restrict
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+		  %bool = OpTypeBool
+		  %bvec4 = OpTypeVector %bool 4
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+		 %uzero = OpConstant %uint 0
+		 %uone = OpConstant %uint 1
+		 %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone
+		 %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+
+         %result_slt = OpSLessThan %bvec4 %input0 %input1
+         %result_sle = OpSLessThanEqual %bvec4 %input0 %input1
+         %result_ult = OpULessThan %bvec4 %input0 %input1
+         %result_ule = OpULessThanEqual %bvec4 %input0 %input1
+         %result_sgt = OpSGreaterThan %bvec4 %input0 %input1
+         %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1
+         %result_ugt = OpUGreaterThan %bvec4 %input0 %input1
+         %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1
+
+		 %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_slt
+
+		 %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sle
+
+		 %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ult
+
+		 %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ule
+
+		 %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sgt
+
+		 %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sge
+
+		 %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ugt
+
+		 %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_uge
+
+
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/bitcast_icmp.asm.comp b/shaders/asm/comp/bitcast_icmp.asm.comp
new file mode 100644
index 00000000..b7b4e0b2
--- /dev/null
+++ b/shaders/asm/comp/bitcast_icmp.asm.comp
@@ -0,0 +1,101 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %func "main"
+               OpExecutionMode %func LocalSize 1 1 1
+               OpSource ESSL 310
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpMemberDecorate %input_struct 0 Offset 0
+               OpMemberDecorate %input_struct 1 Offset 16
+               OpMemberDecorate %output_struct 0 Offset 0
+               OpMemberDecorate %output_struct 1 Offset 16
+               OpDecorate %input_struct BufferBlock
+               OpDecorate %inputs DescriptorSet 0
+               OpDecorate %inputs Binding 0
+			   OpDecorate %inputs Restrict
+               OpDecorate %output_struct BufferBlock
+               OpDecorate %outputs DescriptorSet 0
+               OpDecorate %outputs Binding 1
+			   OpDecorate %outputs Restrict
+
+          %void = OpTypeVoid
+          %main_func = OpTypeFunction %void
+
+		  %bool = OpTypeBool
+		  %bvec4 = OpTypeVector %bool 4
+
+          %uint = OpTypeInt 32 0
+          %uvec4 = OpTypeVector %uint 4
+
+         %int = OpTypeInt 32 1
+         %ivec4 = OpTypeVector %int 4
+
+         %ivec4_ptr = OpTypePointer Uniform %ivec4
+         %uvec4_ptr = OpTypePointer Uniform %uvec4
+
+		 %zero = OpConstant %int 0
+		 %one = OpConstant %int 1
+		 %uzero = OpConstant %uint 0
+		 %uone = OpConstant %uint 1
+		 %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone
+		 %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero
+
+         %input_struct = OpTypeStruct %ivec4 %uvec4
+         %input_struct_ptr = OpTypePointer Uniform %input_struct
+         %inputs = OpVariable %input_struct_ptr Uniform
+         %output_struct = OpTypeStruct %uvec4 %ivec4
+         %output_struct_ptr = OpTypePointer Uniform %output_struct
+         %outputs = OpVariable %output_struct_ptr Uniform
+
+          %func = OpFunction %void None %main_func
+          %block = OpLabel
+
+         %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
+         %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
+         %input1 = OpLoad %ivec4 %input1_ptr
+         %input0 = OpLoad %uvec4 %input0_ptr
+
+         %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
+
+         %result_slt = OpSLessThan %bvec4 %input0 %input1
+         %result_sle = OpSLessThanEqual %bvec4 %input0 %input1
+         %result_ult = OpULessThan %bvec4 %input0 %input1
+         %result_ule = OpULessThanEqual %bvec4 %input0 %input1
+         %result_sgt = OpSGreaterThan %bvec4 %input0 %input1
+         %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1
+         %result_ugt = OpUGreaterThan %bvec4 %input0 %input1
+         %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1
+
+		 %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_slt
+
+		 %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sle
+
+		 %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ult
+
+		 %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ule
+
+		 %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sgt
+
+		 %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_sge
+
+		 %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_ugt
+
+		 %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse
+		 OpStore %output_ptr_uvec4 %int_uge
+
+
+               OpReturn
+               OpFunctionEnd
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index ca420d5c..9fd4ff05 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -4159,8 +4159,18 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_
 }
 
 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
-                                           const char *op, bool negate)
+                                           const char *op, bool negate, SPIRType::BaseType expected_type)
 {
+	auto &type0 = expression_type(op0);
+	auto &type1 = expression_type(op1);
+
+	SPIRType target_type0 = type0;
+	SPIRType target_type1 = type1;
+	target_type0.basetype = expected_type;
+	target_type1.basetype = expected_type;
+	target_type0.vecsize = 1;
+	target_type1.vecsize = 1;
+
 	auto &type = get<SPIRType>(result_type);
 	auto expr = type_to_glsl_constructor(type);
 	expr += '(';
@@ -4170,11 +4180,21 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result
 		// that these expressions are properly flushed to temporaries if needed.
 		if (negate)
 			expr += "!(";
-		expr += to_extract_component_expression(op0, i);
+
+		if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
+			expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
+		else
+			expr += to_extract_component_expression(op0, i);
+
 		expr += ' ';
 		expr += op;
 		expr += ' ';
-		expr += to_extract_component_expression(op1, i);
+
+		if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
+			expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
+		else
+			expr += to_extract_component_expression(op1, i);
+
 		if (negate)
 			expr += ")";
 
@@ -7820,6 +7840,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst
 	case OpSLessThanEqual:
 	case OpSGreaterThan:
 	case OpSGreaterThanEqual:
+	case OpULessThan:
+	case OpULessThanEqual:
+	case OpUGreaterThan:
+	case OpUGreaterThanEqual:
 		return expression_type(ops[2]).width;
 
 	default:
@@ -8842,7 +8866,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &type = get<SPIRType>(result_type);
 
 		if (type.vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
 		else
 			GLSL_BOP(||);
 		break;
@@ -8856,7 +8880,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &type = get<SPIRType>(result_type);
 
 		if (type.vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
 		else
 			GLSL_BOP(&&);
 		break;
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index ddce638b..6f59bd82 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -471,7 +471,8 @@ protected:
 	void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op);
 	void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
-	void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, bool negate);
+	void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+	                             bool negate, SPIRType::BaseType expected_type);
 	void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
 	                         SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
 
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 8241d3b6..4d4e276c 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -4032,6 +4032,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
 	uint32_t integer_width = get_integer_width_for_instruction(instruction);
 	auto int_type = to_signed_basetype(integer_width);
+	auto uint_type = to_unsigned_basetype(integer_width);
 
 	switch (opcode)
 	{
@@ -4194,7 +4195,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
 		else
 			HLSL_BOP_CAST(==, int_type);
 		break;
@@ -4214,7 +4215,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(==);
 		break;
@@ -4226,7 +4227,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP_CAST(!=, int_type);
 		break;
@@ -4250,7 +4251,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(!=);
 		break;
@@ -4261,10 +4262,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	{
 		auto result_type = ops[0];
 		auto id = ops[1];
-		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type);
 		else
 			HLSL_BOP_CAST(>, type);
 		break;
@@ -4276,7 +4277,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(>);
 		break;
@@ -4288,7 +4289,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown);
 		else
 			CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -4300,9 +4301,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto result_type = ops[0];
 		auto id = ops[1];
 
-		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type);
 		else
 			HLSL_BOP_CAST(>=, type);
 		break;
@@ -4314,7 +4315,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(>=);
 		break;
@@ -4326,7 +4327,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown);
 		else
 			CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -4338,9 +4339,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto result_type = ops[0];
 		auto id = ops[1];
 
-		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThan ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type);
 		else
 			HLSL_BOP_CAST(<, type);
 		break;
@@ -4352,7 +4353,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(<);
 		break;
@@ -4364,7 +4365,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown);
 		else
 			CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -4376,9 +4377,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto result_type = ops[0];
 		auto id = ops[1];
 
-		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type);
 		else
 			HLSL_BOP_CAST(<=, type);
 		break;
@@ -4390,7 +4391,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(<=);
 		break;
@@ -4402,7 +4403,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true);
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown);
 		else
 			CompilerGLSL::emit_instruction(instruction);
 		break;