From 31be74a853b92d98893e2c35d2319cb227afc9b0 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Thu, 3 Mar 2022 11:04:45 +0100
Subject: [PATCH] Add relax_nan_checks options.

Makes codegen from typical D3D emulation SPIR-V more readable.
Also makes cross compilation with NotEqual more sensible.
It's very rare to actually need the strict NaN-checks in practice.

Also, glslang now emits UnordNotEqual by default it seems, so give up
trying to assume OrdNotEqual. Harmonize for UnordNotEqual as the sane
default.
---
 CMakeLists.txt                                |   2 +-
 main.cpp                                      |   5 +
 .../comp/nmin-max-clamp.relax-nan.asm.comp    |  30 ++
 .../unord-relational-op.relax-nan.asm.frag    |  22 ++
 .../comp/mat3-row-maj-read-write-const.comp   |   3 +-
 .../comp/threadgroup-boolean-workaround.comp  |   2 +-
 .../comp/nmin-max-clamp.relax-nan.asm.comp    |  42 +++
 reference/opt/shaders/comp/cfg.comp           |   2 +-
 .../asm/frag/unordered-compare.asm.frag       |   1 +
 .../frag/unordered-compare.relax-nan.asm.frag |  52 ++++
 ...sm30.vert => empty-shader.nofxc.sm30.vert} |   0
 .../comp/nmin-max-clamp.relax-nan.asm.comp    |  27 ++
 .../frag/fp16.desktop.invalid.frag            |   4 +-
 .../min-max-clamp.relax-nan.invalid.asm.frag  |  69 +++++
 .../asm/frag/unord-relational-op.asm.frag     |   9 +-
 .../unord-relational-op.relax-nan.asm.frag    |  59 ++++
 .../comp/mat3-row-maj-read-write-const.comp   |   2 +-
 .../comp/threadgroup-boolean-workaround.comp  |   2 +-
 .../asm/frag/unordered-compare.asm.frag       |   5 +-
 .../frag/unordered-compare.relax-nan.asm.frag |  34 ++
 .../frag/fp16.invalid.desktop.frag            |   4 +-
 .../comp/nmin-max-clamp.relax-nan.asm.comp    |  39 +++
 reference/shaders/comp/cfg.comp               |  10 +-
 .../asm/frag/unordered-compare.asm.frag       |   2 +
 .../frag/unordered-compare.relax-nan.asm.frag | 179 +++++++++++
 ...sm30.vert => empty-shader.nofxc.sm30.vert} |   0
 .../comp/nmin-max-clamp.relax-nan.asm.comp    | 203 ++++++++++++
 .../min-max-clamp.relax-nan.invalid.asm.frag  | 293 ++++++++++++++++++
 .../asm/frag/unord-relational-op.asm.frag     |   2 +
 .../unord-relational-op.relax-nan.asm.frag    | 207 +++++++++++++
 .../asm/frag/unordered-compare.asm.frag       |   2 +
 .../frag/unordered-compare.relax-nan.asm.frag | 179 +++++++++++
 .../comp/nmin-max-clamp.relax-nan.asm.comp    | 203 ++++++++++++
 spirv_cross_c.cpp                             |   3 +
 spirv_cross_c.h                               |   4 +-
 spirv_glsl.cpp                                |  74 ++++-
 spirv_glsl.hpp                                |  12 +
 spirv_hlsl.cpp                                |   4 +
 spirv_msl.cpp                                 |  12 +-
 test_shaders.py                               |   6 +
 40 files changed, 1777 insertions(+), 33 deletions(-)
 create mode 100644 reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
 create mode 100644 reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
 create mode 100644 reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
 create mode 100644 reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
 rename reference/shaders-hlsl-no-opt/vert/{empty-shader.sm30.vert => empty-shader.nofxc.sm30.vert} (100%)
 create mode 100644 reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
 create mode 100644 reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
 create mode 100644 reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
 create mode 100644 reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
 create mode 100644 shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
 rename shaders-hlsl-no-opt/vert/{empty-shader.sm30.vert => empty-shader.nofxc.sm30.vert} (100%)
 create mode 100644 shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
 create mode 100644 shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
 create mode 100644 shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
 create mode 100644 shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
 create mode 100644 shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6860c5bc..d8ea49a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -332,7 +332,7 @@ if (SPIRV_CROSS_STATIC)
 endif()
 
 set(spirv-cross-abi-major 0)
-set(spirv-cross-abi-minor 48)
+set(spirv-cross-abi-minor 49)
 set(spirv-cross-abi-patch 0)
 
 if (SPIRV_CROSS_SHARED)
diff --git a/main.cpp b/main.cpp
index fe197376..9124ecaf 100644
--- a/main.cpp
+++ b/main.cpp
@@ -669,6 +669,7 @@ struct CLIArguments
 	bool emit_line_directives = false;
 	bool enable_storage_image_qualifier_deduction = true;
 	bool force_zero_initialized_variables = false;
+	bool relax_nan_checks = false;
 	uint32_t force_recompile_max_debug_iterations = 3;
 	SmallVector<uint32_t> msl_discrete_descriptor_sets;
 	SmallVector<uint32_t> msl_device_argument_buffers;
@@ -919,6 +920,7 @@ static void print_help_common()
 	                "\t[--mask-stage-output-builtin <Position|PointSize|ClipDistance|CullDistance>]:\n"
 	                "\t\tIf a stage output variable with matching builtin is active, "
 	                "optimize away the variable if it can affect cross-stage linking correctness.\n"
+	                "\t[--relax-nan-checks]:\n\t\tRelax NaN checks for N{Clamp,Min,Max} and ordered vs. unordered compare instructions.\n"
 	);
 	// clang-format on
 }
@@ -1292,6 +1294,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 	opts.emit_line_directives = args.emit_line_directives;
 	opts.enable_storage_image_qualifier_deduction = args.enable_storage_image_qualifier_deduction;
 	opts.force_zero_initialized_variables = args.force_zero_initialized_variables;
+	opts.relax_nan_checks = args.relax_nan_checks;
 	opts.force_recompile_max_debug_iterations = args.force_recompile_max_debug_iterations;
 	compiler->set_common_options(opts);
 
@@ -1689,6 +1692,8 @@ static int main_inner(int argc, char *argv[])
 		args.force_recompile_max_debug_iterations = parser.next_uint();
 	});
 
+	cbs.add("--relax-nan-checks", [&](CLIParser &) { args.relax_nan_checks = true; });
+
 	cbs.default_handler = [&args](const char *value) { args.input = value; };
 	cbs.add("-", [&args](CLIParser &) { args.input = "-"; });
 	cbs.error_handler = [] { print_help(); };
diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..9f51eff1
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,30 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main()
+{
+    _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+    _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+    _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+    _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+    _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+    _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+    _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+    _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+    _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+    for (int _139 = 0; _139 < 2; )
+    {
+        _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+        _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60)))));
+        _139++;
+        continue;
+    }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
new file mode 100644
index 00000000..aee290f5
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float a_tmp [[function_constant(1)]];
+constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0;
+constant float b_tmp [[function_constant(2)]];
+constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(a + b);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp
index 47c83afe..cf26178e 100644
--- a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp
+++ b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp
@@ -12,8 +12,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
 
 kernel void main0(device model_t& model [[buffer(0)]])
 {
-    float _38 = (transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x;
-    if ((isunordered(_38, 0.0) || _38 != 0.0))
+    if ((transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x != 0.0)
     {
         model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0)));
     }
diff --git a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp
index 8b80929a..c1eccf27 100644
--- a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp
+++ b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp
@@ -13,7 +13,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
 kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     threadgroup short4 foo[4];
-    foo[gl_LocalInvocationIndex] = short4((isunordered(_23.values[gl_GlobalInvocationID.x], float4(10.0)) || _23.values[gl_GlobalInvocationID.x] != float4(10.0)));
+    foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0));
     threadgroup_barrier(mem_flags::mem_threadgroup);
     _23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u]));
 }
diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..32d8e025
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,42 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a1;
+    vec2 a2;
+    vec3 a3;
+    vec4 a4;
+    float b1;
+    vec2 b2;
+    vec3 b3;
+    vec4 b4;
+    float c1;
+    vec2 c2;
+    vec3 c3;
+    vec4 c4;
+} _4;
+
+void main()
+{
+    _4.a1 = min(_4.b1, _4.c1);
+    _4.a2 = min(_4.b2, _4.c2);
+    _4.a3 = min(_4.b3, _4.c3);
+    _4.a4 = min(_4.b4, _4.c4);
+    _4.a1 = max(_4.b1, _4.c1);
+    _4.a2 = max(_4.b2, _4.c2);
+    _4.a3 = max(_4.b3, _4.c3);
+    _4.a4 = max(_4.b4, _4.c4);
+    _4.a1 = clamp(_4.a1, _4.b1, _4.c1);
+    _4.a2 = clamp(_4.a2, _4.b2, _4.c2);
+    _4.a3 = clamp(_4.a3, _4.b3, _4.c3);
+    _4.a4 = clamp(_4.a4, _4.b4, _4.c4);
+    for (int _139 = 0; _139 < 2; )
+    {
+        _4.a2 = min(_4.b2, _4.c2);
+        _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y);
+        _139++;
+        continue;
+    }
+}
+
diff --git a/reference/opt/shaders/comp/cfg.comp b/reference/opt/shaders/comp/cfg.comp
index 97cdbc76..af207378 100644
--- a/reference/opt/shaders/comp/cfg.comp
+++ b/reference/opt/shaders/comp/cfg.comp
@@ -10,7 +10,7 @@ float _188;
 
 void main()
 {
-    if (!(_11.data == 0.0))
+    if (_11.data != 0.0)
     {
         _11.data = 10.0;
     }
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
index f18e8e77..021333cc 100644
--- a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
@@ -21,6 +21,7 @@ float4 test_vector()
     bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w));
     bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w);
     bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+    neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
     return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq);
 }
 
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..0172c20b
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,52 @@
+static float4 A;
+static float4 B;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 A : TEXCOORD0;
+    float4 B : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+float4 test_vector()
+{
+    bool4 le = bool4(A.x < B.x, A.y < B.y, A.z < B.z, A.w < B.w);
+    bool4 leq = bool4(A.x <= B.x, A.y <= B.y, A.z <= B.z, A.w <= B.w);
+    bool4 ge = bool4(A.x > B.x, A.y > B.y, A.z > B.z, A.w > B.w);
+    bool4 geq = bool4(A.x >= B.x, A.y >= B.y, A.z >= B.z, A.w >= B.w);
+    bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w);
+    bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+    neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+    return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq);
+}
+
+float test_scalar()
+{
+    bool le = A.x < B.x;
+    bool leq = A.x <= B.x;
+    bool ge = A.x > B.x;
+    bool geq = A.x >= B.x;
+    bool eq = A.x == B.x;
+    bool neq = A.x != B.x;
+    return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
+}
+
+void frag_main()
+{
+    FragColor = test_vector() + test_scalar().xxxx;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    A = stage_input.A;
+    B = stage_input.B;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
similarity index 100%
rename from reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert
rename to reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..88f53a4c
--- /dev/null
+++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,27 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main()
+{
+    _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+    _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+    _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+    _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+    _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+    _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+    _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+    _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+    _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+    for (int i = 0; i < 2; i++, _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60))))))
+    {
+        _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+    }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag
index 9eb0f595..16182ae2 100644
--- a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag
+++ b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag
@@ -77,7 +77,7 @@ void test_conversions()
     half one = test_result();
     int a = int(one);
     uint b = uint(one);
-    bool c = (isunordered(one, half(0.0)) || one != half(0.0));
+    bool c = one != half(0.0);
     float d = float(one);
     half a2 = half(a);
     half b2 = half(b);
@@ -152,7 +152,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1)
     btmp = v4 > v4;
     btmp = v4 >= v4;
     btmp = v4 == v4;
-    btmp = (isunordered(v4, v4) || v4 != v4);
+    btmp = v4 != v4;
     res = dfdx(v4);
     res = dfdy(v4);
     res = dfdx(v4);
diff --git a/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
new file mode 100644
index 00000000..7835e013
--- /dev/null
+++ b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
@@ -0,0 +1,69 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    float v1 [[user(locn0)]];
+    float2 v2 [[user(locn1)]];
+    float3 v3 [[user(locn2)]];
+    float4 v4 [[user(locn3)]];
+    half h1 [[user(locn4)]];
+    half2 h2 [[user(locn5)]];
+    half3 h3 [[user(locn6)]];
+    half4 h4 [[user(locn7)]];
+};
+
+fragment void main0(main0_in in [[stage_in]])
+{
+    float res = fast::min(in.v1, in.v1);
+    res = fast::max(in.v1, in.v1);
+    res = fast::clamp(in.v1, in.v1, in.v1);
+    res = fast::min(in.v1, in.v1);
+    res = fast::max(in.v1, in.v1);
+    res = fast::clamp(in.v1, in.v1, in.v1);
+    float2 res2 = fast::min(in.v2, in.v2);
+    res2 = fast::max(in.v2, in.v2);
+    res2 = fast::clamp(in.v2, in.v2, in.v2);
+    res2 = fast::min(in.v2, in.v2);
+    res2 = fast::max(in.v2, in.v2);
+    res2 = fast::clamp(in.v2, in.v2, in.v2);
+    float3 res3 = fast::min(in.v3, in.v3);
+    res3 = fast::max(in.v3, in.v3);
+    res3 = fast::clamp(in.v3, in.v3, in.v3);
+    res3 = fast::min(in.v3, in.v3);
+    res3 = fast::max(in.v3, in.v3);
+    res3 = fast::clamp(in.v3, in.v3, in.v3);
+    float4 res4 = fast::min(in.v4, in.v4);
+    res4 = fast::max(in.v4, in.v4);
+    res4 = fast::clamp(in.v4, in.v4, in.v4);
+    res4 = fast::min(in.v4, in.v4);
+    res4 = fast::max(in.v4, in.v4);
+    res4 = fast::clamp(in.v4, in.v4, in.v4);
+    half hres = min(in.h1, in.h1);
+    hres = max(in.h1, in.h1);
+    hres = clamp(in.h1, in.h1, in.h1);
+    hres = min(in.h1, in.h1);
+    hres = max(in.h1, in.h1);
+    hres = clamp(in.h1, in.h1, in.h1);
+    half2 hres2 = min(in.h2, in.h2);
+    hres2 = max(in.h2, in.h2);
+    hres2 = clamp(in.h2, in.h2, in.h2);
+    hres2 = min(in.h2, in.h2);
+    hres2 = max(in.h2, in.h2);
+    hres2 = clamp(in.h2, in.h2, in.h2);
+    half3 hres3 = min(in.h3, in.h3);
+    hres3 = max(in.h3, in.h3);
+    hres3 = clamp(in.h3, in.h3, in.h3);
+    hres3 = min(in.h3, in.h3);
+    hres3 = max(in.h3, in.h3);
+    hres3 = clamp(in.h3, in.h3, in.h3);
+    half4 hres4 = min(in.h4, in.h4);
+    hres4 = max(in.h4, in.h4);
+    hres4 = clamp(in.h4, in.h4, in.h4);
+    hres4 = min(in.h4, in.h4);
+    hres4 = max(in.h4, in.h4);
+    hres4 = clamp(in.h4, in.h4, in.h4);
+}
+
diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag
index 8df57c55..624408c4 100644
--- a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag
+++ b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag
@@ -29,25 +29,26 @@ fragment main0_out main0(main0_in in [[stage_in]])
     float t0 = a;
     float t1 = b;
     bool c1 = (isunordered(a, b) || a == b);
-    bool c2 = (isunordered(a, b) || a != b);
+    c1 = a != b;
+    bool c2 = a != b;
     bool c3 = (isunordered(a, b) || a < b);
     bool c4 = (isunordered(a, b) || a > b);
     bool c5 = (isunordered(a, b) || a <= b);
     bool c6 = (isunordered(a, b) || a >= b);
     bool2 c7 = (isunordered(in.c, in.d) || in.c == in.d);
-    bool2 c8 = (isunordered(in.c, in.d) || in.c != in.d);
+    bool2 c8 = in.c != in.d;
     bool2 c9 = (isunordered(in.c, in.d) || in.c < in.d);
     bool2 c10 = (isunordered(in.c, in.d) || in.c > in.d);
     bool2 c11 = (isunordered(in.c, in.d) || in.c <= in.d);
     bool2 c12 = (isunordered(in.c, in.d) || in.c >= in.d);
     bool3 c13 = (isunordered(in.e, in.f) || in.e == in.f);
-    bool3 c14 = (isunordered(in.e, in.f) || in.e != in.f);
+    bool3 c14 = in.e != in.f;
     bool3 c15 = (isunordered(in.e, in.f) || in.e < in.f);
     bool3 c16 = (isunordered(in.e, in.f) || in.e > in.f);
     bool3 c17 = (isunordered(in.e, in.f) || in.e <= in.f);
     bool3 c18 = (isunordered(in.e, in.f) || in.e >= in.f);
     bool4 c19 = (isunordered(in.g, in.h) || in.g == in.h);
-    bool4 c20 = (isunordered(in.g, in.h) || in.g != in.h);
+    bool4 c20 = in.g != in.h;
     bool4 c21 = (isunordered(in.g, in.h) || in.g < in.h);
     bool4 c22 = (isunordered(in.g, in.h) || in.g > in.h);
     bool4 c23 = (isunordered(in.g, in.h) || in.g <= in.h);
diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
new file mode 100644
index 00000000..48482806
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
@@ -0,0 +1,59 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float a_tmp [[function_constant(1)]];
+constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0;
+constant float b_tmp [[function_constant(2)]];
+constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float2 c [[user(locn2)]];
+    float2 d [[user(locn3)]];
+    float3 e [[user(locn4)]];
+    float3 f [[user(locn5)]];
+    float4 g [[user(locn6)]];
+    float4 h [[user(locn7)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    float t0 = a;
+    float t1 = b;
+    bool c1 = a == b;
+    c1 = a != b;
+    bool c2 = a != b;
+    bool c3 = a < b;
+    bool c4 = a > b;
+    bool c5 = a <= b;
+    bool c6 = a >= b;
+    bool2 c7 = in.c == in.d;
+    bool2 c8 = in.c != in.d;
+    bool2 c9 = in.c < in.d;
+    bool2 c10 = in.c > in.d;
+    bool2 c11 = in.c <= in.d;
+    bool2 c12 = in.c >= in.d;
+    bool3 c13 = in.e == in.f;
+    bool3 c14 = in.e != in.f;
+    bool3 c15 = in.e < in.f;
+    bool3 c16 = in.e > in.f;
+    bool3 c17 = in.e <= in.f;
+    bool3 c18 = in.e >= in.f;
+    bool4 c19 = in.g == in.h;
+    bool4 c20 = in.g != in.h;
+    bool4 c21 = in.g < in.h;
+    bool4 c22 = in.g > in.h;
+    bool4 c23 = in.g <= in.h;
+    bool4 c24 = in.g >= in.h;
+    out.FragColor = float4(t0 + t1);
+    return out;
+}
+
diff --git a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp
index 54fb89bc..3de0ef44 100644
--- a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp
+++ b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp
@@ -14,7 +14,7 @@ kernel void main0(device model_t& model [[buffer(0)]])
 {
     float3x3 mtx_cm = transpose(model.mtx_rm);
     float3x3 mtx1 = mtx_cm * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0));
-    if ((isunordered(mtx1[0].x, 0.0) || mtx1[0].x != 0.0))
+    if (mtx1[0].x != 0.0)
     {
         model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0)));
     }
diff --git a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp
index d01b1351..754f7357 100644
--- a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp
+++ b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp
@@ -15,7 +15,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
 static inline __attribute__((always_inline))
 void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& v_23, thread uint3& gl_GlobalInvocationID)
 {
-    foo[gl_LocalInvocationIndex] = short4((isunordered(v_23.values[gl_GlobalInvocationID.x], float4(10.0)) || v_23.values[gl_GlobalInvocationID.x] != float4(10.0)));
+    foo[gl_LocalInvocationIndex] = short4(v_23.values[gl_GlobalInvocationID.x] != float4(10.0));
     threadgroup_barrier(mem_flags::mem_threadgroup);
     v_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u]));
 }
diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
index 21aadfb1..61122bbd 100644
--- a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
@@ -11,7 +11,8 @@ vec4 test_vector()
     bvec4 ge = not(lessThanEqual(A, B));
     bvec4 geq = not(lessThan(A, B));
     bvec4 eq = not(notEqual(A, B));
-    bvec4 neq = not(equal(A, B));
+    bvec4 neq = notEqual(A, B);
+    neq = notEqual(A, B);
     return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq);
 }
 
@@ -22,7 +23,7 @@ float test_scalar()
     bool ge = !(A.x <= B.x);
     bool geq = !(A.x < B.x);
     bool eq = !(A.x != B.x);
-    bool neq = !(A.x == B.x);
+    bool neq = A.x != B.x;
     return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
 }
 
diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..24db7c9f
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,34 @@
+#version 450
+
+layout(location = 0) in vec4 A;
+layout(location = 1) in vec4 B;
+layout(location = 0) out vec4 FragColor;
+
+vec4 test_vector()
+{
+    bvec4 le = lessThan(A, B);
+    bvec4 leq = lessThanEqual(A, B);
+    bvec4 ge = greaterThan(A, B);
+    bvec4 geq = greaterThanEqual(A, B);
+    bvec4 eq = equal(A, B);
+    bvec4 neq = notEqual(A, B);
+    neq = notEqual(A, B);
+    return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq);
+}
+
+float test_scalar()
+{
+    bool le = A.x < B.x;
+    bool leq = A.x <= B.x;
+    bool ge = A.x > B.x;
+    bool geq = A.x >= B.x;
+    bool eq = A.x == B.x;
+    bool neq = A.x != B.x;
+    return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
+}
+
+void main()
+{
+    FragColor = test_vector() + vec4(test_scalar());
+}
+
diff --git a/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag
index 55f5235e..faf79b2b 100644
--- a/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag
+++ b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag
@@ -50,7 +50,7 @@ void test_conversions()
     float16_t one = test_result();
     int a = int(one);
     uint b = uint(one);
-    bool c = !(one == float16_t(0.0));
+    bool c = one != float16_t(0.0);
     float d = float(one);
     double e = double(one);
     float16_t a2 = float16_t(a);
@@ -126,7 +126,7 @@ void test_builtins()
     btmp = greaterThan(v4, v4);
     btmp = greaterThanEqual(v4, v4);
     btmp = equal(v4, v4);
-    btmp = not(equal(v4, v4));
+    btmp = notEqual(v4, v4);
     res = dFdx(v4);
     res = dFdy(v4);
     res = dFdxFine(v4);
diff --git a/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..449a87d0
--- /dev/null
+++ b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,39 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a1;
+    vec2 a2;
+    vec3 a3;
+    vec4 a4;
+    float b1;
+    vec2 b2;
+    vec3 b3;
+    vec4 b4;
+    float c1;
+    vec2 c2;
+    vec3 c3;
+    vec4 c4;
+} _4;
+
+void main()
+{
+    _4.a1 = min(_4.b1, _4.c1);
+    _4.a2 = min(_4.b2, _4.c2);
+    _4.a3 = min(_4.b3, _4.c3);
+    _4.a4 = min(_4.b4, _4.c4);
+    _4.a1 = max(_4.b1, _4.c1);
+    _4.a2 = max(_4.b2, _4.c2);
+    _4.a3 = max(_4.b3, _4.c3);
+    _4.a4 = max(_4.b4, _4.c4);
+    _4.a1 = clamp(_4.a1, _4.b1, _4.c1);
+    _4.a2 = clamp(_4.a2, _4.b2, _4.c2);
+    _4.a3 = clamp(_4.a3, _4.b3, _4.c3);
+    _4.a4 = clamp(_4.a4, _4.b4, _4.c4);
+    for (int i = 0; i < 2; i++, _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y))
+    {
+        _4.a2 = min(_4.b2, _4.c2);
+    }
+}
+
diff --git a/reference/shaders/comp/cfg.comp b/reference/shaders/comp/cfg.comp
index a91c8732..77ad312c 100644
--- a/reference/shaders/comp/cfg.comp
+++ b/reference/shaders/comp/cfg.comp
@@ -8,7 +8,7 @@ layout(binding = 0, std430) buffer SSBO
 
 void test()
 {
-    if (!(_11.data == 0.0))
+    if (_11.data != 0.0)
     {
         float tmp = 10.0;
         _11.data = tmp;
@@ -18,12 +18,12 @@ void test()
         float tmp_1 = 15.0;
         _11.data = tmp_1;
     }
-    if (!(_11.data == 0.0))
+    if (_11.data != 0.0)
     {
         float e;
-        if (!(_11.data == 5.0))
+        if (_11.data != 5.0)
         {
-            if (!(_11.data == 6.0))
+            if (_11.data != 6.0)
             {
                 e = 10.0;
             }
@@ -70,7 +70,7 @@ void test()
     float m;
     do
     {
-    } while (!(m == 20.0));
+    } while (m != 20.0);
     _11.data = m;
 }
 
diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
index 4ad8fc5a..2e5e0309 100644
--- a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
@@ -89,6 +89,8 @@
          %38 = OpLoad %v4float %B
          %39 = OpFUnordEqual %v4bool %37 %38
                OpStore %eq %39
+         %ordered = OpFOrdNotEqual %v4bool %37 %38
+               OpStore %neq %ordered
          %41 = OpLoad %v4float %A
          %42 = OpLoad %v4float %B
          %43 = OpFUnordNotEqual %v4bool %41 %42
diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..2e5e0309
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,179 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 132
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %A %B %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %test_vector_ "test_vector("
+               OpName %test_scalar_ "test_scalar("
+               OpName %le "le"
+               OpName %A "A"
+               OpName %B "B"
+               OpName %leq "leq"
+               OpName %ge "ge"
+               OpName %geq "geq"
+               OpName %eq "eq"
+               OpName %neq "neq"
+               OpName %le_0 "le"
+               OpName %leq_0 "leq"
+               OpName %ge_0 "ge"
+               OpName %geq_0 "geq"
+               OpName %eq_0 "eq"
+               OpName %neq_0 "neq"
+               OpName %FragColor "FragColor"
+               OpDecorate %A Location 0
+               OpDecorate %B Location 1
+               OpDecorate %FragColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %8 = OpTypeFunction %v4float
+         %11 = OpTypeFunction %float
+       %bool = OpTypeBool
+     %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+          %A = OpVariable %_ptr_Input_v4float Input
+          %B = OpVariable %_ptr_Input_v4float Input
+    %float_0 = OpConstant %float 0
+    %float_1 = OpConstant %float 1
+         %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+         %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_bool = OpTypePointer Function %bool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %128 = OpFunctionCall %v4float %test_vector_
+        %129 = OpFunctionCall %float %test_scalar_
+        %130 = OpCompositeConstruct %v4float %129 %129 %129 %129
+        %131 = OpFAdd %v4float %128 %130
+               OpStore %FragColor %131
+               OpReturn
+               OpFunctionEnd
+%test_vector_ = OpFunction %v4float None %8
+         %10 = OpLabel
+         %le = OpVariable %_ptr_Function_v4bool Function
+        %leq = OpVariable %_ptr_Function_v4bool Function
+         %ge = OpVariable %_ptr_Function_v4bool Function
+        %geq = OpVariable %_ptr_Function_v4bool Function
+         %eq = OpVariable %_ptr_Function_v4bool Function
+        %neq = OpVariable %_ptr_Function_v4bool Function
+         %20 = OpLoad %v4float %A
+         %22 = OpLoad %v4float %B
+         %23 = OpFUnordLessThan %v4bool %20 %22
+               OpStore %le %23
+         %25 = OpLoad %v4float %A
+         %26 = OpLoad %v4float %B
+         %27 = OpFUnordLessThanEqual %v4bool %25 %26
+               OpStore %leq %27
+         %29 = OpLoad %v4float %A
+         %30 = OpLoad %v4float %B
+         %31 = OpFUnordGreaterThan %v4bool %29 %30
+               OpStore %ge %31
+         %33 = OpLoad %v4float %A
+         %34 = OpLoad %v4float %B
+         %35 = OpFUnordGreaterThanEqual %v4bool %33 %34
+               OpStore %geq %35
+         %37 = OpLoad %v4float %A
+         %38 = OpLoad %v4float %B
+         %39 = OpFUnordEqual %v4bool %37 %38
+               OpStore %eq %39
+         %ordered = OpFOrdNotEqual %v4bool %37 %38
+               OpStore %neq %ordered
+         %41 = OpLoad %v4float %A
+         %42 = OpLoad %v4float %B
+         %43 = OpFUnordNotEqual %v4bool %41 %42
+               OpStore %neq %43
+         %44 = OpLoad %v4bool %le
+         %49 = OpSelect %v4float %44 %48 %47
+         %50 = OpLoad %v4bool %leq
+         %51 = OpSelect %v4float %50 %48 %47
+         %52 = OpFAdd %v4float %49 %51
+         %53 = OpLoad %v4bool %ge
+         %54 = OpSelect %v4float %53 %48 %47
+         %55 = OpFAdd %v4float %52 %54
+         %56 = OpLoad %v4bool %geq
+         %57 = OpSelect %v4float %56 %48 %47
+         %58 = OpFAdd %v4float %55 %57
+         %59 = OpLoad %v4bool %eq
+         %60 = OpSelect %v4float %59 %48 %47
+         %61 = OpFAdd %v4float %58 %60
+         %62 = OpLoad %v4bool %neq
+         %63 = OpSelect %v4float %62 %48 %47
+         %64 = OpFAdd %v4float %61 %63
+               OpReturnValue %64
+               OpFunctionEnd
+%test_scalar_ = OpFunction %float None %11
+         %13 = OpLabel
+       %le_0 = OpVariable %_ptr_Function_bool Function
+      %leq_0 = OpVariable %_ptr_Function_bool Function
+       %ge_0 = OpVariable %_ptr_Function_bool Function
+      %geq_0 = OpVariable %_ptr_Function_bool Function
+       %eq_0 = OpVariable %_ptr_Function_bool Function
+      %neq_0 = OpVariable %_ptr_Function_bool Function
+         %72 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %73 = OpLoad %float %72
+         %74 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %75 = OpLoad %float %74
+         %76 = OpFUnordLessThan %bool %73 %75
+               OpStore %le_0 %76
+         %78 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %79 = OpLoad %float %78
+         %80 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %81 = OpLoad %float %80
+         %82 = OpFUnordLessThanEqual %bool %79 %81
+               OpStore %leq_0 %82
+         %84 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %87 = OpLoad %float %86
+         %88 = OpFUnordGreaterThan %bool %85 %87
+               OpStore %ge_0 %88
+         %90 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %91 = OpLoad %float %90
+         %92 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %93 = OpLoad %float %92
+         %94 = OpFUnordGreaterThanEqual %bool %91 %93
+               OpStore %geq_0 %94
+         %96 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %97 = OpLoad %float %96
+         %98 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %99 = OpLoad %float %98
+        %100 = OpFUnordEqual %bool %97 %99
+               OpStore %eq_0 %100
+        %102 = OpAccessChain %_ptr_Input_float %A %uint_0
+        %103 = OpLoad %float %102
+        %104 = OpAccessChain %_ptr_Input_float %B %uint_0
+        %105 = OpLoad %float %104
+        %106 = OpFUnordNotEqual %bool %103 %105
+               OpStore %neq_0 %106
+        %107 = OpLoad %bool %le_0
+        %108 = OpSelect %float %107 %float_1 %float_0
+        %109 = OpLoad %bool %leq_0
+        %110 = OpSelect %float %109 %float_1 %float_0
+        %111 = OpFAdd %float %108 %110
+        %112 = OpLoad %bool %ge_0
+        %113 = OpSelect %float %112 %float_1 %float_0
+        %114 = OpFAdd %float %111 %113
+        %115 = OpLoad %bool %geq_0
+        %116 = OpSelect %float %115 %float_1 %float_0
+        %117 = OpFAdd %float %114 %116
+        %118 = OpLoad %bool %eq_0
+        %119 = OpSelect %float %118 %float_1 %float_0
+        %120 = OpFAdd %float %117 %119
+        %121 = OpLoad %bool %neq_0
+        %122 = OpSelect %float %121 %float_1 %float_0
+        %123 = OpFAdd %float %120 %122
+               OpReturnValue %123
+               OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
similarity index 100%
rename from shaders-hlsl-no-opt/vert/empty-shader.sm30.vert
rename to shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "a1"
+               OpMemberName %SSBO 1 "a2"
+               OpMemberName %SSBO 2 "a3"
+               OpMemberName %SSBO 3 "a4"
+               OpMemberName %SSBO 4 "b1"
+               OpMemberName %SSBO 5 "b2"
+               OpMemberName %SSBO 6 "b3"
+               OpMemberName %SSBO 7 "b4"
+               OpMemberName %SSBO 8 "c1"
+               OpMemberName %SSBO 9 "c2"
+               OpMemberName %SSBO 10 "c3"
+               OpMemberName %SSBO 11 "c4"
+               OpName %_ ""
+               OpName %i "i"
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 8
+               OpMemberDecorate %SSBO 2 Offset 16
+               OpMemberDecorate %SSBO 3 Offset 32
+               OpMemberDecorate %SSBO 4 Offset 48
+               OpMemberDecorate %SSBO 5 Offset 56
+               OpMemberDecorate %SSBO 6 Offset 64
+               OpMemberDecorate %SSBO 7 Offset 80
+               OpMemberDecorate %SSBO 8 Offset 96
+               OpMemberDecorate %SSBO 9 Offset 104
+               OpMemberDecorate %SSBO 10 Offset 112
+               OpMemberDecorate %SSBO 11 Offset 128
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+       %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+      %int_8 = OpConstant %int 8
+      %int_1 = OpConstant %int 1
+      %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+      %int_9 = OpConstant %int 9
+      %int_2 = OpConstant %int 2
+      %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %int_10 = OpConstant %int 10
+      %int_3 = OpConstant %int 3
+      %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+     %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+       %bool = OpTypeBool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+     %uint_1 = OpConstant %uint 1
+       %main = OpFunction %void None %7
+         %35 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %37 = OpLoad %float %36
+         %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %39 = OpLoad %float %38
+         %40 = OpExtInst %float %1 NMin %37 %39
+         %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %41 %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %43 = OpLoad %v2float %42
+         %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %45 = OpLoad %v2float %44
+         %46 = OpExtInst %v2float %1 NMin %43 %45
+         %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %47 %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %49 = OpLoad %v3float %48
+         %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %51 = OpLoad %v3float %50
+         %52 = OpExtInst %v3float %1 NMin %49 %51
+         %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %53 %52
+         %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %55 = OpLoad %v4float %54
+         %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %57 = OpLoad %v4float %56
+         %58 = OpExtInst %v4float %1 NMin %55 %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %59 %58
+         %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %61 = OpLoad %float %60
+         %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %63 = OpLoad %float %62
+         %64 = OpExtInst %float %1 NMax %61 %63
+         %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %65 %64
+         %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %67 = OpLoad %v2float %66
+         %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %69 = OpLoad %v2float %68
+         %70 = OpExtInst %v2float %1 NMax %67 %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %71 %70
+         %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %73 = OpLoad %v3float %72
+         %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %75 = OpLoad %v3float %74
+         %76 = OpExtInst %v3float %1 NMax %73 %75
+         %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %77 %76
+         %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %79 = OpLoad %v4float %78
+         %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %81 = OpLoad %v4float %80
+         %82 = OpExtInst %v4float %1 NMax %79 %81
+         %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %83 %82
+         %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %87 = OpLoad %float %86
+         %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %89 = OpLoad %float %88
+         %90 = OpExtInst %float %1 NClamp %85 %87 %89
+         %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %91 %90
+         %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+         %93 = OpLoad %v2float %92
+         %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %95 = OpLoad %v2float %94
+         %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %97 = OpLoad %v2float %96
+         %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+         %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %99 %98
+        %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+        %101 = OpLoad %v3float %100
+        %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+        %103 = OpLoad %v3float %102
+        %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+        %105 = OpLoad %v3float %104
+        %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+        %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %107 %106
+        %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+        %109 = OpLoad %v4float %108
+        %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+        %111 = OpLoad %v4float %110
+        %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+        %113 = OpLoad %v4float %112
+        %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+        %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %115 %114
+               OpStore %i %int_0
+               OpBranch %116
+        %116 = OpLabel
+               OpLoopMerge %117 %118 None
+               OpBranch %119
+        %119 = OpLabel
+        %120 = OpLoad %int %i
+        %121 = OpSLessThan %bool %120 %int_2
+               OpBranchConditional %121 %122 %117
+        %122 = OpLabel
+        %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+        %124 = OpLoad %v2float %123
+        %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+        %126 = OpLoad %v2float %125
+        %127 = OpExtInst %v2float %1 NMin %124 %126
+        %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %128 %127
+               OpBranch %118
+        %118 = OpLabel
+        %129 = OpLoad %int %i
+        %130 = OpIAdd %int %129 %int_1
+               OpStore %i %130
+        %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+        %132 = OpLoad %float %131
+        %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+        %134 = OpLoad %float %133
+        %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+        %136 = OpLoad %float %135
+        %137 = OpExtInst %float %1 NClamp %132 %134 %136
+        %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %138 %137
+               OpBranch %116
+        %117 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
new file mode 100644
index 00000000..ad566615
--- /dev/null
+++ b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
@@ -0,0 +1,293 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 205
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpExtension "SPV_AMD_gpu_shader_half_float"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_AMD_gpu_shader_half_float"
+               OpName %main "main"
+               OpName %res "res"
+               OpName %res2 "res2"
+               OpName %res3 "res3"
+               OpName %res4 "res4"
+               OpName %hres "hres"
+               OpName %hres2 "hres2"
+               OpName %hres3 "hres3"
+               OpName %hres4 "hres4"
+               OpName %v1 "v1"
+               OpName %v2 "v2"
+               OpName %v3 "v3"
+               OpName %v4 "v4"
+               OpName %h1 "h1"
+               OpName %h2 "h2"
+               OpName %h3 "h3"
+               OpName %h4 "h4"
+               OpDecorate %v1 Location 0
+               OpDecorate %v2 Location 1
+               OpDecorate %v3 Location 2
+               OpDecorate %v4 Location 3
+               OpDecorate %h1 Location 4
+               OpDecorate %h2 Location 5
+               OpDecorate %h3 Location 6
+               OpDecorate %h4 Location 7
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+%_ptr_Function_float = OpTypePointer Function %float
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%_ptr_Function_half = OpTypePointer Function %half
+%_ptr_Input_half = OpTypePointer Input %half
+%_ptr_Function_v2half = OpTypePointer Function %v2half
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+%_ptr_Function_v3half = OpTypePointer Function %v3half
+%_ptr_Input_v3half = OpTypePointer Input %v3half
+%_ptr_Function_v4half = OpTypePointer Function %v4half
+%_ptr_Input_v4half = OpTypePointer Input %v4half
+         %v1 = OpVariable %_ptr_Input_float Input
+         %v2 = OpVariable %_ptr_Input_v2float Input
+         %v3 = OpVariable %_ptr_Input_v3float Input
+         %v4 = OpVariable %_ptr_Input_v4float Input
+         %h1 = OpVariable %_ptr_Input_half Input
+         %h2 = OpVariable %_ptr_Input_v2half Input
+         %h3 = OpVariable %_ptr_Input_v3half Input
+         %h4 = OpVariable %_ptr_Input_v4half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %res = OpVariable %_ptr_Function_float Function
+         %46 = OpLoad %float %v1
+         %47 = OpLoad %float %v1
+         %48 = OpExtInst %float %1 FMin %46 %47
+               OpStore %res %48
+         %49 = OpLoad %float %v1
+         %50 = OpLoad %float %v1
+         %51 = OpExtInst %float %1 FMax %49 %50
+               OpStore %res %51
+         %52 = OpLoad %float %v1
+         %53 = OpLoad %float %v1
+         %54 = OpLoad %float %v1
+         %55 = OpExtInst %float %1 FClamp %52 %53 %54
+               OpStore %res %55
+         %56 = OpLoad %float %v1
+         %57 = OpLoad %float %v1
+         %58 = OpExtInst %float %1 NMin %56 %57
+               OpStore %res %58
+         %59 = OpLoad %float %v1
+         %60 = OpLoad %float %v1
+         %61 = OpExtInst %float %1 NMax %59 %60
+               OpStore %res %61
+         %62 = OpLoad %float %v1
+         %63 = OpLoad %float %v1
+         %64 = OpLoad %float %v1
+         %65 = OpExtInst %float %1 NClamp %62 %63 %64
+               OpStore %res %65
+       %res2 = OpVariable %_ptr_Function_v2float Function
+         %66 = OpLoad %v2float %v2
+         %67 = OpLoad %v2float %v2
+         %68 = OpExtInst %v2float %1 FMin %66 %67
+               OpStore %res2 %68
+         %69 = OpLoad %v2float %v2
+         %70 = OpLoad %v2float %v2
+         %71 = OpExtInst %v2float %1 FMax %69 %70
+               OpStore %res2 %71
+         %72 = OpLoad %v2float %v2
+         %73 = OpLoad %v2float %v2
+         %74 = OpLoad %v2float %v2
+         %75 = OpExtInst %v2float %1 FClamp %72 %73 %74
+               OpStore %res2 %75
+         %76 = OpLoad %v2float %v2
+         %77 = OpLoad %v2float %v2
+         %78 = OpExtInst %v2float %1 NMin %76 %77
+               OpStore %res2 %78
+         %79 = OpLoad %v2float %v2
+         %80 = OpLoad %v2float %v2
+         %81 = OpExtInst %v2float %1 NMax %79 %80
+               OpStore %res2 %81
+         %82 = OpLoad %v2float %v2
+         %83 = OpLoad %v2float %v2
+         %84 = OpLoad %v2float %v2
+         %85 = OpExtInst %v2float %1 NClamp %82 %83 %84
+               OpStore %res2 %85
+       %res3 = OpVariable %_ptr_Function_v3float Function
+         %86 = OpLoad %v3float %v3
+         %87 = OpLoad %v3float %v3
+         %88 = OpExtInst %v3float %1 FMin %86 %87
+               OpStore %res3 %88
+         %89 = OpLoad %v3float %v3
+         %90 = OpLoad %v3float %v3
+         %91 = OpExtInst %v3float %1 FMax %89 %90
+               OpStore %res3 %91
+         %92 = OpLoad %v3float %v3
+         %93 = OpLoad %v3float %v3
+         %94 = OpLoad %v3float %v3
+         %95 = OpExtInst %v3float %1 FClamp %92 %93 %94
+               OpStore %res3 %95
+         %96 = OpLoad %v3float %v3
+         %97 = OpLoad %v3float %v3
+         %98 = OpExtInst %v3float %1 NMin %96 %97
+               OpStore %res3 %98
+         %99 = OpLoad %v3float %v3
+        %100 = OpLoad %v3float %v3
+        %101 = OpExtInst %v3float %1 NMax %99 %100
+               OpStore %res3 %101
+        %102 = OpLoad %v3float %v3
+        %103 = OpLoad %v3float %v3
+        %104 = OpLoad %v3float %v3
+        %105 = OpExtInst %v3float %1 NClamp %102 %103 %104
+               OpStore %res3 %105
+       %res4 = OpVariable %_ptr_Function_v4float Function
+        %106 = OpLoad %v4float %v4
+        %107 = OpLoad %v4float %v4
+        %108 = OpExtInst %v4float %1 FMin %106 %107
+               OpStore %res4 %108
+        %109 = OpLoad %v4float %v4
+        %110 = OpLoad %v4float %v4
+        %111 = OpExtInst %v4float %1 FMax %109 %110
+               OpStore %res4 %111
+        %112 = OpLoad %v4float %v4
+        %113 = OpLoad %v4float %v4
+        %114 = OpLoad %v4float %v4
+        %115 = OpExtInst %v4float %1 FClamp %112 %113 %114
+               OpStore %res4 %115
+        %116 = OpLoad %v4float %v4
+        %117 = OpLoad %v4float %v4
+        %118 = OpExtInst %v4float %1 NMin %116 %117
+               OpStore %res4 %118
+        %119 = OpLoad %v4float %v4
+        %120 = OpLoad %v4float %v4
+        %121 = OpExtInst %v4float %1 NMax %119 %120
+               OpStore %res4 %121
+        %122 = OpLoad %v4float %v4
+        %123 = OpLoad %v4float %v4
+        %124 = OpLoad %v4float %v4
+        %125 = OpExtInst %v4float %1 NClamp %122 %123 %124
+               OpStore %res4 %125
+       %hres = OpVariable %_ptr_Function_half Function
+        %126 = OpLoad %half %h1
+        %127 = OpLoad %half %h1
+        %128 = OpExtInst %half %1 FMin %126 %127
+               OpStore %hres %128
+        %129 = OpLoad %half %h1
+        %130 = OpLoad %half %h1
+        %131 = OpExtInst %half %1 FMax %129 %130
+               OpStore %hres %131
+        %132 = OpLoad %half %h1
+        %133 = OpLoad %half %h1
+        %134 = OpLoad %half %h1
+        %135 = OpExtInst %half %1 FClamp %132 %133 %134
+               OpStore %hres %135
+        %136 = OpLoad %half %h1
+        %137 = OpLoad %half %h1
+        %138 = OpExtInst %half %1 NMin %136 %137
+               OpStore %hres %138
+        %139 = OpLoad %half %h1
+        %140 = OpLoad %half %h1
+        %141 = OpExtInst %half %1 NMax %139 %140
+               OpStore %hres %141
+        %142 = OpLoad %half %h1
+        %143 = OpLoad %half %h1
+        %144 = OpLoad %half %h1
+        %145 = OpExtInst %half %1 NClamp %142 %143 %144
+               OpStore %hres %145
+      %hres2 = OpVariable %_ptr_Function_v2half Function
+        %146 = OpLoad %v2half %h2
+        %147 = OpLoad %v2half %h2
+        %148 = OpExtInst %v2half %1 FMin %146 %147
+               OpStore %hres2 %148
+        %149 = OpLoad %v2half %h2
+        %150 = OpLoad %v2half %h2
+        %151 = OpExtInst %v2half %1 FMax %149 %150
+               OpStore %hres2 %151
+        %152 = OpLoad %v2half %h2
+        %153 = OpLoad %v2half %h2
+        %154 = OpLoad %v2half %h2
+        %155 = OpExtInst %v2half %1 FClamp %152 %153 %154
+               OpStore %hres2 %155
+        %156 = OpLoad %v2half %h2
+        %157 = OpLoad %v2half %h2
+        %158 = OpExtInst %v2half %1 NMin %156 %157
+               OpStore %hres2 %158
+        %159 = OpLoad %v2half %h2
+        %160 = OpLoad %v2half %h2
+        %161 = OpExtInst %v2half %1 NMax %159 %160
+               OpStore %hres2 %161
+        %162 = OpLoad %v2half %h2
+        %163 = OpLoad %v2half %h2
+        %164 = OpLoad %v2half %h2
+        %165 = OpExtInst %v2half %1 NClamp %162 %163 %164
+               OpStore %hres2 %165
+      %hres3 = OpVariable %_ptr_Function_v3half Function
+        %166 = OpLoad %v3half %h3
+        %167 = OpLoad %v3half %h3
+        %168 = OpExtInst %v3half %1 FMin %166 %167
+               OpStore %hres3 %168
+        %169 = OpLoad %v3half %h3
+        %170 = OpLoad %v3half %h3
+        %171 = OpExtInst %v3half %1 FMax %169 %170
+               OpStore %hres3 %171
+        %172 = OpLoad %v3half %h3
+        %173 = OpLoad %v3half %h3
+        %174 = OpLoad %v3half %h3
+        %175 = OpExtInst %v3half %1 FClamp %172 %173 %174
+               OpStore %hres3 %175
+        %176 = OpLoad %v3half %h3
+        %177 = OpLoad %v3half %h3
+        %178 = OpExtInst %v3half %1 NMin %176 %177
+               OpStore %hres3 %178
+        %179 = OpLoad %v3half %h3
+        %180 = OpLoad %v3half %h3
+        %181 = OpExtInst %v3half %1 NMax %179 %180
+               OpStore %hres3 %181
+        %182 = OpLoad %v3half %h3
+        %183 = OpLoad %v3half %h3
+        %184 = OpLoad %v3half %h3
+        %185 = OpExtInst %v3half %1 NClamp %182 %183 %184
+               OpStore %hres3 %185
+      %hres4 = OpVariable %_ptr_Function_v4half Function
+        %186 = OpLoad %v4half %h4
+        %187 = OpLoad %v4half %h4
+        %188 = OpExtInst %v4half %1 FMin %186 %187
+               OpStore %hres4 %188
+        %189 = OpLoad %v4half %h4
+        %190 = OpLoad %v4half %h4
+        %191 = OpExtInst %v4half %1 FMax %189 %190
+               OpStore %hres4 %191
+        %192 = OpLoad %v4half %h4
+        %193 = OpLoad %v4half %h4
+        %194 = OpLoad %v4half %h4
+        %195 = OpExtInst %v4half %1 FClamp %192 %193 %194
+               OpStore %hres4 %195
+        %196 = OpLoad %v4half %h4
+        %197 = OpLoad %v4half %h4
+        %198 = OpExtInst %v4half %1 NMin %196 %197
+               OpStore %hres4 %198
+        %199 = OpLoad %v4half %h4
+        %200 = OpLoad %v4half %h4
+        %201 = OpExtInst %v4half %1 NMax %199 %200
+               OpStore %hres4 %201
+        %202 = OpLoad %v4half %h4
+        %203 = OpLoad %v4half %h4
+        %204 = OpLoad %v4half %h4
+        %205 = OpExtInst %v4half %1 NClamp %202 %203 %204
+               OpStore %hres4 %205
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/unord-relational-op.asm.frag b/shaders-msl/asm/frag/unord-relational-op.asm.frag
index 3e4cd6c2..824c0512 100644
--- a/shaders-msl/asm/frag/unord-relational-op.asm.frag
+++ b/shaders-msl/asm/frag/unord-relational-op.asm.frag
@@ -114,6 +114,8 @@
                OpStore %t1 %b
          %15 = OpFUnordEqual %bool %a %b
                OpStore %c1 %15
+         %ordered = OpFOrdNotEqual %bool %a %b
+               OpStore %c1 %ordered
          %17 = OpFUnordNotEqual %bool %a %b
                OpStore %c2 %17
          %19 = OpFUnordLessThan %bool %a %b
diff --git a/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
new file mode 100644
index 00000000..824c0512
--- /dev/null
+++ b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
@@ -0,0 +1,207 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 122
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %c %d %e %f %g %h %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 460
+               OpName %main "main"
+               OpName %t0 "t0"
+               OpName %a "a"
+               OpName %t1 "t1"
+               OpName %b "b"
+               OpName %c1 "c1"
+               OpName %c2 "c2"
+               OpName %c3 "c3"
+               OpName %c4 "c4"
+               OpName %c5 "c5"
+               OpName %c6 "c6"
+               OpName %c7 "c7"
+               OpName %c "c"
+               OpName %d "d"
+               OpName %c8 "c8"
+               OpName %c9 "c9"
+               OpName %c10 "c10"
+               OpName %c11 "c11"
+               OpName %c12 "c12"
+               OpName %c13 "c13"
+               OpName %e "e"
+               OpName %f "f"
+               OpName %c14 "c14"
+               OpName %c15 "c15"
+               OpName %c16 "c16"
+               OpName %c17 "c17"
+               OpName %c18 "c18"
+               OpName %c19 "c19"
+               OpName %g "g"
+               OpName %h "h"
+               OpName %c20 "c20"
+               OpName %c21 "c21"
+               OpName %c22 "c22"
+               OpName %c23 "c23"
+               OpName %c24 "c24"
+               OpName %FragColor "FragColor"
+               OpDecorate %a SpecId 1
+               OpDecorate %b SpecId 2
+               OpDecorate %c Location 2
+               OpDecorate %d Location 3
+               OpDecorate %e Location 4
+               OpDecorate %f Location 5
+               OpDecorate %g Location 6
+               OpDecorate %h Location 7
+               OpDecorate %FragColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+          %a = OpSpecConstant %float 1
+          %b = OpSpecConstant %float 2
+       %bool = OpTypeBool
+%_ptr_Function_bool = OpTypePointer Function %bool
+     %v2bool = OpTypeVector %bool 2
+%_ptr_Function_v2bool = OpTypePointer Function %v2bool
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+          %c = OpVariable %_ptr_Input_v2float Input
+          %d = OpVariable %_ptr_Input_v2float Input
+     %v3bool = OpTypeVector %bool 3
+%_ptr_Function_v3bool = OpTypePointer Function %v3bool
+    %v3float = OpTypeVector %float 3
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+          %e = OpVariable %_ptr_Input_v3float Input
+          %f = OpVariable %_ptr_Input_v3float Input
+     %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+          %g = OpVariable %_ptr_Input_v4float Input
+          %h = OpVariable %_ptr_Input_v4float Input
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %t0 = OpVariable %_ptr_Function_float Function
+         %t1 = OpVariable %_ptr_Function_float Function
+         %c1 = OpVariable %_ptr_Function_bool Function
+         %c2 = OpVariable %_ptr_Function_bool Function
+         %c3 = OpVariable %_ptr_Function_bool Function
+         %c4 = OpVariable %_ptr_Function_bool Function
+         %c5 = OpVariable %_ptr_Function_bool Function
+         %c6 = OpVariable %_ptr_Function_bool Function
+         %c7 = OpVariable %_ptr_Function_v2bool Function
+         %c8 = OpVariable %_ptr_Function_v2bool Function
+         %c9 = OpVariable %_ptr_Function_v2bool Function
+        %c10 = OpVariable %_ptr_Function_v2bool Function
+        %c11 = OpVariable %_ptr_Function_v2bool Function
+        %c12 = OpVariable %_ptr_Function_v2bool Function
+        %c13 = OpVariable %_ptr_Function_v3bool Function
+        %c14 = OpVariable %_ptr_Function_v3bool Function
+        %c15 = OpVariable %_ptr_Function_v3bool Function
+        %c16 = OpVariable %_ptr_Function_v3bool Function
+        %c17 = OpVariable %_ptr_Function_v3bool Function
+        %c18 = OpVariable %_ptr_Function_v3bool Function
+        %c19 = OpVariable %_ptr_Function_v4bool Function
+        %c20 = OpVariable %_ptr_Function_v4bool Function
+        %c21 = OpVariable %_ptr_Function_v4bool Function
+        %c22 = OpVariable %_ptr_Function_v4bool Function
+        %c23 = OpVariable %_ptr_Function_v4bool Function
+        %c24 = OpVariable %_ptr_Function_v4bool Function
+               OpStore %t0 %a
+               OpStore %t1 %b
+         %15 = OpFUnordEqual %bool %a %b
+               OpStore %c1 %15
+         %ordered = OpFOrdNotEqual %bool %a %b
+               OpStore %c1 %ordered
+         %17 = OpFUnordNotEqual %bool %a %b
+               OpStore %c2 %17
+         %19 = OpFUnordLessThan %bool %a %b
+               OpStore %c3 %19
+         %21 = OpFUnordGreaterThan %bool %a %b
+               OpStore %c4 %21
+         %23 = OpFUnordLessThanEqual %bool %a %b
+               OpStore %c5 %23
+         %25 = OpFUnordGreaterThanEqual %bool %a %b
+               OpStore %c6 %25
+         %32 = OpLoad %v2float %c
+         %34 = OpLoad %v2float %d
+         %35 = OpFUnordEqual %v2bool %32 %34
+               OpStore %c7 %35
+         %37 = OpLoad %v2float %c
+         %38 = OpLoad %v2float %d
+         %39 = OpFUnordNotEqual %v2bool %37 %38
+               OpStore %c8 %39
+         %41 = OpLoad %v2float %c
+         %42 = OpLoad %v2float %d
+         %43 = OpFUnordLessThan %v2bool %41 %42
+               OpStore %c9 %43
+         %45 = OpLoad %v2float %c
+         %46 = OpLoad %v2float %d
+         %47 = OpFUnordGreaterThan %v2bool %45 %46
+               OpStore %c10 %47
+         %49 = OpLoad %v2float %c
+         %50 = OpLoad %v2float %d
+         %51 = OpFUnordLessThanEqual %v2bool %49 %50
+               OpStore %c11 %51
+         %53 = OpLoad %v2float %c
+         %54 = OpLoad %v2float %d
+         %55 = OpFUnordGreaterThanEqual %v2bool %53 %54
+               OpStore %c12 %55
+         %62 = OpLoad %v3float %e
+         %64 = OpLoad %v3float %f
+         %65 = OpFUnordEqual %v3bool %62 %64
+               OpStore %c13 %65
+         %67 = OpLoad %v3float %e
+         %68 = OpLoad %v3float %f
+         %69 = OpFUnordNotEqual %v3bool %67 %68
+               OpStore %c14 %69
+         %71 = OpLoad %v3float %e
+         %72 = OpLoad %v3float %f
+         %73 = OpFUnordLessThan %v3bool %71 %72
+               OpStore %c15 %73
+         %75 = OpLoad %v3float %e
+         %76 = OpLoad %v3float %f
+         %77 = OpFUnordGreaterThan %v3bool %75 %76
+               OpStore %c16 %77
+         %79 = OpLoad %v3float %e
+         %80 = OpLoad %v3float %f
+         %81 = OpFUnordLessThanEqual %v3bool %79 %80
+               OpStore %c17 %81
+         %83 = OpLoad %v3float %e
+         %84 = OpLoad %v3float %f
+         %85 = OpFUnordGreaterThanEqual %v3bool %83 %84
+               OpStore %c18 %85
+         %92 = OpLoad %v4float %g
+         %94 = OpLoad %v4float %h
+         %95 = OpFUnordEqual %v4bool %92 %94
+               OpStore %c19 %95
+         %97 = OpLoad %v4float %g
+         %98 = OpLoad %v4float %h
+         %99 = OpFUnordNotEqual %v4bool %97 %98
+               OpStore %c20 %99
+        %101 = OpLoad %v4float %g
+        %102 = OpLoad %v4float %h
+        %103 = OpFUnordLessThan %v4bool %101 %102
+               OpStore %c21 %103
+        %105 = OpLoad %v4float %g
+        %106 = OpLoad %v4float %h
+        %107 = OpFUnordGreaterThan %v4bool %105 %106
+               OpStore %c22 %107
+        %109 = OpLoad %v4float %g
+        %110 = OpLoad %v4float %h
+        %111 = OpFUnordLessThanEqual %v4bool %109 %110
+               OpStore %c23 %111
+        %113 = OpLoad %v4float %g
+        %114 = OpLoad %v4float %h
+        %115 = OpFUnordGreaterThanEqual %v4bool %113 %114
+               OpStore %c24 %115
+        %118 = OpLoad %float %t0
+        %119 = OpLoad %float %t1
+        %120 = OpFAdd %float %118 %119
+        %121 = OpCompositeConstruct %v4float %120 %120 %120 %120
+               OpStore %FragColor %121
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag
index 4ad8fc5a..15286e08 100644
--- a/shaders-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag
@@ -93,6 +93,8 @@
          %42 = OpLoad %v4float %B
          %43 = OpFUnordNotEqual %v4bool %41 %42
                OpStore %neq %43
+         %ordered = OpFOrdNotEqual %v4bool %41 %42
+               OpStore %neq %ordered
          %44 = OpLoad %v4bool %le
          %49 = OpSelect %v4float %44 %48 %47
          %50 = OpLoad %v4bool %leq
diff --git a/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..15286e08
--- /dev/null
+++ b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,179 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 132
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %A %B %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %test_vector_ "test_vector("
+               OpName %test_scalar_ "test_scalar("
+               OpName %le "le"
+               OpName %A "A"
+               OpName %B "B"
+               OpName %leq "leq"
+               OpName %ge "ge"
+               OpName %geq "geq"
+               OpName %eq "eq"
+               OpName %neq "neq"
+               OpName %le_0 "le"
+               OpName %leq_0 "leq"
+               OpName %ge_0 "ge"
+               OpName %geq_0 "geq"
+               OpName %eq_0 "eq"
+               OpName %neq_0 "neq"
+               OpName %FragColor "FragColor"
+               OpDecorate %A Location 0
+               OpDecorate %B Location 1
+               OpDecorate %FragColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %8 = OpTypeFunction %v4float
+         %11 = OpTypeFunction %float
+       %bool = OpTypeBool
+     %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+          %A = OpVariable %_ptr_Input_v4float Input
+          %B = OpVariable %_ptr_Input_v4float Input
+    %float_0 = OpConstant %float 0
+    %float_1 = OpConstant %float 1
+         %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+         %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_bool = OpTypePointer Function %bool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %128 = OpFunctionCall %v4float %test_vector_
+        %129 = OpFunctionCall %float %test_scalar_
+        %130 = OpCompositeConstruct %v4float %129 %129 %129 %129
+        %131 = OpFAdd %v4float %128 %130
+               OpStore %FragColor %131
+               OpReturn
+               OpFunctionEnd
+%test_vector_ = OpFunction %v4float None %8
+         %10 = OpLabel
+         %le = OpVariable %_ptr_Function_v4bool Function
+        %leq = OpVariable %_ptr_Function_v4bool Function
+         %ge = OpVariable %_ptr_Function_v4bool Function
+        %geq = OpVariable %_ptr_Function_v4bool Function
+         %eq = OpVariable %_ptr_Function_v4bool Function
+        %neq = OpVariable %_ptr_Function_v4bool Function
+         %20 = OpLoad %v4float %A
+         %22 = OpLoad %v4float %B
+         %23 = OpFUnordLessThan %v4bool %20 %22
+               OpStore %le %23
+         %25 = OpLoad %v4float %A
+         %26 = OpLoad %v4float %B
+         %27 = OpFUnordLessThanEqual %v4bool %25 %26
+               OpStore %leq %27
+         %29 = OpLoad %v4float %A
+         %30 = OpLoad %v4float %B
+         %31 = OpFUnordGreaterThan %v4bool %29 %30
+               OpStore %ge %31
+         %33 = OpLoad %v4float %A
+         %34 = OpLoad %v4float %B
+         %35 = OpFUnordGreaterThanEqual %v4bool %33 %34
+               OpStore %geq %35
+         %37 = OpLoad %v4float %A
+         %38 = OpLoad %v4float %B
+         %39 = OpFUnordEqual %v4bool %37 %38
+               OpStore %eq %39
+         %41 = OpLoad %v4float %A
+         %42 = OpLoad %v4float %B
+         %43 = OpFUnordNotEqual %v4bool %41 %42
+               OpStore %neq %43
+         %ordered = OpFOrdNotEqual %v4bool %41 %42
+               OpStore %neq %ordered
+         %44 = OpLoad %v4bool %le
+         %49 = OpSelect %v4float %44 %48 %47
+         %50 = OpLoad %v4bool %leq
+         %51 = OpSelect %v4float %50 %48 %47
+         %52 = OpFAdd %v4float %49 %51
+         %53 = OpLoad %v4bool %ge
+         %54 = OpSelect %v4float %53 %48 %47
+         %55 = OpFAdd %v4float %52 %54
+         %56 = OpLoad %v4bool %geq
+         %57 = OpSelect %v4float %56 %48 %47
+         %58 = OpFAdd %v4float %55 %57
+         %59 = OpLoad %v4bool %eq
+         %60 = OpSelect %v4float %59 %48 %47
+         %61 = OpFAdd %v4float %58 %60
+         %62 = OpLoad %v4bool %neq
+         %63 = OpSelect %v4float %62 %48 %47
+         %64 = OpFAdd %v4float %61 %63
+               OpReturnValue %64
+               OpFunctionEnd
+%test_scalar_ = OpFunction %float None %11
+         %13 = OpLabel
+       %le_0 = OpVariable %_ptr_Function_bool Function
+      %leq_0 = OpVariable %_ptr_Function_bool Function
+       %ge_0 = OpVariable %_ptr_Function_bool Function
+      %geq_0 = OpVariable %_ptr_Function_bool Function
+       %eq_0 = OpVariable %_ptr_Function_bool Function
+      %neq_0 = OpVariable %_ptr_Function_bool Function
+         %72 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %73 = OpLoad %float %72
+         %74 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %75 = OpLoad %float %74
+         %76 = OpFUnordLessThan %bool %73 %75
+               OpStore %le_0 %76
+         %78 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %79 = OpLoad %float %78
+         %80 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %81 = OpLoad %float %80
+         %82 = OpFUnordLessThanEqual %bool %79 %81
+               OpStore %leq_0 %82
+         %84 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %87 = OpLoad %float %86
+         %88 = OpFUnordGreaterThan %bool %85 %87
+               OpStore %ge_0 %88
+         %90 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %91 = OpLoad %float %90
+         %92 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %93 = OpLoad %float %92
+         %94 = OpFUnordGreaterThanEqual %bool %91 %93
+               OpStore %geq_0 %94
+         %96 = OpAccessChain %_ptr_Input_float %A %uint_0
+         %97 = OpLoad %float %96
+         %98 = OpAccessChain %_ptr_Input_float %B %uint_0
+         %99 = OpLoad %float %98
+        %100 = OpFUnordEqual %bool %97 %99
+               OpStore %eq_0 %100
+        %102 = OpAccessChain %_ptr_Input_float %A %uint_0
+        %103 = OpLoad %float %102
+        %104 = OpAccessChain %_ptr_Input_float %B %uint_0
+        %105 = OpLoad %float %104
+        %106 = OpFUnordNotEqual %bool %103 %105
+               OpStore %neq_0 %106
+        %107 = OpLoad %bool %le_0
+        %108 = OpSelect %float %107 %float_1 %float_0
+        %109 = OpLoad %bool %leq_0
+        %110 = OpSelect %float %109 %float_1 %float_0
+        %111 = OpFAdd %float %108 %110
+        %112 = OpLoad %bool %ge_0
+        %113 = OpSelect %float %112 %float_1 %float_0
+        %114 = OpFAdd %float %111 %113
+        %115 = OpLoad %bool %geq_0
+        %116 = OpSelect %float %115 %float_1 %float_0
+        %117 = OpFAdd %float %114 %116
+        %118 = OpLoad %bool %eq_0
+        %119 = OpSelect %float %118 %float_1 %float_0
+        %120 = OpFAdd %float %117 %119
+        %121 = OpLoad %bool %neq_0
+        %122 = OpSelect %float %121 %float_1 %float_0
+        %123 = OpFAdd %float %120 %122
+               OpReturnValue %123
+               OpFunctionEnd
diff --git a/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "a1"
+               OpMemberName %SSBO 1 "a2"
+               OpMemberName %SSBO 2 "a3"
+               OpMemberName %SSBO 3 "a4"
+               OpMemberName %SSBO 4 "b1"
+               OpMemberName %SSBO 5 "b2"
+               OpMemberName %SSBO 6 "b3"
+               OpMemberName %SSBO 7 "b4"
+               OpMemberName %SSBO 8 "c1"
+               OpMemberName %SSBO 9 "c2"
+               OpMemberName %SSBO 10 "c3"
+               OpMemberName %SSBO 11 "c4"
+               OpName %_ ""
+               OpName %i "i"
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 8
+               OpMemberDecorate %SSBO 2 Offset 16
+               OpMemberDecorate %SSBO 3 Offset 32
+               OpMemberDecorate %SSBO 4 Offset 48
+               OpMemberDecorate %SSBO 5 Offset 56
+               OpMemberDecorate %SSBO 6 Offset 64
+               OpMemberDecorate %SSBO 7 Offset 80
+               OpMemberDecorate %SSBO 8 Offset 96
+               OpMemberDecorate %SSBO 9 Offset 104
+               OpMemberDecorate %SSBO 10 Offset 112
+               OpMemberDecorate %SSBO 11 Offset 128
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+       %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+      %int_8 = OpConstant %int 8
+      %int_1 = OpConstant %int 1
+      %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+      %int_9 = OpConstant %int 9
+      %int_2 = OpConstant %int 2
+      %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %int_10 = OpConstant %int 10
+      %int_3 = OpConstant %int 3
+      %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+     %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+       %bool = OpTypeBool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+     %uint_1 = OpConstant %uint 1
+       %main = OpFunction %void None %7
+         %35 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %37 = OpLoad %float %36
+         %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %39 = OpLoad %float %38
+         %40 = OpExtInst %float %1 NMin %37 %39
+         %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %41 %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %43 = OpLoad %v2float %42
+         %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %45 = OpLoad %v2float %44
+         %46 = OpExtInst %v2float %1 NMin %43 %45
+         %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %47 %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %49 = OpLoad %v3float %48
+         %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %51 = OpLoad %v3float %50
+         %52 = OpExtInst %v3float %1 NMin %49 %51
+         %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %53 %52
+         %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %55 = OpLoad %v4float %54
+         %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %57 = OpLoad %v4float %56
+         %58 = OpExtInst %v4float %1 NMin %55 %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %59 %58
+         %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %61 = OpLoad %float %60
+         %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %63 = OpLoad %float %62
+         %64 = OpExtInst %float %1 NMax %61 %63
+         %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %65 %64
+         %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %67 = OpLoad %v2float %66
+         %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %69 = OpLoad %v2float %68
+         %70 = OpExtInst %v2float %1 NMax %67 %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %71 %70
+         %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %73 = OpLoad %v3float %72
+         %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %75 = OpLoad %v3float %74
+         %76 = OpExtInst %v3float %1 NMax %73 %75
+         %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %77 %76
+         %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %79 = OpLoad %v4float %78
+         %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %81 = OpLoad %v4float %80
+         %82 = OpExtInst %v4float %1 NMax %79 %81
+         %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %83 %82
+         %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %87 = OpLoad %float %86
+         %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %89 = OpLoad %float %88
+         %90 = OpExtInst %float %1 NClamp %85 %87 %89
+         %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %91 %90
+         %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+         %93 = OpLoad %v2float %92
+         %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %95 = OpLoad %v2float %94
+         %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %97 = OpLoad %v2float %96
+         %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+         %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %99 %98
+        %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+        %101 = OpLoad %v3float %100
+        %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+        %103 = OpLoad %v3float %102
+        %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+        %105 = OpLoad %v3float %104
+        %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+        %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %107 %106
+        %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+        %109 = OpLoad %v4float %108
+        %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+        %111 = OpLoad %v4float %110
+        %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+        %113 = OpLoad %v4float %112
+        %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+        %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %115 %114
+               OpStore %i %int_0
+               OpBranch %116
+        %116 = OpLabel
+               OpLoopMerge %117 %118 None
+               OpBranch %119
+        %119 = OpLabel
+        %120 = OpLoad %int %i
+        %121 = OpSLessThan %bool %120 %int_2
+               OpBranchConditional %121 %122 %117
+        %122 = OpLabel
+        %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+        %124 = OpLoad %v2float %123
+        %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+        %126 = OpLoad %v2float %125
+        %127 = OpExtInst %v2float %1 NMin %124 %126
+        %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %128 %127
+               OpBranch %118
+        %118 = OpLabel
+        %129 = OpLoad %int %i
+        %130 = OpIAdd %int %129 %int_1
+               OpStore %i %130
+        %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+        %132 = OpLoad %float %131
+        %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+        %134 = OpLoad %float %133
+        %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+        %136 = OpLoad %float %135
+        %137 = OpExtInst %float %1 NClamp %132 %134 %136
+        %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %138 %137
+               OpBranch %116
+        %117 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp
index db98de15..4a62b635 100644
--- a/spirv_cross_c.cpp
+++ b/spirv_cross_c.cpp
@@ -475,6 +475,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
 	case SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT:
 		options->glsl.ovr_multiview_view_count = value;
 		break;
+	case SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS:
+		options->glsl.relax_nan_checks = value != 0;
+		break;
 #endif
 
 #if SPIRV_CROSS_C_API_HLSL
diff --git a/spirv_cross_c.h b/spirv_cross_c.h
index a590c805..a35a5d65 100644
--- a/spirv_cross_c.h
+++ b/spirv_cross_c.h
@@ -40,7 +40,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 48
+#define SPVC_C_API_VERSION_MINOR 49
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0
 
@@ -677,6 +677,8 @@ typedef enum spvc_compiler_option
 
 	SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT,
 
+	SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | SPVC_COMPILER_OPTION_COMMON_BIT,
+
 	SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
 } spvc_compiler_option;
 
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index bacf709c..4c465133 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -7307,6 +7307,63 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool
 	return farg_str;
 }
 
+Op CompilerGLSL::get_remapped_spirv_op(Op op) const
+{
+	if (options.relax_nan_checks)
+	{
+		switch (op)
+		{
+		case OpFUnordLessThan:
+			op = OpFOrdLessThan;
+			break;
+		case OpFUnordLessThanEqual:
+			op = OpFOrdLessThanEqual;
+			break;
+		case OpFUnordGreaterThan:
+			op = OpFOrdGreaterThan;
+			break;
+		case OpFUnordGreaterThanEqual:
+			op = OpFOrdGreaterThanEqual;
+			break;
+		case OpFUnordEqual:
+			op = OpFOrdEqual;
+			break;
+		case OpFOrdNotEqual:
+			op = OpFUnordNotEqual;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	return op;
+}
+
+GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
+{
+	// Relax to non-NaN aware opcodes.
+	if (options.relax_nan_checks)
+	{
+		switch (std450_op)
+		{
+		case GLSLstd450NClamp:
+			std450_op = GLSLstd450FClamp;
+			break;
+		case GLSLstd450NMin:
+			std450_op = GLSLstd450FMin;
+			break;
+		case GLSLstd450NMax:
+			std450_op = GLSLstd450FMax;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return std450_op;
+}
+
 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
 {
 	auto op = static_cast<GLSLstd450>(eop);
@@ -7319,6 +7376,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	auto int_type = to_signed_basetype(integer_width);
 	auto uint_type = to_unsigned_basetype(integer_width);
 
+	op = get_remapped_glsl_op(op);
+
 	switch (op)
 	{
 	// FP fiddling
@@ -10094,6 +10153,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	auto int_type = to_signed_basetype(integer_width);
 	auto uint_type = to_unsigned_basetype(integer_width);
 
+	opcode = get_remapped_spirv_op(opcode);
+
 	switch (opcode)
 	{
 	// Dealing with memory
@@ -11220,7 +11281,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	case OpLogicalNotEqual:
 	case OpFOrdNotEqual:
+	case OpFUnordNotEqual:
 	{
+		// GLSL is fuzzy on what to do with ordered vs unordered not equal.
+		// glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
+		// but this means we have no easy way of implementing ordered not equal.
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP(notEqual);
 		else
@@ -12540,7 +12605,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFUnordEqual:
-	case OpFUnordNotEqual:
 	case OpFUnordLessThan:
 	case OpFUnordGreaterThan:
 	case OpFUnordLessThanEqual:
@@ -12563,10 +12627,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 				comp_op = "notEqual";
 				break;
 
-			case OpFUnordNotEqual:
-				comp_op = "equal";
-				break;
-
 			case OpFUnordLessThan:
 				comp_op = "greaterThanEqual";
 				break;
@@ -12599,10 +12659,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 				comp_op = " != ";
 				break;
 
-			case OpFUnordNotEqual:
-				comp_op = " == ";
-				break;
-
 			case OpFUnordLessThan:
 				comp_op = " >= ";
 				break;
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index b2073abd..b892e0c3 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -138,6 +138,13 @@ public:
 		// what happens on legacy GLSL targets for blocks and structs.
 		bool force_flattened_io_blocks = false;
 
+		// For opcodes where we have to perform explicit additional nan checks, very ugly code is generated.
+		// If we opt-in, ignore these requirements.
+		// In opcodes like NClamp/NMin/NMax and FP compare, ignore NaN behavior.
+		// Use FClamp/FMin/FMax semantics for clamps and lets implementation choose ordered or unordered
+		// compares.
+		bool relax_nan_checks = false;
+
 		// If non-zero, controls layout(num_views = N) in; in GL_OVR_multiview2.
 		uint32_t ovr_multiview_view_count = 0;
 
@@ -362,6 +369,11 @@ protected:
 
 	virtual void emit_instruction(const Instruction &instr);
 	void emit_block_instructions(SPIRBlock &block);
+
+	// For relax_nan_checks.
+	GLSLstd450 get_remapped_glsl_op(GLSLstd450 std450_op) const;
+	spv::Op get_remapped_spirv_op(spv::Op op) const;
+
 	virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                          uint32_t count);
 	virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 4e4e4ca7..646fce33 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -3524,6 +3524,8 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	auto int_type = to_signed_basetype(integer_width);
 	auto uint_type = to_unsigned_basetype(integer_width);
 
+	op = get_remapped_glsl_op(op);
+
 	switch (op)
 	{
 	case GLSLstd450InverseSqrt:
@@ -4792,6 +4794,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	auto int_type = to_signed_basetype(integer_width);
 	auto uint_type = to_unsigned_basetype(integer_width);
 
+	opcode = get_remapped_spirv_op(opcode);
+
 	switch (opcode)
 	{
 	case OpAccessChain:
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index db5ef5d1..4eeb3b7f 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -7632,6 +7632,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	auto ops = stream(instruction);
 	auto opcode = static_cast<Op>(instruction.op);
 
+	opcode = get_remapped_spirv_op(opcode);
+
 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
 	uint32_t integer_width = get_integer_width_for_instruction(instruction);
 	auto int_type = to_signed_basetype(integer_width);
@@ -7674,6 +7676,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 
 	case OpLogicalNotEqual:
 	case OpFOrdNotEqual:
+		// TODO: Should probably negate the == result here.
+		// Typically OrdNotEqual comes from GLSL which itself does not really specify what
+		// happens with NaN.
+		// Consider fixing this if we run into real issues.
 		MSL_BOP(!=);
 		break;
 
@@ -7730,7 +7736,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFUnordNotEqual:
-		MSL_UNORD_BOP(!=);
+		// not equal in MSL generates une opcodes to begin with.
+		// Since unordered not equal is how it works in C, just inherit that behavior.
+		MSL_BOP(!=);
 		break;
 
 	case OpFUnordGreaterThan:
@@ -8993,6 +9001,8 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	auto int_type = to_signed_basetype(integer_width);
 	auto uint_type = to_unsigned_basetype(integer_width);
 
+	op = get_remapped_glsl_op(op);
+
 	switch (op)
 	{
 	case GLSLstd450Sinh:
diff --git a/test_shaders.py b/test_shaders.py
index 640baa23..49038939 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -350,6 +350,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
     if '.mask-clip-distance.' in shader:
         msl_args.append('--mask-stage-output-builtin')
         msl_args.append('ClipDistance')
+    if '.relax-nan.' in shader:
+        msl_args.append('--relax-nan-checks')
 
     subprocess.check_call(msl_args)
 
@@ -474,6 +476,8 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati
         hlsl_args.append('--hlsl-enable-16bit-types')
     if '.flatten-matrix-vertex-input.' in shader:
         hlsl_args.append('--hlsl-flatten-matrix-vertex-input-semantics')
+    if '.relax-nan.' in shader:
+        hlsl_args.append('--relax-nan-checks')
 
     subprocess.check_call(hlsl_args)
 
@@ -576,6 +580,8 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl
         extra_args += ['--force-zero-initialized-variables']
     if '.force-flattened-io.' in shader:
         extra_args += ['--glsl-force-flattened-io-blocks']
+    if '.relax-nan.' in shader:
+        extra_args.append('--relax-nan-checks')
 
     spirv_cross_path = paths.spirv_cross