From 27af716c3ade102d427df06aaedbf25dd220911f Mon Sep 17 00:00:00 2001
From: Chip Davis <cdavis@codeweavers.com>
Date: Fri, 31 Aug 2018 18:15:07 -0500
Subject: [PATCH] MSL: Emit F{Min,Max,Clamp} as fast:: and N{Min,Max,Clamp} as
 precise::.

This roughly matches their semantics in SPIR-V and MSL. For `FMin`,
`FMax`, and `FClamp`, as for the Metal functions `fast::min()`,
`fast::max()`, and `fast::clamp()`, the result is undefined if any
operand is NaN. For the 'N' operations and their corresponding MSL
`precise::` functions, the result is consistent with IEEE 754 (first
non-NaN wins; result is NaN if all operands are NaN).

We can only do this with 32-bit floats, though, because Metal only
provides these variants for `float`. `half` has only one variant of
each of these functions, which is presumably consistent with IEEE 754.
I guess that's OK: the SPIR-V spec only says that the results of
`F{Min,Max,Clamp}` are undefined for NaNs, so an IEEE 754-consistent
result is still valid. Performance of the 'F' operations on `half`
might suffer, though.
---
 .../asm/frag/min-max-clamp.asm.frag           |  69 +++++
 .../asm/frag/vector-shuffle-oom.asm.frag      |  78 ++---
 .../shaders-msl/flatten/struct.flatten.vert   |   2 +-
 reference/shaders-msl/vert/copy.flatten.vert  |   2 +-
 .../shaders-msl/vert/dynamic.flatten.vert     |   2 +-
 shaders-msl/asm/frag/min-max-clamp.asm.frag   | 293 ++++++++++++++++++
 spirv_msl.cpp                                 |  47 +++
 7 files changed, 451 insertions(+), 42 deletions(-)
 create mode 100644 reference/shaders-msl/asm/frag/min-max-clamp.asm.frag
 create mode 100644 shaders-msl/asm/frag/min-max-clamp.asm.frag

diff --git a/reference/shaders-msl/asm/frag/min-max-clamp.asm.frag b/reference/shaders-msl/asm/frag/min-max-clamp.asm.frag
new file mode 100644
index 00000000..f597a6eb
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/min-max-clamp.asm.frag
@@ -0,0 +1,69 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    float v1 [[user(locn0)]];
+    float2 v2 [[user(locn1)]];
+    float3 v3 [[user(locn2)]];
+    float4 v4 [[user(locn3)]];
+    half h1 [[user(locn4)]];
+    half2 h2 [[user(locn5)]];
+    half3 h3 [[user(locn6)]];
+    half4 h4 [[user(locn7)]];
+};
+
+fragment void main0(main0_in in [[stage_in]])
+{
+    float res = fast::min(in.v1, in.v1);
+    res = fast::max(in.v1, in.v1);
+    res = fast::clamp(in.v1, in.v1, in.v1);
+    res = precise::min(in.v1, in.v1);
+    res = precise::max(in.v1, in.v1);
+    res = precise::clamp(in.v1, in.v1, in.v1);
+    float2 res2 = fast::min(in.v2, in.v2);
+    res2 = fast::max(in.v2, in.v2);
+    res2 = fast::clamp(in.v2, in.v2, in.v2);
+    res2 = precise::min(in.v2, in.v2);
+    res2 = precise::max(in.v2, in.v2);
+    res2 = precise::clamp(in.v2, in.v2, in.v2);
+    float3 res3 = fast::min(in.v3, in.v3);
+    res3 = fast::max(in.v3, in.v3);
+    res3 = fast::clamp(in.v3, in.v3, in.v3);
+    res3 = precise::min(in.v3, in.v3);
+    res3 = precise::max(in.v3, in.v3);
+    res3 = precise::clamp(in.v3, in.v3, in.v3);
+    float4 res4 = fast::min(in.v4, in.v4);
+    res4 = fast::max(in.v4, in.v4);
+    res4 = fast::clamp(in.v4, in.v4, in.v4);
+    res4 = precise::min(in.v4, in.v4);
+    res4 = precise::max(in.v4, in.v4);
+    res4 = precise::clamp(in.v4, in.v4, in.v4);
+    half hres = min(in.h1, in.h1);
+    hres = max(in.h1, in.h1);
+    hres = clamp(in.h1, in.h1, in.h1);
+    hres = min(in.h1, in.h1);
+    hres = max(in.h1, in.h1);
+    hres = clamp(in.h1, in.h1, in.h1);
+    half2 hres2 = min(in.h2, in.h2);
+    hres2 = max(in.h2, in.h2);
+    hres2 = clamp(in.h2, in.h2, in.h2);
+    hres2 = min(in.h2, in.h2);
+    hres2 = max(in.h2, in.h2);
+    hres2 = clamp(in.h2, in.h2, in.h2);
+    half3 hres3 = min(in.h3, in.h3);
+    hres3 = max(in.h3, in.h3);
+    hres3 = clamp(in.h3, in.h3, in.h3);
+    hres3 = min(in.h3, in.h3);
+    hres3 = max(in.h3, in.h3);
+    hres3 = clamp(in.h3, in.h3, in.h3);
+    half4 hres4 = min(in.h4, in.h4);
+    hres4 = max(in.h4, in.h4);
+    hres4 = clamp(in.h4, in.h4, in.h4);
+    hres4 = min(in.h4, in.h4);
+    hres4 = max(in.h4, in.h4);
+    hres4 = clamp(in.h4, in.h4, in.h4);
+}
+
diff --git a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
index 9f9b827c..db7fc1a3 100644
--- a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
+++ b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
@@ -102,13 +102,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     _77._m0 = float4(0.0);
     float2 _82 = gl_FragCoord.xy * _19._m23.xy;
     float4 _88 = _7._m2 * _7._m0.xyxy;
-    float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _97 = fast::clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _113 = _12.sample(_13, _97, level(0.0));
     float3 _129;
     if (_113.y > 0.0)
     {
-        _129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * clamp(_113.y * _113.z, 0.0, 1.0));
+        _129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * fast::clamp(_113.y * _113.z, 0.0, 1.0));
     }
     else
     {
@@ -119,13 +119,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w);
     _28 _135 = _77;
     _135._m0 = _134;
-    float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _144 = fast::clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _160 = _12.sample(_13, _144, level(0.0));
     float3 _176;
     if (_160.y > 0.0)
     {
-        _176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * clamp(_160.y * _160.z, 0.0, 1.0));
+        _176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * fast::clamp(_160.y * _160.z, 0.0, 1.0));
     }
     else
     {
@@ -136,13 +136,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _181 = float4(_180.x, _180.y, _180.z, _134.w);
     _28 _182 = _135;
     _182._m0 = _181;
-    float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _191 = fast::clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _207 = _12.sample(_13, _191, level(0.0));
     float3 _223;
     if (_207.y > 0.0)
     {
-        _223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * clamp(_207.y * _207.z, 0.0, 1.0));
+        _223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * fast::clamp(_207.y * _207.z, 0.0, 1.0));
     }
     else
     {
@@ -153,13 +153,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _228 = float4(_227.x, _227.y, _227.z, _181.w);
     _28 _229 = _182;
     _229._m0 = _228;
-    float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _238 = fast::clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _254 = _12.sample(_13, _238, level(0.0));
     float3 _270;
     if (_254.y > 0.0)
     {
-        _270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * clamp(_254.y * _254.z, 0.0, 1.0));
+        _270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * fast::clamp(_254.y * _254.z, 0.0, 1.0));
     }
     else
     {
@@ -170,13 +170,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _275 = float4(_274.x, _274.y, _274.z, _228.w);
     _28 _276 = _229;
     _276._m0 = _275;
-    float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _285 = fast::clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _301 = _12.sample(_13, _285, level(0.0));
     float3 _317;
     if (_301.y > 0.0)
     {
-        _317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * clamp(_301.y * _301.z, 0.0, 1.0));
+        _317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * fast::clamp(_301.y * _301.z, 0.0, 1.0));
     }
     else
     {
@@ -187,13 +187,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _322 = float4(_321.x, _321.y, _321.z, _275.w);
     _28 _323 = _276;
     _323._m0 = _322;
-    float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _332 = fast::clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _348 = _12.sample(_13, _332, level(0.0));
     float3 _364;
     if (_348.y > 0.0)
     {
-        _364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * clamp(_348.y * _348.z, 0.0, 1.0));
+        _364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * fast::clamp(_348.y * _348.z, 0.0, 1.0));
     }
     else
     {
@@ -204,13 +204,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _369 = float4(_368.x, _368.y, _368.z, _322.w);
     _28 _370 = _323;
     _370._m0 = _369;
-    float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _379 = fast::clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _395 = _12.sample(_13, _379, level(0.0));
     float3 _411;
     if (_395.y > 0.0)
     {
-        _411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * clamp(_395.y * _395.z, 0.0, 1.0));
+        _411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * fast::clamp(_395.y * _395.z, 0.0, 1.0));
     }
     else
     {
@@ -221,13 +221,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _416 = float4(_415.x, _415.y, _415.z, _369.w);
     _28 _417 = _370;
     _417._m0 = _416;
-    float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _426 = fast::clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _442 = _12.sample(_13, _426, level(0.0));
     float3 _458;
     if (_442.y > 0.0)
     {
-        _458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * clamp(_442.y * _442.z, 0.0, 1.0));
+        _458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * fast::clamp(_442.y * _442.z, 0.0, 1.0));
     }
     else
     {
@@ -238,13 +238,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _463 = float4(_462.x, _462.y, _462.z, _416.w);
     _28 _464 = _417;
     _464._m0 = _463;
-    float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _473 = fast::clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _489 = _12.sample(_13, _473, level(0.0));
     float3 _505;
     if (_489.y > 0.0)
     {
-        _505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * clamp(_489.y * _489.z, 0.0, 1.0));
+        _505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * fast::clamp(_489.y * _489.z, 0.0, 1.0));
     }
     else
     {
@@ -255,13 +255,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _510 = float4(_509.x, _509.y, _509.z, _463.w);
     _28 _511 = _464;
     _511._m0 = _510;
-    float2 _520 = clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _520 = fast::clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _536 = _12.sample(_13, _520, level(0.0));
     float3 _552;
     if (_536.y > 0.0)
     {
-        _552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * clamp(_536.y * _536.z, 0.0, 1.0));
+        _552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * fast::clamp(_536.y * _536.z, 0.0, 1.0));
     }
     else
     {
@@ -272,13 +272,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _557 = float4(_556.x, _556.y, _556.z, _510.w);
     _28 _558 = _511;
     _558._m0 = _557;
-    float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _567 = fast::clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _583 = _12.sample(_13, _567, level(0.0));
     float3 _599;
     if (_583.y > 0.0)
     {
-        _599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * clamp(_583.y * _583.z, 0.0, 1.0));
+        _599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * fast::clamp(_583.y * _583.z, 0.0, 1.0));
     }
     else
     {
@@ -289,13 +289,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _604 = float4(_603.x, _603.y, _603.z, _557.w);
     _28 _605 = _558;
     _605._m0 = _604;
-    float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _614 = fast::clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _630 = _12.sample(_13, _614, level(0.0));
     float3 _646;
     if (_630.y > 0.0)
     {
-        _646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * clamp(_630.y * _630.z, 0.0, 1.0));
+        _646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * fast::clamp(_630.y * _630.z, 0.0, 1.0));
     }
     else
     {
@@ -306,13 +306,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
     float4 _651 = float4(_650.x, _650.y, _650.z, _604.w);
     _28 _652 = _605;
     _652._m0 = _651;
-    float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _661 = fast::clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _677 = _12.sample(_13, _661, level(0.0));
     float3 _693;
     if (_677.y > 0.0)
     {
-        _693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * clamp(_677.y * _677.z, 0.0, 1.0));
+        _693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * fast::clamp(_677.y * _677.z, 0.0, 1.0));
     }
     else
     {
diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert
index 291b1f7a..954f9255 100644
--- a/reference/shaders-msl/flatten/struct.flatten.vert
+++ b/reference/shaders-msl/flatten/struct.flatten.vert
@@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]
     out.gl_Position = _18.uMVP * in.aVertex;
     out.vColor = float4(0.0);
     float3 L = in.aVertex.xyz - float3(_18.light.Position);
-    out.vColor += ((_18.light.Color * clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
+    out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
     return out;
 }
 
diff --git a/reference/shaders-msl/vert/copy.flatten.vert b/reference/shaders-msl/vert/copy.flatten.vert
index 23e52047..a87b4478 100644
--- a/reference/shaders-msl/vert/copy.flatten.vert
+++ b/reference/shaders-msl/vert/copy.flatten.vert
@@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
         light.Radius = _21.lights[i].Radius;
         light.Color = _21.lights[i].Color;
         float3 L = in.aVertex.xyz - light.Position;
-        out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
+        out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
     }
     return out;
 }
diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert
index de654a12..c285f3c8 100644
--- a/reference/shaders-msl/vert/dynamic.flatten.vert
+++ b/reference/shaders-msl/vert/dynamic.flatten.vert
@@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
     for (int i = 0; i < 4; i++)
     {
         float3 L = in.aVertex.xyz - float3(_21.lights[i].Position);
-        out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
+        out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
     }
     return out;
 }
diff --git a/shaders-msl/asm/frag/min-max-clamp.asm.frag b/shaders-msl/asm/frag/min-max-clamp.asm.frag
new file mode 100644
index 00000000..3bf8c088
--- /dev/null
+++ b/shaders-msl/asm/frag/min-max-clamp.asm.frag
@@ -0,0 +1,293 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 205
+; Schema: 0
+               OpCapability Shader
+               OpExtension "SPV_AMD_gpu_shader_half_float"
+               OpCapability Float16
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_AMD_gpu_shader_half_float"
+               OpName %main "main"
+               OpName %res "res"
+               OpName %res2 "res2"
+               OpName %res3 "res3"
+               OpName %res4 "res4"
+               OpName %hres "hres"
+               OpName %hres2 "hres2"
+               OpName %hres3 "hres3"
+               OpName %hres4 "hres4"
+               OpName %v1 "v1"
+               OpName %v2 "v2"
+               OpName %v3 "v3"
+               OpName %v4 "v4"
+               OpName %h1 "h1"
+               OpName %h2 "h2"
+               OpName %h3 "h3"
+               OpName %h4 "h4"
+               OpDecorate %v1 Location 0
+               OpDecorate %v2 Location 1
+               OpDecorate %v3 Location 2
+               OpDecorate %v4 Location 3
+               OpDecorate %h1 Location 4
+               OpDecorate %h2 Location 5
+               OpDecorate %h3 Location 6
+               OpDecorate %h4 Location 7
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+%_ptr_Function_float = OpTypePointer Function %float
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%_ptr_Function_half = OpTypePointer Function %half
+%_ptr_Input_half = OpTypePointer Input %half
+%_ptr_Function_v2half = OpTypePointer Function %v2half
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+%_ptr_Function_v3half = OpTypePointer Function %v3half
+%_ptr_Input_v3half = OpTypePointer Input %v3half
+%_ptr_Function_v4half = OpTypePointer Function %v4half
+%_ptr_Input_v4half = OpTypePointer Input %v4half
+         %v1 = OpVariable %_ptr_Input_float Input
+         %v2 = OpVariable %_ptr_Input_v2float Input
+         %v3 = OpVariable %_ptr_Input_v3float Input
+         %v4 = OpVariable %_ptr_Input_v4float Input
+         %h1 = OpVariable %_ptr_Input_half Input
+         %h2 = OpVariable %_ptr_Input_v2half Input
+         %h3 = OpVariable %_ptr_Input_v3half Input
+         %h4 = OpVariable %_ptr_Input_v4half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %res = OpVariable %_ptr_Function_float Function
+         %46 = OpLoad %float %v1
+         %47 = OpLoad %float %v1
+         %48 = OpExtInst %float %1 FMin %46 %47
+               OpStore %res %48
+         %49 = OpLoad %float %v1
+         %50 = OpLoad %float %v1
+         %51 = OpExtInst %float %1 FMax %49 %50
+               OpStore %res %51
+         %52 = OpLoad %float %v1
+         %53 = OpLoad %float %v1
+         %54 = OpLoad %float %v1
+         %55 = OpExtInst %float %1 FClamp %52 %53 %54
+               OpStore %res %55
+         %56 = OpLoad %float %v1
+         %57 = OpLoad %float %v1
+         %58 = OpExtInst %float %1 NMin %56 %57
+               OpStore %res %58
+         %59 = OpLoad %float %v1
+         %60 = OpLoad %float %v1
+         %61 = OpExtInst %float %1 NMax %59 %60
+               OpStore %res %61
+         %62 = OpLoad %float %v1
+         %63 = OpLoad %float %v1
+         %64 = OpLoad %float %v1
+         %65 = OpExtInst %float %1 NClamp %62 %63 %64
+               OpStore %res %65
+       %res2 = OpVariable %_ptr_Function_v2float Function
+         %66 = OpLoad %v2float %v2
+         %67 = OpLoad %v2float %v2
+         %68 = OpExtInst %v2float %1 FMin %66 %67
+               OpStore %res2 %68
+         %69 = OpLoad %v2float %v2
+         %70 = OpLoad %v2float %v2
+         %71 = OpExtInst %v2float %1 FMax %69 %70
+               OpStore %res2 %71
+         %72 = OpLoad %v2float %v2
+         %73 = OpLoad %v2float %v2
+         %74 = OpLoad %v2float %v2
+         %75 = OpExtInst %v2float %1 FClamp %72 %73 %74
+               OpStore %res2 %75
+         %76 = OpLoad %v2float %v2
+         %77 = OpLoad %v2float %v2
+         %78 = OpExtInst %v2float %1 NMin %76 %77
+               OpStore %res2 %78
+         %79 = OpLoad %v2float %v2
+         %80 = OpLoad %v2float %v2
+         %81 = OpExtInst %v2float %1 NMax %79 %80
+               OpStore %res2 %81
+         %82 = OpLoad %v2float %v2
+         %83 = OpLoad %v2float %v2
+         %84 = OpLoad %v2float %v2
+         %85 = OpExtInst %v2float %1 NClamp %82 %83 %84
+               OpStore %res2 %85
+       %res3 = OpVariable %_ptr_Function_v3float Function
+         %86 = OpLoad %v3float %v3
+         %87 = OpLoad %v3float %v3
+         %88 = OpExtInst %v3float %1 FMin %86 %87
+               OpStore %res3 %88
+         %89 = OpLoad %v3float %v3
+         %90 = OpLoad %v3float %v3
+         %91 = OpExtInst %v3float %1 FMax %89 %90
+               OpStore %res3 %91
+         %92 = OpLoad %v3float %v3
+         %93 = OpLoad %v3float %v3
+         %94 = OpLoad %v3float %v3
+         %95 = OpExtInst %v3float %1 FClamp %92 %93 %94
+               OpStore %res3 %95
+         %96 = OpLoad %v3float %v3
+         %97 = OpLoad %v3float %v3
+         %98 = OpExtInst %v3float %1 NMin %96 %97
+               OpStore %res3 %98
+         %99 = OpLoad %v3float %v3
+        %100 = OpLoad %v3float %v3
+        %101 = OpExtInst %v3float %1 NMax %99 %100
+               OpStore %res3 %101
+        %102 = OpLoad %v3float %v3
+        %103 = OpLoad %v3float %v3
+        %104 = OpLoad %v3float %v3
+        %105 = OpExtInst %v3float %1 NClamp %102 %103 %104
+               OpStore %res3 %105
+       %res4 = OpVariable %_ptr_Function_v4float Function
+        %106 = OpLoad %v4float %v4
+        %107 = OpLoad %v4float %v4
+        %108 = OpExtInst %v4float %1 FMin %106 %107
+               OpStore %res4 %108
+        %109 = OpLoad %v4float %v4
+        %110 = OpLoad %v4float %v4
+        %111 = OpExtInst %v4float %1 FMax %109 %110
+               OpStore %res4 %111
+        %112 = OpLoad %v4float %v4
+        %113 = OpLoad %v4float %v4
+        %114 = OpLoad %v4float %v4
+        %115 = OpExtInst %v4float %1 FClamp %112 %113 %114
+               OpStore %res4 %115
+        %116 = OpLoad %v4float %v4
+        %117 = OpLoad %v4float %v4
+        %118 = OpExtInst %v4float %1 NMin %116 %117
+               OpStore %res4 %118
+        %119 = OpLoad %v4float %v4
+        %120 = OpLoad %v4float %v4
+        %121 = OpExtInst %v4float %1 NMax %119 %120
+               OpStore %res4 %121
+        %122 = OpLoad %v4float %v4
+        %123 = OpLoad %v4float %v4
+        %124 = OpLoad %v4float %v4
+        %125 = OpExtInst %v4float %1 NClamp %122 %123 %124
+               OpStore %res4 %125
+       %hres = OpVariable %_ptr_Function_half Function
+        %126 = OpLoad %half %h1
+        %127 = OpLoad %half %h1
+        %128 = OpExtInst %half %1 FMin %126 %127
+               OpStore %hres %128
+        %129 = OpLoad %half %h1
+        %130 = OpLoad %half %h1
+        %131 = OpExtInst %half %1 FMax %129 %130
+               OpStore %hres %131
+        %132 = OpLoad %half %h1
+        %133 = OpLoad %half %h1
+        %134 = OpLoad %half %h1
+        %135 = OpExtInst %half %1 FClamp %132 %133 %134
+               OpStore %hres %135
+        %136 = OpLoad %half %h1
+        %137 = OpLoad %half %h1
+        %138 = OpExtInst %half %1 NMin %136 %137
+               OpStore %hres %138
+        %139 = OpLoad %half %h1
+        %140 = OpLoad %half %h1
+        %141 = OpExtInst %half %1 NMax %139 %140
+               OpStore %hres %141
+        %142 = OpLoad %half %h1
+        %143 = OpLoad %half %h1
+        %144 = OpLoad %half %h1
+        %145 = OpExtInst %half %1 NClamp %142 %143 %144
+               OpStore %hres %145
+      %hres2 = OpVariable %_ptr_Function_v2half Function
+        %146 = OpLoad %v2half %h2
+        %147 = OpLoad %v2half %h2
+        %148 = OpExtInst %v2half %1 FMin %146 %147
+               OpStore %hres2 %148
+        %149 = OpLoad %v2half %h2
+        %150 = OpLoad %v2half %h2
+        %151 = OpExtInst %v2half %1 FMax %149 %150
+               OpStore %hres2 %151
+        %152 = OpLoad %v2half %h2
+        %153 = OpLoad %v2half %h2
+        %154 = OpLoad %v2half %h2
+        %155 = OpExtInst %v2half %1 FClamp %152 %153 %154
+               OpStore %hres2 %155
+        %156 = OpLoad %v2half %h2
+        %157 = OpLoad %v2half %h2
+        %158 = OpExtInst %v2half %1 NMin %156 %157
+               OpStore %hres2 %158
+        %159 = OpLoad %v2half %h2
+        %160 = OpLoad %v2half %h2
+        %161 = OpExtInst %v2half %1 NMax %159 %160
+               OpStore %hres2 %161
+        %162 = OpLoad %v2half %h2
+        %163 = OpLoad %v2half %h2
+        %164 = OpLoad %v2half %h2
+        %165 = OpExtInst %v2half %1 NClamp %162 %163 %164
+               OpStore %hres2 %165
+      %hres3 = OpVariable %_ptr_Function_v3half Function
+        %166 = OpLoad %v3half %h3
+        %167 = OpLoad %v3half %h3
+        %168 = OpExtInst %v3half %1 FMin %166 %167
+               OpStore %hres3 %168
+        %169 = OpLoad %v3half %h3
+        %170 = OpLoad %v3half %h3
+        %171 = OpExtInst %v3half %1 FMax %169 %170
+               OpStore %hres3 %171
+        %172 = OpLoad %v3half %h3
+        %173 = OpLoad %v3half %h3
+        %174 = OpLoad %v3half %h3
+        %175 = OpExtInst %v3half %1 FClamp %172 %173 %174
+               OpStore %hres3 %175
+        %176 = OpLoad %v3half %h3
+        %177 = OpLoad %v3half %h3
+        %178 = OpExtInst %v3half %1 NMin %176 %177
+               OpStore %hres3 %178
+        %179 = OpLoad %v3half %h3
+        %180 = OpLoad %v3half %h3
+        %181 = OpExtInst %v3half %1 NMax %179 %180
+               OpStore %hres3 %181
+        %182 = OpLoad %v3half %h3
+        %183 = OpLoad %v3half %h3
+        %184 = OpLoad %v3half %h3
+        %185 = OpExtInst %v3half %1 NClamp %182 %183 %184
+               OpStore %hres3 %185
+      %hres4 = OpVariable %_ptr_Function_v4half Function
+        %186 = OpLoad %v4half %h4
+        %187 = OpLoad %v4half %h4
+        %188 = OpExtInst %v4half %1 FMin %186 %187
+               OpStore %hres4 %188
+        %189 = OpLoad %v4half %h4
+        %190 = OpLoad %v4half %h4
+        %191 = OpExtInst %v4half %1 FMax %189 %190
+               OpStore %hres4 %191
+        %192 = OpLoad %v4half %h4
+        %193 = OpLoad %v4half %h4
+        %194 = OpLoad %v4half %h4
+        %195 = OpExtInst %v4half %1 FClamp %192 %193 %194
+               OpStore %hres4 %195
+        %196 = OpLoad %v4half %h4
+        %197 = OpLoad %v4half %h4
+        %198 = OpExtInst %v4half %1 NMin %196 %197
+               OpStore %hres4 %198
+        %199 = OpLoad %v4half %h4
+        %200 = OpLoad %v4half %h4
+        %201 = OpExtInst %v4half %1 NMax %199 %200
+               OpStore %hres4 %201
+        %202 = OpLoad %v4half %h4
+        %203 = OpLoad %v4half %h4
+        %204 = OpLoad %v4half %h4
+        %205 = OpExtInst %v4half %1 NClamp %202 %203 %204
+               OpStore %hres4 %205
+               OpReturn
+               OpFunctionEnd
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 05bfb378..7391c5bb 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -2364,6 +2364,53 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		break;
 	}
 
+	case GLSLstd450FMin:
+		// If the result type isn't float, don't bother calling the specific
+		// precise::/fast:: version. Metal doesn't have those for half and
+		// double types.
+		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
+			emit_binary_func_op(result_type, id, args[0], args[1], "min");
+		else
+			emit_binary_func_op(result_type, id, args[0], args[1], "fast::min");
+		break;
+
+	case GLSLstd450FMax:
+		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
+			emit_binary_func_op(result_type, id, args[0], args[1], "max");
+		else
+			emit_binary_func_op(result_type, id, args[0], args[1], "fast::max");
+		break;
+
+	case GLSLstd450FClamp:
+		// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
+		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
+		else
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp");
+		break;
+
+	case GLSLstd450NMin:
+		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
+			emit_binary_func_op(result_type, id, args[0], args[1], "min");
+		else
+			emit_binary_func_op(result_type, id, args[0], args[1], "precise::min");
+		break;
+
+	case GLSLstd450NMax:
+		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
+			emit_binary_func_op(result_type, id, args[0], args[1], "max");
+		else
+			emit_binary_func_op(result_type, id, args[0], args[1], "precise::max");
+		break;
+
+	case GLSLstd450NClamp:
+		// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
+		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
+		else
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp");
+		break;
+
 		// TODO:
 		//        GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
 		//        GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)