From 27af716c3ade102d427df06aaedbf25dd220911f Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Fri, 31 Aug 2018 18:15:07 -0500 Subject: [PATCH] MSL: Emit F{Min,Max,Clamp} as fast:: and N{Min,Max,Clamp} as precise::. This roughly matches their semantics in SPIR-V and MSL. For `FMin`, `FMax`, and `FClamp`, and the Metal functions `fast::min()`, `fast::max()`, and `fast::clamp()`, the result is undefined if any operand is NaN. For the 'N' operations and their corresponding MSL `precise::` functions, the result is consistent with IEEE 754 (first non-NaN wins; result is NaN if all operands are NaN). We can only do this with 32-bit floats, though, because Metal only provides these variants for `float`. `half` only has one variant of these functions that is presumably consistent with IEEE 754. I guess that's OK; the SPIR-V spec only says that `F{Min,Max,Clamp}` are undefined for NaNs. Performance might suffer, though. --- .../asm/frag/min-max-clamp.asm.frag | 69 +++++ .../asm/frag/vector-shuffle-oom.asm.frag | 78 ++--- .../shaders-msl/flatten/struct.flatten.vert | 2 +- reference/shaders-msl/vert/copy.flatten.vert | 2 +- .../shaders-msl/vert/dynamic.flatten.vert | 2 +- shaders-msl/asm/frag/min-max-clamp.asm.frag | 293 ++++++++++++++++++ spirv_msl.cpp | 47 +++ 7 files changed, 451 insertions(+), 42 deletions(-) create mode 100644 reference/shaders-msl/asm/frag/min-max-clamp.asm.frag create mode 100644 shaders-msl/asm/frag/min-max-clamp.asm.frag diff --git a/reference/shaders-msl/asm/frag/min-max-clamp.asm.frag b/reference/shaders-msl/asm/frag/min-max-clamp.asm.frag new file mode 100644 index 00000000..f597a6eb --- /dev/null +++ b/reference/shaders-msl/asm/frag/min-max-clamp.asm.frag @@ -0,0 +1,69 @@ +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +struct main0_in +{ + float v1 [[user(locn0)]]; + float2 v2 [[user(locn1)]]; + float3 v3 [[user(locn2)]]; + float4 v4 [[user(locn3)]]; + half h1 [[user(locn4)]]; + half2 h2 [[user(locn5)]]; + half3 h3 [[user(locn6)]]; + half4 h4 
[[user(locn7)]]; +}; + +fragment void main0(main0_in in [[stage_in]]) +{ + float res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + res = fast::clamp(in.v1, in.v1, in.v1); + res = precise::min(in.v1, in.v1); + res = precise::max(in.v1, in.v1); + res = precise::clamp(in.v1, in.v1, in.v1); + float2 res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + res2 = precise::min(in.v2, in.v2); + res2 = precise::max(in.v2, in.v2); + res2 = precise::clamp(in.v2, in.v2, in.v2); + float3 res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + res3 = precise::min(in.v3, in.v3); + res3 = precise::max(in.v3, in.v3); + res3 = precise::clamp(in.v3, in.v3, in.v3); + float4 res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + res4 = precise::min(in.v4, in.v4); + res4 = precise::max(in.v4, in.v4); + res4 = precise::clamp(in.v4, in.v4, in.v4); + half hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + half2 hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + half3 hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + half4 hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); + hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); +} + diff --git a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index 9f9b827c..db7fc1a3 100644 --- 
a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -102,13 +102,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff _77._m0 = float4(0.0); float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; - float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _97 = fast::clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _113 = _12.sample(_13, _97, level(0.0)); float3 _129; if (_113.y > 0.0) { - _129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * clamp(_113.y * _113.z, 0.0, 1.0)); + _129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * fast::clamp(_113.y * _113.z, 0.0, 1.0)); } else { @@ -119,13 +119,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w); _28 _135 = _77; _135._m0 = _134; - float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _144 = fast::clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); float3 _176; if (_160.y > 0.0) { - _176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * clamp(_160.y * _160.z, 0.0, 1.0)); + _176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * fast::clamp(_160.y * _160.z, 0.0, 1.0)); } else { @@ -136,13 +136,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _181 = float4(_180.x, _180.y, _180.z, _134.w); _28 _182 
= _135; _182._m0 = _181; - float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _191 = fast::clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); float3 _223; if (_207.y > 0.0) { - _223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * clamp(_207.y * _207.z, 0.0, 1.0)); + _223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * fast::clamp(_207.y * _207.z, 0.0, 1.0)); } else { @@ -153,13 +153,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _228 = float4(_227.x, _227.y, _227.z, _181.w); _28 _229 = _182; _229._m0 = _228; - float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _238 = fast::clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _254 = _12.sample(_13, _238, level(0.0)); float3 _270; if (_254.y > 0.0) { - _270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * clamp(_254.y * _254.z, 0.0, 1.0)); + _270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * fast::clamp(_254.y * _254.z, 0.0, 1.0)); } else { @@ -170,13 +170,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _275 = float4(_274.x, _274.y, _274.z, _228.w); _28 _276 = _229; _276._m0 = _275; - float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _285 = fast::clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + 
float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); float3 _317; if (_301.y > 0.0) { - _317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * clamp(_301.y * _301.z, 0.0, 1.0)); + _317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * fast::clamp(_301.y * _301.z, 0.0, 1.0)); } else { @@ -187,13 +187,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _322 = float4(_321.x, _321.y, _321.z, _275.w); _28 _323 = _276; _323._m0 = _322; - float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _332 = fast::clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); float3 _364; if (_348.y > 0.0) { - _364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * clamp(_348.y * _348.z, 0.0, 1.0)); + _364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * fast::clamp(_348.y * _348.z, 0.0, 1.0)); } else { @@ -204,13 +204,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _369 = float4(_368.x, _368.y, _368.z, _322.w); _28 _370 = _323; _370._m0 = _369; - float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _379 = fast::clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); float3 _411; if (_395.y > 0.0) { - _411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * clamp(_395.y * _395.z, 0.0, 1.0)); + _411 = _391 + (_14.sample(_15, 
_379, level(0.0)).xyz * fast::clamp(_395.y * _395.z, 0.0, 1.0)); } else { @@ -221,13 +221,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _416 = float4(_415.x, _415.y, _415.z, _369.w); _28 _417 = _370; _417._m0 = _416; - float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _426 = fast::clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); float3 _458; if (_442.y > 0.0) { - _458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * clamp(_442.y * _442.z, 0.0, 1.0)); + _458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * fast::clamp(_442.y * _442.z, 0.0, 1.0)); } else { @@ -238,13 +238,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _463 = float4(_462.x, _462.y, _462.z, _416.w); _28 _464 = _417; _464._m0 = _463; - float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _473 = fast::clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); float3 _505; if (_489.y > 0.0) { - _505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * clamp(_489.y * _489.z, 0.0, 1.0)); + _505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * fast::clamp(_489.y * _489.z, 0.0, 1.0)); } else { @@ -255,13 +255,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _510 = float4(_509.x, _509.y, _509.z, _463.w); _28 _511 = _464; _511._m0 = _510; - float2 _520 = clamp(_82 + (float3(-1.0, 
1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _520 = fast::clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); float3 _552; if (_536.y > 0.0) { - _552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * clamp(_536.y * _536.z, 0.0, 1.0)); + _552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * fast::clamp(_536.y * _536.z, 0.0, 1.0)); } else { @@ -272,13 +272,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _557 = float4(_556.x, _556.y, _556.z, _510.w); _28 _558 = _511; _558._m0 = _557; - float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _567 = fast::clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _583 = _12.sample(_13, _567, level(0.0)); float3 _599; if (_583.y > 0.0) { - _599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * clamp(_583.y * _583.z, 0.0, 1.0)); + _599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * fast::clamp(_583.y * _583.z, 0.0, 1.0)); } else { @@ -289,13 +289,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _604 = float4(_603.x, _603.y, _603.z, _557.w); _28 _605 = _558; _605._m0 = _604; - float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _614 = fast::clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, 
level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); float3 _646; if (_630.y > 0.0) { - _646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * clamp(_630.y * _630.z, 0.0, 1.0)); + _646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * fast::clamp(_630.y * _630.z, 0.0, 1.0)); } else { @@ -306,13 +306,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff float4 _651 = float4(_650.x, _650.y, _650.z, _604.w); _28 _652 = _605; _652._m0 = _651; - float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); - float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); + float2 _661 = fast::clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); + float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); float3 _693; if (_677.y > 0.0) { - _693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * clamp(_677.y * _677.z, 0.0, 1.0)); + _693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * fast::clamp(_677.y * _677.z, 0.0, 1.0)); } else { diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert index 291b1f7a..954f9255 100644 --- a/reference/shaders-msl/flatten/struct.flatten.vert +++ b/reference/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 L = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); return out; } diff --git a/reference/shaders-msl/vert/copy.flatten.vert 
b/reference/shaders-msl/vert/copy.flatten.vert index 23e52047..a87b4478 100644 --- a/reference/shaders-msl/vert/copy.flatten.vert +++ b/reference/shaders-msl/vert/copy.flatten.vert @@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] light.Radius = _21.lights[i].Radius; light.Color = _21.lights[i].Color; float3 L = in.aVertex.xyz - light.Position; - out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert index de654a12..c285f3c8 100644 --- a/reference/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int i = 0; i < 4; i++) { float3 L = in.aVertex.xyz - float3(_21.lights[i].Position); - out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); } return out; } diff --git a/shaders-msl/asm/frag/min-max-clamp.asm.frag b/shaders-msl/asm/frag/min-max-clamp.asm.frag new file mode 100644 index 00000000..3bf8c088 --- /dev/null +++ b/shaders-msl/asm/frag/min-max-clamp.asm.frag @@ -0,0 +1,293 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 205 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_AMD_gpu_shader_half_float" + OpCapability Float16 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4 + OpExecutionMode %main 
OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpName %main "main" + OpName %res "res" + OpName %res2 "res2" + OpName %res3 "res3" + OpName %res4 "res4" + OpName %hres "hres" + OpName %hres2 "hres2" + OpName %hres3 "hres3" + OpName %hres4 "hres4" + OpName %v1 "v1" + OpName %v2 "v2" + OpName %v3 "v3" + OpName %v4 "v4" + OpName %h1 "h1" + OpName %h2 "h2" + OpName %h3 "h3" + OpName %h4 "h4" + OpDecorate %v1 Location 0 + OpDecorate %v2 Location 1 + OpDecorate %v3 Location 2 + OpDecorate %v4 Location 3 + OpDecorate %h1 Location 4 + OpDecorate %h2 Location 5 + OpDecorate %h3 Location 6 + OpDecorate %h4 Location 7 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Function_v3float = OpTypePointer Function %v3float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Function_half = OpTypePointer Function %half +%_ptr_Input_half = OpTypePointer Input %half +%_ptr_Function_v2half = OpTypePointer Function %v2half +%_ptr_Input_v2half = OpTypePointer Input %v2half +%_ptr_Function_v3half = OpTypePointer Function %v3half +%_ptr_Input_v3half = OpTypePointer Input %v3half +%_ptr_Function_v4half = OpTypePointer Function %v4half +%_ptr_Input_v4half = OpTypePointer Input %v4half + %v1 = OpVariable %_ptr_Input_float Input + %v2 = OpVariable %_ptr_Input_v2float Input + %v3 = OpVariable %_ptr_Input_v3float Input + %v4 = OpVariable %_ptr_Input_v4float Input + %h1 = 
OpVariable %_ptr_Input_half Input + %h2 = OpVariable %_ptr_Input_v2half Input + %h3 = OpVariable %_ptr_Input_v3half Input + %h4 = OpVariable %_ptr_Input_v4half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %res = OpVariable %_ptr_Function_float Function + %46 = OpLoad %float %v1 + %47 = OpLoad %float %v1 + %48 = OpExtInst %float %1 FMin %46 %47 + OpStore %res %48 + %49 = OpLoad %float %v1 + %50 = OpLoad %float %v1 + %51 = OpExtInst %float %1 FMax %49 %50 + OpStore %res %51 + %52 = OpLoad %float %v1 + %53 = OpLoad %float %v1 + %54 = OpLoad %float %v1 + %55 = OpExtInst %float %1 FClamp %52 %53 %54 + OpStore %res %55 + %56 = OpLoad %float %v1 + %57 = OpLoad %float %v1 + %58 = OpExtInst %float %1 NMin %56 %57 + OpStore %res %58 + %59 = OpLoad %float %v1 + %60 = OpLoad %float %v1 + %61 = OpExtInst %float %1 NMax %59 %60 + OpStore %res %61 + %62 = OpLoad %float %v1 + %63 = OpLoad %float %v1 + %64 = OpLoad %float %v1 + %65 = OpExtInst %float %1 NClamp %62 %63 %64 + OpStore %res %65 + %res2 = OpVariable %_ptr_Function_v2float Function + %66 = OpLoad %v2float %v2 + %67 = OpLoad %v2float %v2 + %68 = OpExtInst %v2float %1 FMin %66 %67 + OpStore %res2 %68 + %69 = OpLoad %v2float %v2 + %70 = OpLoad %v2float %v2 + %71 = OpExtInst %v2float %1 FMax %69 %70 + OpStore %res2 %71 + %72 = OpLoad %v2float %v2 + %73 = OpLoad %v2float %v2 + %74 = OpLoad %v2float %v2 + %75 = OpExtInst %v2float %1 FClamp %72 %73 %74 + OpStore %res2 %75 + %76 = OpLoad %v2float %v2 + %77 = OpLoad %v2float %v2 + %78 = OpExtInst %v2float %1 NMin %76 %77 + OpStore %res2 %78 + %79 = OpLoad %v2float %v2 + %80 = OpLoad %v2float %v2 + %81 = OpExtInst %v2float %1 NMax %79 %80 + OpStore %res2 %81 + %82 = OpLoad %v2float %v2 + %83 = OpLoad %v2float %v2 + %84 = OpLoad %v2float %v2 + %85 = OpExtInst %v2float %1 NClamp %82 %83 %84 + OpStore %res2 %85 + %res3 = OpVariable %_ptr_Function_v3float Function + %86 = OpLoad %v3float %v3 + %87 = OpLoad %v3float %v3 + %88 = OpExtInst %v3float %1 FMin %86 %87 + OpStore 
%res3 %88 + %89 = OpLoad %v3float %v3 + %90 = OpLoad %v3float %v3 + %91 = OpExtInst %v3float %1 FMax %89 %90 + OpStore %res3 %91 + %92 = OpLoad %v3float %v3 + %93 = OpLoad %v3float %v3 + %94 = OpLoad %v3float %v3 + %95 = OpExtInst %v3float %1 FClamp %92 %93 %94 + OpStore %res3 %95 + %96 = OpLoad %v3float %v3 + %97 = OpLoad %v3float %v3 + %98 = OpExtInst %v3float %1 NMin %96 %97 + OpStore %res3 %98 + %99 = OpLoad %v3float %v3 + %100 = OpLoad %v3float %v3 + %101 = OpExtInst %v3float %1 NMax %99 %100 + OpStore %res3 %101 + %102 = OpLoad %v3float %v3 + %103 = OpLoad %v3float %v3 + %104 = OpLoad %v3float %v3 + %105 = OpExtInst %v3float %1 NClamp %102 %103 %104 + OpStore %res3 %105 + %res4 = OpVariable %_ptr_Function_v4float Function + %106 = OpLoad %v4float %v4 + %107 = OpLoad %v4float %v4 + %108 = OpExtInst %v4float %1 FMin %106 %107 + OpStore %res4 %108 + %109 = OpLoad %v4float %v4 + %110 = OpLoad %v4float %v4 + %111 = OpExtInst %v4float %1 FMax %109 %110 + OpStore %res4 %111 + %112 = OpLoad %v4float %v4 + %113 = OpLoad %v4float %v4 + %114 = OpLoad %v4float %v4 + %115 = OpExtInst %v4float %1 FClamp %112 %113 %114 + OpStore %res4 %115 + %116 = OpLoad %v4float %v4 + %117 = OpLoad %v4float %v4 + %118 = OpExtInst %v4float %1 NMin %116 %117 + OpStore %res4 %118 + %119 = OpLoad %v4float %v4 + %120 = OpLoad %v4float %v4 + %121 = OpExtInst %v4float %1 NMax %119 %120 + OpStore %res4 %121 + %122 = OpLoad %v4float %v4 + %123 = OpLoad %v4float %v4 + %124 = OpLoad %v4float %v4 + %125 = OpExtInst %v4float %1 NClamp %122 %123 %124 + OpStore %res4 %125 + %hres = OpVariable %_ptr_Function_half Function + %126 = OpLoad %half %h1 + %127 = OpLoad %half %h1 + %128 = OpExtInst %half %1 FMin %126 %127 + OpStore %hres %128 + %129 = OpLoad %half %h1 + %130 = OpLoad %half %h1 + %131 = OpExtInst %half %1 FMax %129 %130 + OpStore %hres %131 + %132 = OpLoad %half %h1 + %133 = OpLoad %half %h1 + %134 = OpLoad %half %h1 + %135 = OpExtInst %half %1 FClamp %132 %133 %134 + OpStore %hres %135 + %136 = 
OpLoad %half %h1 + %137 = OpLoad %half %h1 + %138 = OpExtInst %half %1 NMin %136 %137 + OpStore %hres %138 + %139 = OpLoad %half %h1 + %140 = OpLoad %half %h1 + %141 = OpExtInst %half %1 NMax %139 %140 + OpStore %hres %141 + %142 = OpLoad %half %h1 + %143 = OpLoad %half %h1 + %144 = OpLoad %half %h1 + %145 = OpExtInst %half %1 NClamp %142 %143 %144 + OpStore %hres %145 + %hres2 = OpVariable %_ptr_Function_v2half Function + %146 = OpLoad %v2half %h2 + %147 = OpLoad %v2half %h2 + %148 = OpExtInst %v2half %1 FMin %146 %147 + OpStore %hres2 %148 + %149 = OpLoad %v2half %h2 + %150 = OpLoad %v2half %h2 + %151 = OpExtInst %v2half %1 FMax %149 %150 + OpStore %hres2 %151 + %152 = OpLoad %v2half %h2 + %153 = OpLoad %v2half %h2 + %154 = OpLoad %v2half %h2 + %155 = OpExtInst %v2half %1 FClamp %152 %153 %154 + OpStore %hres2 %155 + %156 = OpLoad %v2half %h2 + %157 = OpLoad %v2half %h2 + %158 = OpExtInst %v2half %1 NMin %156 %157 + OpStore %hres2 %158 + %159 = OpLoad %v2half %h2 + %160 = OpLoad %v2half %h2 + %161 = OpExtInst %v2half %1 NMax %159 %160 + OpStore %hres2 %161 + %162 = OpLoad %v2half %h2 + %163 = OpLoad %v2half %h2 + %164 = OpLoad %v2half %h2 + %165 = OpExtInst %v2half %1 NClamp %162 %163 %164 + OpStore %hres2 %165 + %hres3 = OpVariable %_ptr_Function_v3half Function + %166 = OpLoad %v3half %h3 + %167 = OpLoad %v3half %h3 + %168 = OpExtInst %v3half %1 FMin %166 %167 + OpStore %hres3 %168 + %169 = OpLoad %v3half %h3 + %170 = OpLoad %v3half %h3 + %171 = OpExtInst %v3half %1 FMax %169 %170 + OpStore %hres3 %171 + %172 = OpLoad %v3half %h3 + %173 = OpLoad %v3half %h3 + %174 = OpLoad %v3half %h3 + %175 = OpExtInst %v3half %1 FClamp %172 %173 %174 + OpStore %hres3 %175 + %176 = OpLoad %v3half %h3 + %177 = OpLoad %v3half %h3 + %178 = OpExtInst %v3half %1 NMin %176 %177 + OpStore %hres3 %178 + %179 = OpLoad %v3half %h3 + %180 = OpLoad %v3half %h3 + %181 = OpExtInst %v3half %1 NMax %179 %180 + OpStore %hres3 %181 + %182 = OpLoad %v3half %h3 + %183 = OpLoad %v3half %h3 + %184 
= OpLoad %v3half %h3 + %185 = OpExtInst %v3half %1 NClamp %182 %183 %184 + OpStore %hres3 %185 + %hres4 = OpVariable %_ptr_Function_v4half Function + %186 = OpLoad %v4half %h4 + %187 = OpLoad %v4half %h4 + %188 = OpExtInst %v4half %1 FMin %186 %187 + OpStore %hres4 %188 + %189 = OpLoad %v4half %h4 + %190 = OpLoad %v4half %h4 + %191 = OpExtInst %v4half %1 FMax %189 %190 + OpStore %hres4 %191 + %192 = OpLoad %v4half %h4 + %193 = OpLoad %v4half %h4 + %194 = OpLoad %v4half %h4 + %195 = OpExtInst %v4half %1 FClamp %192 %193 %194 + OpStore %hres4 %195 + %196 = OpLoad %v4half %h4 + %197 = OpLoad %v4half %h4 + %198 = OpExtInst %v4half %1 NMin %196 %197 + OpStore %hres4 %198 + %199 = OpLoad %v4half %h4 + %200 = OpLoad %v4half %h4 + %201 = OpExtInst %v4half %1 NMax %199 %200 + OpStore %hres4 %201 + %202 = OpLoad %v4half %h4 + %203 = OpLoad %v4half %h4 + %204 = OpLoad %v4half %h4 + %205 = OpExtInst %v4half %1 NClamp %202 %203 %204 + OpStore %hres4 %205 + OpReturn + OpFunctionEnd diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 05bfb378..7391c5bb 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -2364,6 +2364,53 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; } + case GLSLstd450FMin: + // If the result type isn't float, don't bother calling the specific + // precise::/fast:: version. Metal doesn't have those for half and + // double types. + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "fast::min"); + break; + + case GLSLstd450FMax: + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "fast::max"); + break; + + case GLSLstd450FClamp: + // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call. 
+ if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp"); + break; + + case GLSLstd450NMin: + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::min"); + break; + + case GLSLstd450NMax: + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::max"); + break; + + case GLSLstd450NClamp: + // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call. + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp"); + break; + // TODO: // GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier) // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)