MSL: Emit F{Min,Max,Clamp} as fast:: and N{Min,Max,Clamp} as precise::.

This roughly matches their semantics in SPIR-V and MSL. For `FMin`,
`FMax`, and `FClamp`, and the Metal functions `fast::min()`,
`fast::max()`, and `fast::clamp()`, the result is undefined if any
operand is NaN. For the 'N' operations and their corresponding MSL
`precise::` functions, the result is consistent with IEEE 754 (first
non-NaN wins; result is NaN if all operands are NaN).

We can only do this with 32-bit floats, though, because Metal provides
the `fast::` and `precise::` variants only for `float`. `half` has a
single variant of each of these functions, which is presumably
consistent with IEEE 754. That is acceptable: the SPIR-V spec only says
that `F{Min,Max,Clamp}` are undefined for NaNs, so an IEEE 754 result is
still a valid one. Performance might suffer, though.
This commit is contained in:
Chip Davis 2018-08-31 18:15:07 -05:00
parent 6fd66664e8
commit 27af716c3a
7 changed files with 451 additions and 42 deletions

View File

@ -0,0 +1,69 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Inputs handed to the fragment function through [[stage_in]].
// User locations 0-3 carry 32-bit float values of width 1 to 4;
// user locations 4-7 carry the 16-bit half values of the same widths.
struct main0_in
{
float v1 [[user(locn0)]];
float2 v2 [[user(locn1)]];
float3 v3 [[user(locn2)]];
float4 v4 [[user(locn3)]];
half h1 [[user(locn4)]];
half2 h2 [[user(locn5)]];
half3 h3 [[user(locn6)]];
half4 h4 [[user(locn7)]];
};
// Fragment entry point that calls min, max and clamp on every input, passing
// the same input for each operand. The results are only stored into locals;
// the function returns void and writes no output.
//
// For float and float vector inputs each function is called twice: once
// through the fast:: namespace and once through the precise:: namespace.
// For half and half vector inputs only the unqualified functions are called,
// so the same three calls simply appear twice in a row.
fragment void main0(main0_in in [[stage_in]])
{
// float scalar: fast:: variants, then precise:: variants.
float res = fast::min(in.v1, in.v1);
res = fast::max(in.v1, in.v1);
res = fast::clamp(in.v1, in.v1, in.v1);
res = precise::min(in.v1, in.v1);
res = precise::max(in.v1, in.v1);
res = precise::clamp(in.v1, in.v1, in.v1);
// float2: fast:: variants, then precise:: variants.
float2 res2 = fast::min(in.v2, in.v2);
res2 = fast::max(in.v2, in.v2);
res2 = fast::clamp(in.v2, in.v2, in.v2);
res2 = precise::min(in.v2, in.v2);
res2 = precise::max(in.v2, in.v2);
res2 = precise::clamp(in.v2, in.v2, in.v2);
// float3: fast:: variants, then precise:: variants.
float3 res3 = fast::min(in.v3, in.v3);
res3 = fast::max(in.v3, in.v3);
res3 = fast::clamp(in.v3, in.v3, in.v3);
res3 = precise::min(in.v3, in.v3);
res3 = precise::max(in.v3, in.v3);
res3 = precise::clamp(in.v3, in.v3, in.v3);
// float4: fast:: variants, then precise:: variants.
float4 res4 = fast::min(in.v4, in.v4);
res4 = fast::max(in.v4, in.v4);
res4 = fast::clamp(in.v4, in.v4, in.v4);
res4 = precise::min(in.v4, in.v4);
res4 = precise::max(in.v4, in.v4);
res4 = precise::clamp(in.v4, in.v4, in.v4);
// half scalar: unqualified min/max/clamp, issued twice.
half hres = min(in.h1, in.h1);
hres = max(in.h1, in.h1);
hres = clamp(in.h1, in.h1, in.h1);
hres = min(in.h1, in.h1);
hres = max(in.h1, in.h1);
hres = clamp(in.h1, in.h1, in.h1);
// half2: unqualified min/max/clamp, issued twice.
half2 hres2 = min(in.h2, in.h2);
hres2 = max(in.h2, in.h2);
hres2 = clamp(in.h2, in.h2, in.h2);
hres2 = min(in.h2, in.h2);
hres2 = max(in.h2, in.h2);
hres2 = clamp(in.h2, in.h2, in.h2);
// half3: unqualified min/max/clamp, issued twice.
half3 hres3 = min(in.h3, in.h3);
hres3 = max(in.h3, in.h3);
hres3 = clamp(in.h3, in.h3, in.h3);
hres3 = min(in.h3, in.h3);
hres3 = max(in.h3, in.h3);
hres3 = clamp(in.h3, in.h3, in.h3);
// half4: unqualified min/max/clamp, issued twice.
half4 hres4 = min(in.h4, in.h4);
hres4 = max(in.h4, in.h4);
hres4 = clamp(in.h4, in.h4, in.h4);
hres4 = min(in.h4, in.h4);
hres4 = max(in.h4, in.h4);
hres4 = clamp(in.h4, in.h4, in.h4);
}

View File

@ -102,13 +102,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
_77._m0 = float4(0.0);
float2 _82 = gl_FragCoord.xy * _19._m23.xy;
float4 _88 = _7._m2 * _7._m0.xyxy;
float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _97 = fast::clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _113 = _12.sample(_13, _97, level(0.0));
float3 _129;
if (_113.y > 0.0)
{
_129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * clamp(_113.y * _113.z, 0.0, 1.0));
_129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * fast::clamp(_113.y * _113.z, 0.0, 1.0));
}
else
{
@ -119,13 +119,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w);
_28 _135 = _77;
_135._m0 = _134;
float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _144 = fast::clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _160 = _12.sample(_13, _144, level(0.0));
float3 _176;
if (_160.y > 0.0)
{
_176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * clamp(_160.y * _160.z, 0.0, 1.0));
_176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * fast::clamp(_160.y * _160.z, 0.0, 1.0));
}
else
{
@ -136,13 +136,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _181 = float4(_180.x, _180.y, _180.z, _134.w);
_28 _182 = _135;
_182._m0 = _181;
float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _191 = fast::clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _207 = _12.sample(_13, _191, level(0.0));
float3 _223;
if (_207.y > 0.0)
{
_223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * clamp(_207.y * _207.z, 0.0, 1.0));
_223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * fast::clamp(_207.y * _207.z, 0.0, 1.0));
}
else
{
@ -153,13 +153,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _228 = float4(_227.x, _227.y, _227.z, _181.w);
_28 _229 = _182;
_229._m0 = _228;
float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _238 = fast::clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _254 = _12.sample(_13, _238, level(0.0));
float3 _270;
if (_254.y > 0.0)
{
_270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * clamp(_254.y * _254.z, 0.0, 1.0));
_270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * fast::clamp(_254.y * _254.z, 0.0, 1.0));
}
else
{
@ -170,13 +170,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _275 = float4(_274.x, _274.y, _274.z, _228.w);
_28 _276 = _229;
_276._m0 = _275;
float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _285 = fast::clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _301 = _12.sample(_13, _285, level(0.0));
float3 _317;
if (_301.y > 0.0)
{
_317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * clamp(_301.y * _301.z, 0.0, 1.0));
_317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * fast::clamp(_301.y * _301.z, 0.0, 1.0));
}
else
{
@ -187,13 +187,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _322 = float4(_321.x, _321.y, _321.z, _275.w);
_28 _323 = _276;
_323._m0 = _322;
float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _332 = fast::clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _348 = _12.sample(_13, _332, level(0.0));
float3 _364;
if (_348.y > 0.0)
{
_364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * clamp(_348.y * _348.z, 0.0, 1.0));
_364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * fast::clamp(_348.y * _348.z, 0.0, 1.0));
}
else
{
@ -204,13 +204,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _369 = float4(_368.x, _368.y, _368.z, _322.w);
_28 _370 = _323;
_370._m0 = _369;
float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _379 = fast::clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _395 = _12.sample(_13, _379, level(0.0));
float3 _411;
if (_395.y > 0.0)
{
_411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * clamp(_395.y * _395.z, 0.0, 1.0));
_411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * fast::clamp(_395.y * _395.z, 0.0, 1.0));
}
else
{
@ -221,13 +221,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _416 = float4(_415.x, _415.y, _415.z, _369.w);
_28 _417 = _370;
_417._m0 = _416;
float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _426 = fast::clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _442 = _12.sample(_13, _426, level(0.0));
float3 _458;
if (_442.y > 0.0)
{
_458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * clamp(_442.y * _442.z, 0.0, 1.0));
_458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * fast::clamp(_442.y * _442.z, 0.0, 1.0));
}
else
{
@ -238,13 +238,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _463 = float4(_462.x, _462.y, _462.z, _416.w);
_28 _464 = _417;
_464._m0 = _463;
float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _473 = fast::clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _489 = _12.sample(_13, _473, level(0.0));
float3 _505;
if (_489.y > 0.0)
{
_505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * clamp(_489.y * _489.z, 0.0, 1.0));
_505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * fast::clamp(_489.y * _489.z, 0.0, 1.0));
}
else
{
@ -255,13 +255,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _510 = float4(_509.x, _509.y, _509.z, _463.w);
_28 _511 = _464;
_511._m0 = _510;
float2 _520 = clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _520 = fast::clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _536 = _12.sample(_13, _520, level(0.0));
float3 _552;
if (_536.y > 0.0)
{
_552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * clamp(_536.y * _536.z, 0.0, 1.0));
_552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * fast::clamp(_536.y * _536.z, 0.0, 1.0));
}
else
{
@ -272,13 +272,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _557 = float4(_556.x, _556.y, _556.z, _510.w);
_28 _558 = _511;
_558._m0 = _557;
float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _567 = fast::clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _583 = _12.sample(_13, _567, level(0.0));
float3 _599;
if (_583.y > 0.0)
{
_599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * clamp(_583.y * _583.z, 0.0, 1.0));
_599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * fast::clamp(_583.y * _583.z, 0.0, 1.0));
}
else
{
@ -289,13 +289,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _604 = float4(_603.x, _603.y, _603.z, _557.w);
_28 _605 = _558;
_605._m0 = _604;
float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _614 = fast::clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _630 = _12.sample(_13, _614, level(0.0));
float3 _646;
if (_630.y > 0.0)
{
_646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * clamp(_630.y * _630.z, 0.0, 1.0));
_646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * fast::clamp(_630.y * _630.z, 0.0, 1.0));
}
else
{
@ -306,13 +306,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
float4 _651 = float4(_650.x, _650.y, _650.z, _604.w);
_28 _652 = _605;
_652._m0 = _651;
float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
float2 _661 = fast::clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
float4 _677 = _12.sample(_13, _661, level(0.0));
float3 _693;
if (_677.y > 0.0)
{
_693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * clamp(_677.y * _677.z, 0.0, 1.0));
_693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * fast::clamp(_677.y * _677.z, 0.0, 1.0));
}
else
{

View File

@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]
out.gl_Position = _18.uMVP * in.aVertex;
out.vColor = float4(0.0);
float3 L = in.aVertex.xyz - float3(_18.light.Position);
out.vColor += ((_18.light.Color * clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
return out;
}

View File

@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
light.Radius = _21.lights[i].Radius;
light.Color = _21.lights[i].Color;
float3 L = in.aVertex.xyz - light.Position;
out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
}
return out;
}

View File

@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
for (int i = 0; i < 4; i++)
{
float3 L = in.aVertex.xyz - float3(_21.lights[i].Position);
out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
}
return out;
}

View File

@ -0,0 +1,293 @@
; SPIR-V
; Version: 1.3
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 205
; Schema: 0
; Module header. The SPIR-V logical layout requires every OpCapability to
; precede any OpExtension, so both capabilities are declared first.
OpCapability Shader
OpCapability Float16
OpExtension "SPV_AMD_gpu_shader_half_float"
; Extended instruction set that provides the FMin/FMax/FClamp and
; NMin/NMax/NClamp instructions used in %main.
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
; Fragment entry point; the interface lists all eight input variables.
OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_AMD_gpu_shader_half_float"
; Debug names for the entry point, the function-local result variables
; (res*, hres*) and the input variables (v*, h*).
OpName %main "main"
OpName %res "res"
OpName %res2 "res2"
OpName %res3 "res3"
OpName %res4 "res4"
OpName %hres "hres"
OpName %hres2 "hres2"
OpName %hres3 "hres3"
OpName %hres4 "hres4"
OpName %v1 "v1"
OpName %v2 "v2"
OpName %v3 "v3"
OpName %v4 "v4"
OpName %h1 "h1"
OpName %h2 "h2"
OpName %h3 "h3"
OpName %h4 "h4"
; Input locations: the float inputs take locations 0-3, the half inputs 4-7.
OpDecorate %v1 Location 0
OpDecorate %v2 Location 1
OpDecorate %v3 Location 2
OpDecorate %v4 Location 3
OpDecorate %h1 Location 4
OpDecorate %h2 Location 5
OpDecorate %h3 Location 6
OpDecorate %h4 Location 7
; Scalar and vector types: 32-bit float and 16-bit half, widths 1 to 4.
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v2float = OpTypeVector %float 2
%v3float = OpTypeVector %float 3
%v4float = OpTypeVector %float 4
%half = OpTypeFloat 16
%v2half = OpTypeVector %half 2
%v3half = OpTypeVector %half 3
%v4half = OpTypeVector %half 4
; Pointer types: one Function and one Input pointer per value type.
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Input_float = OpTypePointer Input %float
%_ptr_Function_v2float = OpTypePointer Function %v2float
%_ptr_Input_v2float = OpTypePointer Input %v2float
%_ptr_Function_v3float = OpTypePointer Function %v3float
%_ptr_Input_v3float = OpTypePointer Input %v3float
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Input_v4float = OpTypePointer Input %v4float
%_ptr_Function_half = OpTypePointer Function %half
%_ptr_Input_half = OpTypePointer Input %half
%_ptr_Function_v2half = OpTypePointer Function %v2half
%_ptr_Input_v2half = OpTypePointer Input %v2half
%_ptr_Function_v3half = OpTypePointer Function %v3half
%_ptr_Input_v3half = OpTypePointer Input %v3half
%_ptr_Function_v4half = OpTypePointer Function %v4half
%_ptr_Input_v4half = OpTypePointer Input %v4half
; Module-scope input variables read by %main.
%v1 = OpVariable %_ptr_Input_float Input
%v2 = OpVariable %_ptr_Input_v2float Input
%v3 = OpVariable %_ptr_Input_v3float Input
%v4 = OpVariable %_ptr_Input_v4float Input
%h1 = OpVariable %_ptr_Input_half Input
%h2 = OpVariable %_ptr_Input_v2half Input
%h3 = OpVariable %_ptr_Input_v3half Input
%h4 = OpVariable %_ptr_Input_v4half Input
; Fragment entry point. For each of the eight inputs it evaluates, in order,
; FMin, FMax, FClamp, NMin, NMax and NClamp from GLSL.std.450, using the same
; input for every operand, and stores each result into the matching
; function-local variable. Nothing is written to any output.
%main = OpFunction %void None %3
%5 = OpLabel
; SPIR-V requires all Function-storage OpVariable instructions to be the
; first instructions of the function's first block, so every local is
; declared here rather than next to its first use.
%res = OpVariable %_ptr_Function_float Function
%res2 = OpVariable %_ptr_Function_v2float Function
%res3 = OpVariable %_ptr_Function_v3float Function
%res4 = OpVariable %_ptr_Function_v4float Function
%hres = OpVariable %_ptr_Function_half Function
%hres2 = OpVariable %_ptr_Function_v2half Function
%hres3 = OpVariable %_ptr_Function_v3half Function
%hres4 = OpVariable %_ptr_Function_v4half Function
; float scalar (%v1 -> %res)
%46 = OpLoad %float %v1
%47 = OpLoad %float %v1
%48 = OpExtInst %float %1 FMin %46 %47
OpStore %res %48
%49 = OpLoad %float %v1
%50 = OpLoad %float %v1
%51 = OpExtInst %float %1 FMax %49 %50
OpStore %res %51
%52 = OpLoad %float %v1
%53 = OpLoad %float %v1
%54 = OpLoad %float %v1
%55 = OpExtInst %float %1 FClamp %52 %53 %54
OpStore %res %55
%56 = OpLoad %float %v1
%57 = OpLoad %float %v1
%58 = OpExtInst %float %1 NMin %56 %57
OpStore %res %58
%59 = OpLoad %float %v1
%60 = OpLoad %float %v1
%61 = OpExtInst %float %1 NMax %59 %60
OpStore %res %61
%62 = OpLoad %float %v1
%63 = OpLoad %float %v1
%64 = OpLoad %float %v1
%65 = OpExtInst %float %1 NClamp %62 %63 %64
OpStore %res %65
; float2 (%v2 -> %res2)
%66 = OpLoad %v2float %v2
%67 = OpLoad %v2float %v2
%68 = OpExtInst %v2float %1 FMin %66 %67
OpStore %res2 %68
%69 = OpLoad %v2float %v2
%70 = OpLoad %v2float %v2
%71 = OpExtInst %v2float %1 FMax %69 %70
OpStore %res2 %71
%72 = OpLoad %v2float %v2
%73 = OpLoad %v2float %v2
%74 = OpLoad %v2float %v2
%75 = OpExtInst %v2float %1 FClamp %72 %73 %74
OpStore %res2 %75
%76 = OpLoad %v2float %v2
%77 = OpLoad %v2float %v2
%78 = OpExtInst %v2float %1 NMin %76 %77
OpStore %res2 %78
%79 = OpLoad %v2float %v2
%80 = OpLoad %v2float %v2
%81 = OpExtInst %v2float %1 NMax %79 %80
OpStore %res2 %81
%82 = OpLoad %v2float %v2
%83 = OpLoad %v2float %v2
%84 = OpLoad %v2float %v2
%85 = OpExtInst %v2float %1 NClamp %82 %83 %84
OpStore %res2 %85
; float3 (%v3 -> %res3)
%86 = OpLoad %v3float %v3
%87 = OpLoad %v3float %v3
%88 = OpExtInst %v3float %1 FMin %86 %87
OpStore %res3 %88
%89 = OpLoad %v3float %v3
%90 = OpLoad %v3float %v3
%91 = OpExtInst %v3float %1 FMax %89 %90
OpStore %res3 %91
%92 = OpLoad %v3float %v3
%93 = OpLoad %v3float %v3
%94 = OpLoad %v3float %v3
%95 = OpExtInst %v3float %1 FClamp %92 %93 %94
OpStore %res3 %95
%96 = OpLoad %v3float %v3
%97 = OpLoad %v3float %v3
%98 = OpExtInst %v3float %1 NMin %96 %97
OpStore %res3 %98
%99 = OpLoad %v3float %v3
%100 = OpLoad %v3float %v3
%101 = OpExtInst %v3float %1 NMax %99 %100
OpStore %res3 %101
%102 = OpLoad %v3float %v3
%103 = OpLoad %v3float %v3
%104 = OpLoad %v3float %v3
%105 = OpExtInst %v3float %1 NClamp %102 %103 %104
OpStore %res3 %105
; float4 (%v4 -> %res4)
%106 = OpLoad %v4float %v4
%107 = OpLoad %v4float %v4
%108 = OpExtInst %v4float %1 FMin %106 %107
OpStore %res4 %108
%109 = OpLoad %v4float %v4
%110 = OpLoad %v4float %v4
%111 = OpExtInst %v4float %1 FMax %109 %110
OpStore %res4 %111
%112 = OpLoad %v4float %v4
%113 = OpLoad %v4float %v4
%114 = OpLoad %v4float %v4
%115 = OpExtInst %v4float %1 FClamp %112 %113 %114
OpStore %res4 %115
%116 = OpLoad %v4float %v4
%117 = OpLoad %v4float %v4
%118 = OpExtInst %v4float %1 NMin %116 %117
OpStore %res4 %118
%119 = OpLoad %v4float %v4
%120 = OpLoad %v4float %v4
%121 = OpExtInst %v4float %1 NMax %119 %120
OpStore %res4 %121
%122 = OpLoad %v4float %v4
%123 = OpLoad %v4float %v4
%124 = OpLoad %v4float %v4
%125 = OpExtInst %v4float %1 NClamp %122 %123 %124
OpStore %res4 %125
; half scalar (%h1 -> %hres)
%126 = OpLoad %half %h1
%127 = OpLoad %half %h1
%128 = OpExtInst %half %1 FMin %126 %127
OpStore %hres %128
%129 = OpLoad %half %h1
%130 = OpLoad %half %h1
%131 = OpExtInst %half %1 FMax %129 %130
OpStore %hres %131
%132 = OpLoad %half %h1
%133 = OpLoad %half %h1
%134 = OpLoad %half %h1
%135 = OpExtInst %half %1 FClamp %132 %133 %134
OpStore %hres %135
%136 = OpLoad %half %h1
%137 = OpLoad %half %h1
%138 = OpExtInst %half %1 NMin %136 %137
OpStore %hres %138
%139 = OpLoad %half %h1
%140 = OpLoad %half %h1
%141 = OpExtInst %half %1 NMax %139 %140
OpStore %hres %141
%142 = OpLoad %half %h1
%143 = OpLoad %half %h1
%144 = OpLoad %half %h1
%145 = OpExtInst %half %1 NClamp %142 %143 %144
OpStore %hres %145
; half2 (%h2 -> %hres2)
%146 = OpLoad %v2half %h2
%147 = OpLoad %v2half %h2
%148 = OpExtInst %v2half %1 FMin %146 %147
OpStore %hres2 %148
%149 = OpLoad %v2half %h2
%150 = OpLoad %v2half %h2
%151 = OpExtInst %v2half %1 FMax %149 %150
OpStore %hres2 %151
%152 = OpLoad %v2half %h2
%153 = OpLoad %v2half %h2
%154 = OpLoad %v2half %h2
%155 = OpExtInst %v2half %1 FClamp %152 %153 %154
OpStore %hres2 %155
%156 = OpLoad %v2half %h2
%157 = OpLoad %v2half %h2
%158 = OpExtInst %v2half %1 NMin %156 %157
OpStore %hres2 %158
%159 = OpLoad %v2half %h2
%160 = OpLoad %v2half %h2
%161 = OpExtInst %v2half %1 NMax %159 %160
OpStore %hres2 %161
%162 = OpLoad %v2half %h2
%163 = OpLoad %v2half %h2
%164 = OpLoad %v2half %h2
%165 = OpExtInst %v2half %1 NClamp %162 %163 %164
OpStore %hres2 %165
; half3 (%h3 -> %hres3)
%166 = OpLoad %v3half %h3
%167 = OpLoad %v3half %h3
%168 = OpExtInst %v3half %1 FMin %166 %167
OpStore %hres3 %168
%169 = OpLoad %v3half %h3
%170 = OpLoad %v3half %h3
%171 = OpExtInst %v3half %1 FMax %169 %170
OpStore %hres3 %171
%172 = OpLoad %v3half %h3
%173 = OpLoad %v3half %h3
%174 = OpLoad %v3half %h3
%175 = OpExtInst %v3half %1 FClamp %172 %173 %174
OpStore %hres3 %175
%176 = OpLoad %v3half %h3
%177 = OpLoad %v3half %h3
%178 = OpExtInst %v3half %1 NMin %176 %177
OpStore %hres3 %178
%179 = OpLoad %v3half %h3
%180 = OpLoad %v3half %h3
%181 = OpExtInst %v3half %1 NMax %179 %180
OpStore %hres3 %181
%182 = OpLoad %v3half %h3
%183 = OpLoad %v3half %h3
%184 = OpLoad %v3half %h3
%185 = OpExtInst %v3half %1 NClamp %182 %183 %184
OpStore %hres3 %185
; half4 (%h4 -> %hres4)
%186 = OpLoad %v4half %h4
%187 = OpLoad %v4half %h4
%188 = OpExtInst %v4half %1 FMin %186 %187
OpStore %hres4 %188
%189 = OpLoad %v4half %h4
%190 = OpLoad %v4half %h4
%191 = OpExtInst %v4half %1 FMax %189 %190
OpStore %hres4 %191
%192 = OpLoad %v4half %h4
%193 = OpLoad %v4half %h4
%194 = OpLoad %v4half %h4
%195 = OpExtInst %v4half %1 FClamp %192 %193 %194
OpStore %hres4 %195
%196 = OpLoad %v4half %h4
%197 = OpLoad %v4half %h4
%198 = OpExtInst %v4half %1 NMin %196 %197
OpStore %hres4 %198
%199 = OpLoad %v4half %h4
%200 = OpLoad %v4half %h4
%201 = OpExtInst %v4half %1 NMax %199 %200
OpStore %hres4 %201
%202 = OpLoad %v4half %h4
%203 = OpLoad %v4half %h4
%204 = OpLoad %v4half %h4
%205 = OpExtInst %v4half %1 NClamp %202 %203 %204
OpStore %hres4 %205
OpReturn
OpFunctionEnd

View File

@ -2364,6 +2364,53 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
break;
}
case GLSLstd450FMin:
// If the result type isn't float, don't bother calling the specific
// precise::/fast:: version. Metal doesn't have those for half and
// double types.
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
emit_binary_func_op(result_type, id, args[0], args[1], "min");
else
emit_binary_func_op(result_type, id, args[0], args[1], "fast::min");
break;
case GLSLstd450FMax:
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
emit_binary_func_op(result_type, id, args[0], args[1], "max");
else
emit_binary_func_op(result_type, id, args[0], args[1], "fast::max");
break;
case GLSLstd450FClamp:
// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
else
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp");
break;
case GLSLstd450NMin:
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
emit_binary_func_op(result_type, id, args[0], args[1], "min");
else
emit_binary_func_op(result_type, id, args[0], args[1], "precise::min");
break;
case GLSLstd450NMax:
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
emit_binary_func_op(result_type, id, args[0], args[1], "max");
else
emit_binary_func_op(result_type, id, args[0], args[1], "precise::max");
break;
case GLSLstd450NClamp:
// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
else
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp");
break;
// TODO:
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)