MSL: Emit F{Min,Max,Clamp} as fast:: and N{Min,Max,Clamp} as precise::.
This roughly matches their semantics in SPIR-V and MSL. For `FMin`, `FMax`, and `FClamp`, and for the Metal functions `fast::min()`, `fast::max()`, and `fast::clamp()`, the result is undefined if any operand is NaN. For the 'N' operations (`NMin`, `NMax`, and `NClamp`) and their corresponding MSL `precise::` functions, the result is consistent with IEEE 754 (the first non-NaN operand wins; the result is NaN if all operands are NaN). We can only do this with 32-bit floats, though, because Metal only provides the `fast::` and `precise::` variants for `float`. `half` has only one variant of each of these functions, which is presumably consistent with IEEE 754. I guess that's OK; the SPIR-V spec only says that the results of `F{Min,Max,Clamp}` are undefined for NaNs, so an IEEE 754-consistent result is still conformant. Performance might suffer, though.
This commit is contained in:
parent
6fd66664e8
commit
27af716c3a
69
reference/shaders-msl/asm/frag/min-max-clamp.asm.frag
Normal file
69
reference/shaders-msl/asm/frag/min-max-clamp.asm.frag
Normal file
@ -0,0 +1,69 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float v1 [[user(locn0)]];
|
||||
float2 v2 [[user(locn1)]];
|
||||
float3 v3 [[user(locn2)]];
|
||||
float4 v4 [[user(locn3)]];
|
||||
half h1 [[user(locn4)]];
|
||||
half2 h2 [[user(locn5)]];
|
||||
half3 h3 [[user(locn6)]];
|
||||
half4 h4 [[user(locn7)]];
|
||||
};
|
||||
|
||||
fragment void main0(main0_in in [[stage_in]])
|
||||
{
|
||||
float res = fast::min(in.v1, in.v1);
|
||||
res = fast::max(in.v1, in.v1);
|
||||
res = fast::clamp(in.v1, in.v1, in.v1);
|
||||
res = precise::min(in.v1, in.v1);
|
||||
res = precise::max(in.v1, in.v1);
|
||||
res = precise::clamp(in.v1, in.v1, in.v1);
|
||||
float2 res2 = fast::min(in.v2, in.v2);
|
||||
res2 = fast::max(in.v2, in.v2);
|
||||
res2 = fast::clamp(in.v2, in.v2, in.v2);
|
||||
res2 = precise::min(in.v2, in.v2);
|
||||
res2 = precise::max(in.v2, in.v2);
|
||||
res2 = precise::clamp(in.v2, in.v2, in.v2);
|
||||
float3 res3 = fast::min(in.v3, in.v3);
|
||||
res3 = fast::max(in.v3, in.v3);
|
||||
res3 = fast::clamp(in.v3, in.v3, in.v3);
|
||||
res3 = precise::min(in.v3, in.v3);
|
||||
res3 = precise::max(in.v3, in.v3);
|
||||
res3 = precise::clamp(in.v3, in.v3, in.v3);
|
||||
float4 res4 = fast::min(in.v4, in.v4);
|
||||
res4 = fast::max(in.v4, in.v4);
|
||||
res4 = fast::clamp(in.v4, in.v4, in.v4);
|
||||
res4 = precise::min(in.v4, in.v4);
|
||||
res4 = precise::max(in.v4, in.v4);
|
||||
res4 = precise::clamp(in.v4, in.v4, in.v4);
|
||||
half hres = min(in.h1, in.h1);
|
||||
hres = max(in.h1, in.h1);
|
||||
hres = clamp(in.h1, in.h1, in.h1);
|
||||
hres = min(in.h1, in.h1);
|
||||
hres = max(in.h1, in.h1);
|
||||
hres = clamp(in.h1, in.h1, in.h1);
|
||||
half2 hres2 = min(in.h2, in.h2);
|
||||
hres2 = max(in.h2, in.h2);
|
||||
hres2 = clamp(in.h2, in.h2, in.h2);
|
||||
hres2 = min(in.h2, in.h2);
|
||||
hres2 = max(in.h2, in.h2);
|
||||
hres2 = clamp(in.h2, in.h2, in.h2);
|
||||
half3 hres3 = min(in.h3, in.h3);
|
||||
hres3 = max(in.h3, in.h3);
|
||||
hres3 = clamp(in.h3, in.h3, in.h3);
|
||||
hres3 = min(in.h3, in.h3);
|
||||
hres3 = max(in.h3, in.h3);
|
||||
hres3 = clamp(in.h3, in.h3, in.h3);
|
||||
half4 hres4 = min(in.h4, in.h4);
|
||||
hres4 = max(in.h4, in.h4);
|
||||
hres4 = clamp(in.h4, in.h4, in.h4);
|
||||
hres4 = min(in.h4, in.h4);
|
||||
hres4 = max(in.h4, in.h4);
|
||||
hres4 = clamp(in.h4, in.h4, in.h4);
|
||||
}
|
||||
|
@ -102,13 +102,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
_77._m0 = float4(0.0);
|
||||
float2 _82 = gl_FragCoord.xy * _19._m23.xy;
|
||||
float4 _88 = _7._m2 * _7._m0.xyxy;
|
||||
float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _97 = fast::clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _113 = _12.sample(_13, _97, level(0.0));
|
||||
float3 _129;
|
||||
if (_113.y > 0.0)
|
||||
{
|
||||
_129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * clamp(_113.y * _113.z, 0.0, 1.0));
|
||||
_129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * fast::clamp(_113.y * _113.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -119,13 +119,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w);
|
||||
_28 _135 = _77;
|
||||
_135._m0 = _134;
|
||||
float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _144 = fast::clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _160 = _12.sample(_13, _144, level(0.0));
|
||||
float3 _176;
|
||||
if (_160.y > 0.0)
|
||||
{
|
||||
_176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * clamp(_160.y * _160.z, 0.0, 1.0));
|
||||
_176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * fast::clamp(_160.y * _160.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -136,13 +136,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _181 = float4(_180.x, _180.y, _180.z, _134.w);
|
||||
_28 _182 = _135;
|
||||
_182._m0 = _181;
|
||||
float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _191 = fast::clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _207 = _12.sample(_13, _191, level(0.0));
|
||||
float3 _223;
|
||||
if (_207.y > 0.0)
|
||||
{
|
||||
_223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * clamp(_207.y * _207.z, 0.0, 1.0));
|
||||
_223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * fast::clamp(_207.y * _207.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -153,13 +153,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _228 = float4(_227.x, _227.y, _227.z, _181.w);
|
||||
_28 _229 = _182;
|
||||
_229._m0 = _228;
|
||||
float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _238 = fast::clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _254 = _12.sample(_13, _238, level(0.0));
|
||||
float3 _270;
|
||||
if (_254.y > 0.0)
|
||||
{
|
||||
_270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * clamp(_254.y * _254.z, 0.0, 1.0));
|
||||
_270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * fast::clamp(_254.y * _254.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -170,13 +170,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _275 = float4(_274.x, _274.y, _274.z, _228.w);
|
||||
_28 _276 = _229;
|
||||
_276._m0 = _275;
|
||||
float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _285 = fast::clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _301 = _12.sample(_13, _285, level(0.0));
|
||||
float3 _317;
|
||||
if (_301.y > 0.0)
|
||||
{
|
||||
_317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * clamp(_301.y * _301.z, 0.0, 1.0));
|
||||
_317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * fast::clamp(_301.y * _301.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -187,13 +187,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _322 = float4(_321.x, _321.y, _321.z, _275.w);
|
||||
_28 _323 = _276;
|
||||
_323._m0 = _322;
|
||||
float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _332 = fast::clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _348 = _12.sample(_13, _332, level(0.0));
|
||||
float3 _364;
|
||||
if (_348.y > 0.0)
|
||||
{
|
||||
_364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * clamp(_348.y * _348.z, 0.0, 1.0));
|
||||
_364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * fast::clamp(_348.y * _348.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -204,13 +204,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _369 = float4(_368.x, _368.y, _368.z, _322.w);
|
||||
_28 _370 = _323;
|
||||
_370._m0 = _369;
|
||||
float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _379 = fast::clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _395 = _12.sample(_13, _379, level(0.0));
|
||||
float3 _411;
|
||||
if (_395.y > 0.0)
|
||||
{
|
||||
_411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * clamp(_395.y * _395.z, 0.0, 1.0));
|
||||
_411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * fast::clamp(_395.y * _395.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -221,13 +221,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _416 = float4(_415.x, _415.y, _415.z, _369.w);
|
||||
_28 _417 = _370;
|
||||
_417._m0 = _416;
|
||||
float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _426 = fast::clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _442 = _12.sample(_13, _426, level(0.0));
|
||||
float3 _458;
|
||||
if (_442.y > 0.0)
|
||||
{
|
||||
_458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * clamp(_442.y * _442.z, 0.0, 1.0));
|
||||
_458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * fast::clamp(_442.y * _442.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -238,13 +238,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _463 = float4(_462.x, _462.y, _462.z, _416.w);
|
||||
_28 _464 = _417;
|
||||
_464._m0 = _463;
|
||||
float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _473 = fast::clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _489 = _12.sample(_13, _473, level(0.0));
|
||||
float3 _505;
|
||||
if (_489.y > 0.0)
|
||||
{
|
||||
_505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * clamp(_489.y * _489.z, 0.0, 1.0));
|
||||
_505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * fast::clamp(_489.y * _489.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -255,13 +255,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _510 = float4(_509.x, _509.y, _509.z, _463.w);
|
||||
_28 _511 = _464;
|
||||
_511._m0 = _510;
|
||||
float2 _520 = clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _520 = fast::clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _536 = _12.sample(_13, _520, level(0.0));
|
||||
float3 _552;
|
||||
if (_536.y > 0.0)
|
||||
{
|
||||
_552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * clamp(_536.y * _536.z, 0.0, 1.0));
|
||||
_552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * fast::clamp(_536.y * _536.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -272,13 +272,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _557 = float4(_556.x, _556.y, _556.z, _510.w);
|
||||
_28 _558 = _511;
|
||||
_558._m0 = _557;
|
||||
float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _567 = fast::clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _583 = _12.sample(_13, _567, level(0.0));
|
||||
float3 _599;
|
||||
if (_583.y > 0.0)
|
||||
{
|
||||
_599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * clamp(_583.y * _583.z, 0.0, 1.0));
|
||||
_599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * fast::clamp(_583.y * _583.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -289,13 +289,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _604 = float4(_603.x, _603.y, _603.z, _557.w);
|
||||
_28 _605 = _558;
|
||||
_605._m0 = _604;
|
||||
float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _614 = fast::clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _630 = _12.sample(_13, _614, level(0.0));
|
||||
float3 _646;
|
||||
if (_630.y > 0.0)
|
||||
{
|
||||
_646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * clamp(_630.y * _630.z, 0.0, 1.0));
|
||||
_646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * fast::clamp(_630.y * _630.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -306,13 +306,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buff
|
||||
float4 _651 = float4(_650.x, _650.y, _650.z, _604.w);
|
||||
_28 _652 = _605;
|
||||
_652._m0 = _651;
|
||||
float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float2 _661 = fast::clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
|
||||
float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
|
||||
float4 _677 = _12.sample(_13, _661, level(0.0));
|
||||
float3 _693;
|
||||
if (_677.y > 0.0)
|
||||
{
|
||||
_693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * clamp(_677.y * _677.z, 0.0, 1.0));
|
||||
_693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * fast::clamp(_677.y * _677.z, 0.0, 1.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]
|
||||
out.gl_Position = _18.uMVP * in.aVertex;
|
||||
out.vColor = float4(0.0);
|
||||
float3 L = in.aVertex.xyz - float3(_18.light.Position);
|
||||
out.vColor += ((_18.light.Color * clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
|
||||
out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
|
||||
light.Radius = _21.lights[i].Radius;
|
||||
light.Color = _21.lights[i].Color;
|
||||
float3 L = in.aVertex.xyz - light.Position;
|
||||
out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
|
||||
out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
float3 L = in.aVertex.xyz - float3(_21.lights[i].Position);
|
||||
out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
|
||||
out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
293
shaders-msl/asm/frag/min-max-clamp.asm.frag
Normal file
293
shaders-msl/asm/frag/min-max-clamp.asm.frag
Normal file
@ -0,0 +1,293 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Khronos Glslang Reference Front End; 7
|
||||
; Bound: 205
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpExtension "SPV_AMD_gpu_shader_half_float"
|
||||
OpCapability Float16
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource GLSL 450
|
||||
OpSourceExtension "GL_AMD_gpu_shader_half_float"
|
||||
OpName %main "main"
|
||||
OpName %res "res"
|
||||
OpName %res2 "res2"
|
||||
OpName %res3 "res3"
|
||||
OpName %res4 "res4"
|
||||
OpName %hres "hres"
|
||||
OpName %hres2 "hres2"
|
||||
OpName %hres3 "hres3"
|
||||
OpName %hres4 "hres4"
|
||||
OpName %v1 "v1"
|
||||
OpName %v2 "v2"
|
||||
OpName %v3 "v3"
|
||||
OpName %v4 "v4"
|
||||
OpName %h1 "h1"
|
||||
OpName %h2 "h2"
|
||||
OpName %h3 "h3"
|
||||
OpName %h4 "h4"
|
||||
OpDecorate %v1 Location 0
|
||||
OpDecorate %v2 Location 1
|
||||
OpDecorate %v3 Location 2
|
||||
OpDecorate %v4 Location 3
|
||||
OpDecorate %h1 Location 4
|
||||
OpDecorate %h2 Location 5
|
||||
OpDecorate %h3 Location 6
|
||||
OpDecorate %h4 Location 7
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v2float = OpTypeVector %float 2
|
||||
%v3float = OpTypeVector %float 3
|
||||
%v4float = OpTypeVector %float 4
|
||||
%half = OpTypeFloat 16
|
||||
%v2half = OpTypeVector %half 2
|
||||
%v3half = OpTypeVector %half 3
|
||||
%v4half = OpTypeVector %half 4
|
||||
%_ptr_Function_float = OpTypePointer Function %float
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%_ptr_Function_v2float = OpTypePointer Function %v2float
|
||||
%_ptr_Input_v2float = OpTypePointer Input %v2float
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%_ptr_Input_v3float = OpTypePointer Input %v3float
|
||||
%_ptr_Function_v4float = OpTypePointer Function %v4float
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%_ptr_Function_half = OpTypePointer Function %half
|
||||
%_ptr_Input_half = OpTypePointer Input %half
|
||||
%_ptr_Function_v2half = OpTypePointer Function %v2half
|
||||
%_ptr_Input_v2half = OpTypePointer Input %v2half
|
||||
%_ptr_Function_v3half = OpTypePointer Function %v3half
|
||||
%_ptr_Input_v3half = OpTypePointer Input %v3half
|
||||
%_ptr_Function_v4half = OpTypePointer Function %v4half
|
||||
%_ptr_Input_v4half = OpTypePointer Input %v4half
|
||||
%v1 = OpVariable %_ptr_Input_float Input
|
||||
%v2 = OpVariable %_ptr_Input_v2float Input
|
||||
%v3 = OpVariable %_ptr_Input_v3float Input
|
||||
%v4 = OpVariable %_ptr_Input_v4float Input
|
||||
%h1 = OpVariable %_ptr_Input_half Input
|
||||
%h2 = OpVariable %_ptr_Input_v2half Input
|
||||
%h3 = OpVariable %_ptr_Input_v3half Input
|
||||
%h4 = OpVariable %_ptr_Input_v4half Input
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%res = OpVariable %_ptr_Function_float Function
|
||||
%46 = OpLoad %float %v1
|
||||
%47 = OpLoad %float %v1
|
||||
%48 = OpExtInst %float %1 FMin %46 %47
|
||||
OpStore %res %48
|
||||
%49 = OpLoad %float %v1
|
||||
%50 = OpLoad %float %v1
|
||||
%51 = OpExtInst %float %1 FMax %49 %50
|
||||
OpStore %res %51
|
||||
%52 = OpLoad %float %v1
|
||||
%53 = OpLoad %float %v1
|
||||
%54 = OpLoad %float %v1
|
||||
%55 = OpExtInst %float %1 FClamp %52 %53 %54
|
||||
OpStore %res %55
|
||||
%56 = OpLoad %float %v1
|
||||
%57 = OpLoad %float %v1
|
||||
%58 = OpExtInst %float %1 NMin %56 %57
|
||||
OpStore %res %58
|
||||
%59 = OpLoad %float %v1
|
||||
%60 = OpLoad %float %v1
|
||||
%61 = OpExtInst %float %1 NMax %59 %60
|
||||
OpStore %res %61
|
||||
%62 = OpLoad %float %v1
|
||||
%63 = OpLoad %float %v1
|
||||
%64 = OpLoad %float %v1
|
||||
%65 = OpExtInst %float %1 NClamp %62 %63 %64
|
||||
OpStore %res %65
|
||||
%res2 = OpVariable %_ptr_Function_v2float Function
|
||||
%66 = OpLoad %v2float %v2
|
||||
%67 = OpLoad %v2float %v2
|
||||
%68 = OpExtInst %v2float %1 FMin %66 %67
|
||||
OpStore %res2 %68
|
||||
%69 = OpLoad %v2float %v2
|
||||
%70 = OpLoad %v2float %v2
|
||||
%71 = OpExtInst %v2float %1 FMax %69 %70
|
||||
OpStore %res2 %71
|
||||
%72 = OpLoad %v2float %v2
|
||||
%73 = OpLoad %v2float %v2
|
||||
%74 = OpLoad %v2float %v2
|
||||
%75 = OpExtInst %v2float %1 FClamp %72 %73 %74
|
||||
OpStore %res2 %75
|
||||
%76 = OpLoad %v2float %v2
|
||||
%77 = OpLoad %v2float %v2
|
||||
%78 = OpExtInst %v2float %1 NMin %76 %77
|
||||
OpStore %res2 %78
|
||||
%79 = OpLoad %v2float %v2
|
||||
%80 = OpLoad %v2float %v2
|
||||
%81 = OpExtInst %v2float %1 NMax %79 %80
|
||||
OpStore %res2 %81
|
||||
%82 = OpLoad %v2float %v2
|
||||
%83 = OpLoad %v2float %v2
|
||||
%84 = OpLoad %v2float %v2
|
||||
%85 = OpExtInst %v2float %1 NClamp %82 %83 %84
|
||||
OpStore %res2 %85
|
||||
%res3 = OpVariable %_ptr_Function_v3float Function
|
||||
%86 = OpLoad %v3float %v3
|
||||
%87 = OpLoad %v3float %v3
|
||||
%88 = OpExtInst %v3float %1 FMin %86 %87
|
||||
OpStore %res3 %88
|
||||
%89 = OpLoad %v3float %v3
|
||||
%90 = OpLoad %v3float %v3
|
||||
%91 = OpExtInst %v3float %1 FMax %89 %90
|
||||
OpStore %res3 %91
|
||||
%92 = OpLoad %v3float %v3
|
||||
%93 = OpLoad %v3float %v3
|
||||
%94 = OpLoad %v3float %v3
|
||||
%95 = OpExtInst %v3float %1 FClamp %92 %93 %94
|
||||
OpStore %res3 %95
|
||||
%96 = OpLoad %v3float %v3
|
||||
%97 = OpLoad %v3float %v3
|
||||
%98 = OpExtInst %v3float %1 NMin %96 %97
|
||||
OpStore %res3 %98
|
||||
%99 = OpLoad %v3float %v3
|
||||
%100 = OpLoad %v3float %v3
|
||||
%101 = OpExtInst %v3float %1 NMax %99 %100
|
||||
OpStore %res3 %101
|
||||
%102 = OpLoad %v3float %v3
|
||||
%103 = OpLoad %v3float %v3
|
||||
%104 = OpLoad %v3float %v3
|
||||
%105 = OpExtInst %v3float %1 NClamp %102 %103 %104
|
||||
OpStore %res3 %105
|
||||
%res4 = OpVariable %_ptr_Function_v4float Function
|
||||
%106 = OpLoad %v4float %v4
|
||||
%107 = OpLoad %v4float %v4
|
||||
%108 = OpExtInst %v4float %1 FMin %106 %107
|
||||
OpStore %res4 %108
|
||||
%109 = OpLoad %v4float %v4
|
||||
%110 = OpLoad %v4float %v4
|
||||
%111 = OpExtInst %v4float %1 FMax %109 %110
|
||||
OpStore %res4 %111
|
||||
%112 = OpLoad %v4float %v4
|
||||
%113 = OpLoad %v4float %v4
|
||||
%114 = OpLoad %v4float %v4
|
||||
%115 = OpExtInst %v4float %1 FClamp %112 %113 %114
|
||||
OpStore %res4 %115
|
||||
%116 = OpLoad %v4float %v4
|
||||
%117 = OpLoad %v4float %v4
|
||||
%118 = OpExtInst %v4float %1 NMin %116 %117
|
||||
OpStore %res4 %118
|
||||
%119 = OpLoad %v4float %v4
|
||||
%120 = OpLoad %v4float %v4
|
||||
%121 = OpExtInst %v4float %1 NMax %119 %120
|
||||
OpStore %res4 %121
|
||||
%122 = OpLoad %v4float %v4
|
||||
%123 = OpLoad %v4float %v4
|
||||
%124 = OpLoad %v4float %v4
|
||||
%125 = OpExtInst %v4float %1 NClamp %122 %123 %124
|
||||
OpStore %res4 %125
|
||||
%hres = OpVariable %_ptr_Function_half Function
|
||||
%126 = OpLoad %half %h1
|
||||
%127 = OpLoad %half %h1
|
||||
%128 = OpExtInst %half %1 FMin %126 %127
|
||||
OpStore %hres %128
|
||||
%129 = OpLoad %half %h1
|
||||
%130 = OpLoad %half %h1
|
||||
%131 = OpExtInst %half %1 FMax %129 %130
|
||||
OpStore %hres %131
|
||||
%132 = OpLoad %half %h1
|
||||
%133 = OpLoad %half %h1
|
||||
%134 = OpLoad %half %h1
|
||||
%135 = OpExtInst %half %1 FClamp %132 %133 %134
|
||||
OpStore %hres %135
|
||||
%136 = OpLoad %half %h1
|
||||
%137 = OpLoad %half %h1
|
||||
%138 = OpExtInst %half %1 NMin %136 %137
|
||||
OpStore %hres %138
|
||||
%139 = OpLoad %half %h1
|
||||
%140 = OpLoad %half %h1
|
||||
%141 = OpExtInst %half %1 NMax %139 %140
|
||||
OpStore %hres %141
|
||||
%142 = OpLoad %half %h1
|
||||
%143 = OpLoad %half %h1
|
||||
%144 = OpLoad %half %h1
|
||||
%145 = OpExtInst %half %1 NClamp %142 %143 %144
|
||||
OpStore %hres %145
|
||||
%hres2 = OpVariable %_ptr_Function_v2half Function
|
||||
%146 = OpLoad %v2half %h2
|
||||
%147 = OpLoad %v2half %h2
|
||||
%148 = OpExtInst %v2half %1 FMin %146 %147
|
||||
OpStore %hres2 %148
|
||||
%149 = OpLoad %v2half %h2
|
||||
%150 = OpLoad %v2half %h2
|
||||
%151 = OpExtInst %v2half %1 FMax %149 %150
|
||||
OpStore %hres2 %151
|
||||
%152 = OpLoad %v2half %h2
|
||||
%153 = OpLoad %v2half %h2
|
||||
%154 = OpLoad %v2half %h2
|
||||
%155 = OpExtInst %v2half %1 FClamp %152 %153 %154
|
||||
OpStore %hres2 %155
|
||||
%156 = OpLoad %v2half %h2
|
||||
%157 = OpLoad %v2half %h2
|
||||
%158 = OpExtInst %v2half %1 NMin %156 %157
|
||||
OpStore %hres2 %158
|
||||
%159 = OpLoad %v2half %h2
|
||||
%160 = OpLoad %v2half %h2
|
||||
%161 = OpExtInst %v2half %1 NMax %159 %160
|
||||
OpStore %hres2 %161
|
||||
%162 = OpLoad %v2half %h2
|
||||
%163 = OpLoad %v2half %h2
|
||||
%164 = OpLoad %v2half %h2
|
||||
%165 = OpExtInst %v2half %1 NClamp %162 %163 %164
|
||||
OpStore %hres2 %165
|
||||
%hres3 = OpVariable %_ptr_Function_v3half Function
|
||||
%166 = OpLoad %v3half %h3
|
||||
%167 = OpLoad %v3half %h3
|
||||
%168 = OpExtInst %v3half %1 FMin %166 %167
|
||||
OpStore %hres3 %168
|
||||
%169 = OpLoad %v3half %h3
|
||||
%170 = OpLoad %v3half %h3
|
||||
%171 = OpExtInst %v3half %1 FMax %169 %170
|
||||
OpStore %hres3 %171
|
||||
%172 = OpLoad %v3half %h3
|
||||
%173 = OpLoad %v3half %h3
|
||||
%174 = OpLoad %v3half %h3
|
||||
%175 = OpExtInst %v3half %1 FClamp %172 %173 %174
|
||||
OpStore %hres3 %175
|
||||
%176 = OpLoad %v3half %h3
|
||||
%177 = OpLoad %v3half %h3
|
||||
%178 = OpExtInst %v3half %1 NMin %176 %177
|
||||
OpStore %hres3 %178
|
||||
%179 = OpLoad %v3half %h3
|
||||
%180 = OpLoad %v3half %h3
|
||||
%181 = OpExtInst %v3half %1 NMax %179 %180
|
||||
OpStore %hres3 %181
|
||||
%182 = OpLoad %v3half %h3
|
||||
%183 = OpLoad %v3half %h3
|
||||
%184 = OpLoad %v3half %h3
|
||||
%185 = OpExtInst %v3half %1 NClamp %182 %183 %184
|
||||
OpStore %hres3 %185
|
||||
%hres4 = OpVariable %_ptr_Function_v4half Function
|
||||
%186 = OpLoad %v4half %h4
|
||||
%187 = OpLoad %v4half %h4
|
||||
%188 = OpExtInst %v4half %1 FMin %186 %187
|
||||
OpStore %hres4 %188
|
||||
%189 = OpLoad %v4half %h4
|
||||
%190 = OpLoad %v4half %h4
|
||||
%191 = OpExtInst %v4half %1 FMax %189 %190
|
||||
OpStore %hres4 %191
|
||||
%192 = OpLoad %v4half %h4
|
||||
%193 = OpLoad %v4half %h4
|
||||
%194 = OpLoad %v4half %h4
|
||||
%195 = OpExtInst %v4half %1 FClamp %192 %193 %194
|
||||
OpStore %hres4 %195
|
||||
%196 = OpLoad %v4half %h4
|
||||
%197 = OpLoad %v4half %h4
|
||||
%198 = OpExtInst %v4half %1 NMin %196 %197
|
||||
OpStore %hres4 %198
|
||||
%199 = OpLoad %v4half %h4
|
||||
%200 = OpLoad %v4half %h4
|
||||
%201 = OpExtInst %v4half %1 NMax %199 %200
|
||||
OpStore %hres4 %201
|
||||
%202 = OpLoad %v4half %h4
|
||||
%203 = OpLoad %v4half %h4
|
||||
%204 = OpLoad %v4half %h4
|
||||
%205 = OpExtInst %v4half %1 NClamp %202 %203 %204
|
||||
OpStore %hres4 %205
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -2364,6 +2364,53 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
|
||||
break;
|
||||
}
|
||||
|
||||
case GLSLstd450FMin:
|
||||
// If the result type isn't float, don't bother calling the specific
|
||||
// precise::/fast:: version. Metal doesn't have those for half and
|
||||
// double types.
|
||||
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "min");
|
||||
else
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "fast::min");
|
||||
break;
|
||||
|
||||
case GLSLstd450FMax:
|
||||
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "max");
|
||||
else
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "fast::max");
|
||||
break;
|
||||
|
||||
case GLSLstd450FClamp:
|
||||
// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
|
||||
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
|
||||
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
|
||||
else
|
||||
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp");
|
||||
break;
|
||||
|
||||
case GLSLstd450NMin:
|
||||
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "min");
|
||||
else
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "precise::min");
|
||||
break;
|
||||
|
||||
case GLSLstd450NMax:
|
||||
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "max");
|
||||
else
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "precise::max");
|
||||
break;
|
||||
|
||||
case GLSLstd450NClamp:
|
||||
// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
|
||||
if (get<SPIRType>(result_type).basetype != SPIRType::Float)
|
||||
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
|
||||
else
|
||||
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp");
|
||||
break;
|
||||
|
||||
// TODO:
|
||||
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
|
||||
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
|
||||
|
Loading…
Reference in New Issue
Block a user