From 40141ffddfe6a00fba0cce7e1b5f3759b8d520b8 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 22 Sep 2021 18:58:31 -0400 Subject: [PATCH] MSL: Selectively enable fast-math in MSL code to match Vulkan CTS results. Based on CTS testing, math optimizations between MSL and Vulkan are inconsistent. In some cases, enabling MSL's fast-math compilation option matches Vulkan's math results. In other cases, disabling it does. Broadly enabling or disabling fast-math across all shaders results in some CTS test failures either way. To fix this, selectively enable/disable fast-math optimizations in the MSL code, using metal::fast and metal::precise function namespaces, where supported, and the [[clang::optnone]] function attribute otherwise. Adjust SPIRV-Cross unit test reference shaders to accommodate these changes. --- .../shaders-msl/flatten/struct.flatten.vert | 2 +- reference/opt/shaders-msl/frag/in_mat.frag | 2 +- .../frag/scalar-refract-reflect.frag | 2 +- .../opt/shaders-msl/vert/copy.flatten.vert | 2 +- .../opt/shaders-msl/vert/dynamic.flatten.vert | 2 +- .../opt/shaders-msl/vert/packed_matrix.vert | 2 +- .../asm/frag/depth-compare.asm.frag | 4 +-- .../asm/frag/global-constant-arrays.asm.frag | 8 +++--- .../padded-float-array-member-defef.asm.frag | 8 +++--- .../asm/frag/sample-mask-not-array.asm.frag | 2 +- .../asm/vert/texture-buffer.asm.vert | 6 ++-- .../shaders-msl-no-opt/comp/glsl.std450.comp | 12 ++++---- .../frag/fp16.desktop.invalid.frag | 10 +++---- .../shaders-msl/flatten/struct.flatten.vert | 2 +- reference/shaders-msl/frag/in_mat.frag | 4 +-- .../frag/scalar-refract-reflect.frag | 2 +- reference/shaders-msl/vert/copy.flatten.vert | 2 +- .../shaders-msl/vert/dynamic.flatten.vert | 2 +- reference/shaders-msl/vert/packed_matrix.vert | 2 +- ...schain-invalid-expression.asm.invalid.frag | 8 +++--- .../frag/array-copy-error.asm.invalid.frag | 2 +- .../phi-variable-declaration.asm.invalid.frag | 2 +- .../asm/frag/depth-compare.asm.frag | 4 +-- .../asm/frag/global-constant-arrays.asm.frag | 8 +++--- .../padded-float-array-member-defef.asm.frag | 8 +++--- .../asm/frag/sample-mask-not-array.asm.frag | 2 +- .../asm/vert/texture-buffer.asm.vert | 6 ++-- spirv_msl.cpp | 28 +++++++++++-------- 28 files changed, 75 insertions(+), 69 deletions(-) diff --git a/reference/opt/shaders-msl/flatten/struct.flatten.vert b/reference/opt/shaders-msl/flatten/struct.flatten.vert index d97a34a8..dc96ceae 100644 --- a/reference/opt/shaders-msl/flatten/struct.flatten.vert +++ b/reference/opt/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 _39 = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_39))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_39))); return out; } diff --git a/reference/opt/shaders-msl/frag/in_mat.frag b/reference/opt/shaders-msl/frag/in_mat.frag index 83ed9b5e..1defc6c3 100644 --- a/reference/opt/shaders-msl/frag/in_mat.frag +++ b/reference/opt/shaders-msl/frag/in_mat.frag @@ -27,7 +27,7 @@ fragment main0_out main0(main0_in in [[stage_in]], texturecube samplerCol inInvModelView[1] = in.inInvModelView_1; inInvModelView[2] = in.inInvModelView_2; inInvModelView[3] = in.inInvModelView_3; - float4 _31 = inInvModelView * float4(reflect(normalize(in.inPos), normalize(in.inNormal)), 0.0); + float4 _31 = inInvModelView * float4(reflect(fast::normalize(in.inPos), fast::normalize(in.inNormal)), 0.0); float _33 = _31.x; float3 _59 = float3(_33, _31.yz); _59.x = _33 * (-1.0); diff --git a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag index 592d4458..e4adc4ab 100644 --- a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag +++ b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag @@ -16,7 +16,7 @@ struct main0_in }; template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } diff --git a/reference/opt/shaders-msl/vert/copy.flatten.vert b/reference/opt/shaders-msl/vert/copy.flatten.vert index d73ee328..32fde3a4 100644 --- a/reference/opt/shaders-msl/vert/copy.flatten.vert +++ b/reference/opt/shaders-msl/vert/copy.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int _96 = 0; _96 < 4; ) { float3 _68 = in.aVertex.xyz - float3(_21.lights[_96].Position); - out.vColor += ((_21.lights[_96].Color * fast::clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_68))); + out.vColor += ((_21.lights[_96].Color * fast::clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_68))); _96++; continue; } diff --git a/reference/opt/shaders-msl/vert/dynamic.flatten.vert b/reference/opt/shaders-msl/vert/dynamic.flatten.vert index 92911a4e..26264ddf 100644 --- a/reference/opt/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/opt/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int _82 = 0; _82 < 4; ) { float3 _54 = in.aVertex.xyz - float3(_21.lights[_82].Position); - out.vColor += ((_21.lights[_82].Color * fast::clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_54))); + out.vColor += ((_21.lights[_82].Color * fast::clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_54))); _82++; continue; } diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert index 74b2c5fc..b8cac0ac 100644 --- a/reference/opt/shaders-msl/vert/packed_matrix.vert +++ b/reference/opt/shaders-msl/vert/packed_matrix.vert @@ -39,7 +39,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant _RESERVED_IDENTIFIER_F { main0_out out = {}; float4 _70 = _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m0 * float4(float3(_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m10) + (in._RESERVED_IDENTIFIER_FIXUP_5275.xyz * (_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m17 + _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m18)), 1.0); - out._RESERVED_IDENTIFIER_FIXUP_3976 = normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); + out._RESERVED_IDENTIFIER_FIXUP_3976 = fast::normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); float4 _94 = _70; _94.y = -_70.y; out.gl_Position = _94; diff --git a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag index 7f593efe..b54a05fe 100644 --- a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -246,7 +246,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); float4 _219 = _453; _219.z = _218; - float3 _236 = normalize((SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz * float3(2.0)) - float3(1.0)); + float3 _236 = fast::normalize((SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz * float3(2.0)) - float3(1.0)); uint _240 = uint(round(SceneTexturesStruct_GBufferBTexture.sample(SceneTexturesStruct_GBufferBTextureSampler, _114, level(0.0)).w * 255.0)); bool _248 = (_240 & 15u) == 5u; float _448; @@ -259,7 +259,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G if (_160) { float3 _278 = _152 / float3(_158); - float3 _280 = normalize(cross(_278, float3(0.0, 0.0, 1.0))); + float3 _280 = fast::normalize(cross(_278, float3(0.0, 0.0, 1.0))); float3 _284 = float3(_Globals.InvShadowmapResolution); float3 _285 = _280 * _284; float3 _286 = cross(_280, _278) * _284; diff --git a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag index c10e3915..03fa7c53 100644 --- a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -176,7 +176,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _644 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); float _649 = ((2.0 * _633) - (8.0 * _644)) + 4.0; float2 _653 = float2((3.0 * _633) / _649, (2.0 * _644) / _649); - float2 _660 = normalize(float2(_633, _644)); + float2 _660 = fast::normalize(float2(_633, _644)); float _665 = _633 + (((-_660.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; @@ -272,7 +272,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _990 = 57.2957763671875 * atan2(1.73205077648162841796875 * (_974 - _976), ((2.0 * _973) - _974) - _976); + _990 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_974 - _976), ((2.0 * _973) - _974) - _976); } float _995; if (_990 < 0.0) @@ -434,7 +434,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2174 = 57.2957763671875 * atan2(1.73205077648162841796875 * (_2158 - _2160), ((2.0 * _2157) - _2158) - _2160); + _2174 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_2158 - _2160), ((2.0 * _2157) - _2158) - _2160); } float _2179; if (_2174 < 0.0) @@ -796,7 +796,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1337 = 57.2957763671875 * atan2(1.73205077648162841796875 * (_1321 - _1323), ((2.0 * _1320) - _1321) - _1323); + _1337 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1321 - _1323), ((2.0 * _1320) - _1321) - _1323); } float _1342; if (_1337 < 0.0) diff --git a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag index 2926c322..f1c43630 100644 --- a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -178,7 +178,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _670 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); float _675 = ((2.0 * _659) - (8.0 * _670)) + 4.0; float2 _679 = float2((3.0 * _659) / _675, (2.0 * _670) / _675); - float2 _686 = normalize(float2(_659, _670)); + float2 _686 = fast::normalize(float2(_659, _670)); float _691 = _659 + (((-_686.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; @@ -274,7 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1016 = 57.2957763671875 * atan2(1.73205077648162841796875 * (_1000 - _1002), ((2.0 * _999) - _1000) - _1002); + _1016 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1000 - _1002), ((2.0 * _999) - _1000) - _1002); } float _1021; if (_1016 < 0.0) @@ -483,7 +483,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2278 = 57.2957763671875 * atan2(1.73205077648162841796875 * (_2262 - _2264), ((2.0 * _2261) - _2262) - _2264); + _2278 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_2262 - _2264), ((2.0 * _2261) - _2262) - _2264); } float _2283; if (_2278 < 0.0) @@ -845,7 +845,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1443 = 57.2957763671875 * atan2(1.73205077648162841796875 * (_1427 - _1429), ((2.0 * _1426) - _1427) - _1429); + _1443 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1427 - _1429), ((2.0 * _1426) - _1427) - _1429); } float _1448; if (_1443 < 0.0) diff --git a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag index e7b96e7a..866b1916 100644 --- a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -462,7 +462,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { _215 = in.in_var_TEXCOORD7; } - float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-0.2857142984867095947265625, -0.4285714328289031982421875, 0.857142865657806396484375), normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-0.2857142984867095947265625, -0.4285714328289031982421875, 0.857142865657806396484375), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); float3 _246; if (View.View_OutOfBoundsMask > 0.0) { diff --git a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert index 6384e1b9..5b124f06 100644 --- a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert +++ b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -296,7 +296,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float4 _145 = AttributesTexture.sample(AttributesTextureSampler, _133, level(0.0)); float _146 = _137.w; float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; - float3 _160 = normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); + float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 0.0 : (-0.5))) * float2(2.0)) * (((CurveTexture.sample(CurveTextureSampler, (EmitterUniforms.EmitterUniforms_MiscCurve.xy + (EmitterUniforms.EmitterUniforms_MiscCurve.zw * float2(_146))), level(0.0)) * EmitterUniforms.EmitterUniforms_MiscScale) + EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); float3 _239 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _137.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _137.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); @@ -312,8 +312,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float3 _279 = cross(_265, float3(0.0, 0.0, 1.0)); float3 _284 = _279 / float3(sqrt(fast::max(dot(_279, _279), 0.00999999977648258209228515625))); float3 _286 = float3(fast::clamp((_261 * EmitterUniforms.EmitterUniforms_CameraFacingBlend[1]) - EmitterUniforms.EmitterUniforms_CameraFacingBlend[2], 0.0, 1.0)); - _335 = normalize(mix(_251, _284, _286)); - _336 = normalize(mix(_259, cross(_265, _284), _286)); + _335 = fast::normalize(mix(_251, _284, _286)); + _336 = fast::normalize(mix(_259, cross(_265, _284), _286)); } else { diff --git a/reference/shaders-msl-no-opt/comp/glsl.std450.comp b/reference/shaders-msl-no-opt/comp/glsl.std450.comp index 3c505abe..1b3295f4 100644 --- a/reference/shaders-msl-no-opt/comp/glsl.std450.comp +++ b/reference/shaders-msl-no-opt/comp/glsl.std450.comp @@ -170,7 +170,7 @@ float2x2 spvInverse2x2(float2x2 m) } template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } @@ -217,13 +217,13 @@ kernel void main0(device SSBO& _19 [[buffer(0)]]) _19.res = asin(((device float*)&_19.f32)[0u]); _19.res = acos(((device float*)&_19.f32)[0u]); _19.res = atan(((device float*)&_19.f32)[0u]); - _19.res = sinh(((device float*)&_19.f32)[0u]); - _19.res = cosh(((device float*)&_19.f32)[0u]); - _19.res = tanh(((device float*)&_19.f32)[0u]); + _19.res = fast::sinh(((device float*)&_19.f32)[0u]); + _19.res = fast::cosh(((device float*)&_19.f32)[0u]); + _19.res = precise::tanh(((device float*)&_19.f32)[0u]); _19.res = asinh(((device float*)&_19.f32)[0u]); _19.res = acosh(((device float*)&_19.f32)[0u]); _19.res = atanh(((device float*)&_19.f32)[0u]); - _19.res = atan2(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = precise::atan2(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); _19.res = pow(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); _19.res = exp(((device float*)&_19.f32)[0u]); _19.res = log(((device float*)&_19.f32)[0u]); @@ -239,7 +239,7 @@ kernel void main0(device SSBO& _19 [[buffer(0)]]) _19.res = spvRefract(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); _19.res = length(_19.f32.xy); _19.res = distance(_19.f32.xy, _19.f32.zw); - float2 v2 = normalize(_19.f32.xy); + float2 v2 = fast::normalize(_19.f32.xy); v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw); v2 = reflect(_19.f32.xy, _19.f32.zw); v2 = refract(_19.f32.xy, _19.f32.yz, ((device float*)&_19.f32)[3u]); diff --git a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag index 3bf42962..de53d681 100644 --- a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag @@ -94,11 +94,11 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) res = cos(v4); res = tan(v4); res = asin(v4); - res = atan2(v4, v3.xyzz); + res = precise::atan2(v4, v3.xyzz); res = atan(v4); - res = sinh(v4); - res = cosh(v4); - res = tanh(v4); + res = fast::sinh(v4); + res = fast::cosh(v4); + res = precise::tanh(v4); res = asinh(v4); res = acosh(v4); res = atanh(v4); @@ -143,7 +143,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) t0 = distance(v4, v4); t0 = dot(v4, v4); half3 res3 = cross(v3, v3); - res = normalize(v4); + res = fast::normalize(v4); res = faceforward(v4, v4, v4); res = reflect(v4, v4); res = refract(v4, v4, v1); diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert index 954f9255..f79a794c 100644 --- a/reference/shaders-msl/flatten/struct.flatten.vert +++ b/reference/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 L = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); return out; } diff --git a/reference/shaders-msl/frag/in_mat.frag b/reference/shaders-msl/frag/in_mat.frag index 70ff4860..cf7da2ef 100644 --- a/reference/shaders-msl/frag/in_mat.frag +++ b/reference/shaders-msl/frag/in_mat.frag @@ -27,8 +27,8 @@ fragment main0_out main0(main0_in in [[stage_in]], texturecube samplerCol inInvModelView[1] = in.inInvModelView_1; inInvModelView[2] = in.inInvModelView_2; inInvModelView[3] = in.inInvModelView_3; - float3 cI = normalize(in.inPos); - float3 cR = reflect(cI, normalize(in.inNormal)); + float3 cI = fast::normalize(in.inPos); + float3 cR = reflect(cI, fast::normalize(in.inNormal)); cR = float3((inInvModelView * float4(cR, 0.0)).xyz); cR.x *= (-1.0); out.outFragColor = samplerColor.sample(samplerColorSmplr, cR, bias(in.inLodBias)); diff --git a/reference/shaders-msl/frag/scalar-refract-reflect.frag b/reference/shaders-msl/frag/scalar-refract-reflect.frag index 592d4458..e4adc4ab 100644 --- a/reference/shaders-msl/frag/scalar-refract-reflect.frag +++ b/reference/shaders-msl/frag/scalar-refract-reflect.frag @@ -16,7 +16,7 @@ struct main0_in }; template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } diff --git a/reference/shaders-msl/vert/copy.flatten.vert b/reference/shaders-msl/vert/copy.flatten.vert index a762f7e7..92757a60 100644 --- a/reference/shaders-msl/vert/copy.flatten.vert +++ b/reference/shaders-msl/vert/copy.flatten.vert @@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] light.Radius = _21.lights[i].Radius; light.Color = _21.lights[i].Color; float3 L = in.aVertex.xyz - light.Position; - out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert index c285f3c8..43b3e112 100644 --- a/reference/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int i = 0; i < 4; i++) { float3 L = in.aVertex.xyz - float3(_21.lights[i].Position); - out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/packed_matrix.vert b/reference/shaders-msl/vert/packed_matrix.vert index 9cc416a3..e18d5f22 100644 --- a/reference/shaders-msl/vert/packed_matrix.vert +++ b/reference/shaders-msl/vert/packed_matrix.vert @@ -42,7 +42,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant _RESERVED_IDENTIFIER_F float3 _RESERVED_IDENTIFIER_FIXUP_23783; for (;;) { - _RESERVED_IDENTIFIER_FIXUP_23783 = normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); + _RESERVED_IDENTIFIER_FIXUP_23783 = fast::normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); break; } float4 _RESERVED_IDENTIFIER_FIXUP_14995 = _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m0 * float4(float3(_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m10) + (in._RESERVED_IDENTIFIER_FIXUP_5275.xyz * (_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m17 + _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m18)), 1.0); diff --git a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag index 3a13024d..0f9cd547 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -217,10 +217,10 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu main0_out out = {}; float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w); float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation); - float3 _181 = normalize(-in.in_var_TEXCOORD8.xyz); + float3 _181 = fast::normalize(-in.in_var_TEXCOORD8.xyz); float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0))); float2 _190 = (_187.xy * float2(2.0)) - float2(1.0); - float3 _206 = normalize(float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)) * (((float4(_190, sqrt(fast::clamp(1.0 - dot(_190, _190), 0.0, 1.0)), 1.0).xyz * float3(0.300000011920928955078125, 0.300000011920928955078125, 1.0)) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)); + float3 _206 = fast::normalize(float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)) * (((float4(_190, sqrt(fast::clamp(1.0 - dot(_190, _190), 0.0, 1.0)), 1.0).xyz * float3(0.300000011920928955078125, 0.300000011920928955078125, 1.0)) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)); float _208 = dot(_206, _181); float4 _217 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(20.0))); float _219 = mix(0.4000000059604644775390625, 1.0, _217.x); @@ -301,7 +301,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { _423 = 1.0; } - float3 _429 = normalize(_181 + MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz); + float3 _429 = fast::normalize(_181 + MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz); float _439 = (_253 * 0.25) + 0.25; float3 _440 = cross(_206, _429); float _442 = _253 * _253; @@ -334,7 +334,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179; float _502 = dot(_501, _501); float3 _505 = _501 * float3(rsqrt(_502)); - _507 = normalize(_181 + _505); + _507 = fast::normalize(_181 + _505); _509 = fast::max(0.0, dot(_206, _505)); _511 = fast::max(0.0, dot(_206, _507)); if (_Globals.LightColorAndFalloffExponent[_491].w == 0.0) diff --git a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag index bc40c7cc..bb6058c3 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -271,7 +271,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float4 _144 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); float3 _148 = _144.xyz / float3(_144.w); float3 _149 = _148 - float3(View.View_PreViewTranslation); - float3 _151 = normalize(-_148); + float3 _151 = fast::normalize(-_148); float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); float _171 = floor(_170); diff --git a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag index bc40c7cc..bb6058c3 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -271,7 +271,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float4 _144 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); float3 _148 = _144.xyz / float3(_144.w); float3 _149 = _148 - float3(View.View_PreViewTranslation); - float3 _151 = normalize(-_148); + float3 _151 = fast::normalize(-_148); float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); float _171 = floor(_170); diff --git a/reference/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag index bdeccc2f..9ee2d89b 100644 --- a/reference/shaders-ue4/asm/frag/depth-compare.asm.frag +++ b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -246,7 +246,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); float4 _219 = _107; _219.z = _218; - float3 _236 = normalize((SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz * float3(2.0)) - float3(1.0)); + float3 _236 = fast::normalize((SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz * float3(2.0)) - float3(1.0)); uint _240 = uint(round(SceneTexturesStruct_GBufferBTexture.sample(SceneTexturesStruct_GBufferBTextureSampler, _114, level(0.0)).w * 255.0)); bool _248 = (_240 & 15u) == 5u; float _448; @@ -259,7 +259,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G if (_160) { float3 _278 = _152 / float3(_158); - float3 _280 = normalize(cross(_278, float3(0.0, 0.0, 1.0))); + float3 _280 = fast::normalize(cross(_278, float3(0.0, 0.0, 1.0))); float3 _284 = float3(_Globals.InvShadowmapResolution); float3 _285 = _280 * _284; float3 _286 = cross(_280, _278) * _284; diff --git a/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag index 210d4f98..c3d26e65 100644 --- a/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag +++ b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -176,7 +176,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _644 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); float _649 = ((2.0 * _633) - (8.0 * _644)) + 4.0; float2 _653 = float2((3.0 * _633) / _649, (2.0 * _644) / _649); - float2 _660 = normalize(float2(_633, _644)); + float2 _660 = fast::normalize(float2(_633, _644)); float _665 = _633 + (((-_660.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; @@ -273,7 +273,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _990 = 57.2957763671875 * atan2(sqrt(3.0) * (_974 - _976), ((2.0 * _973) - _974) - _976); + _990 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_974 - _976), ((2.0 * _973) - _974) - _976); } float _995; if (_990 < 0.0) @@ -436,7 +436,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2174 = 57.2957763671875 * atan2(sqrt(3.0) * (_2158 - _2160), ((2.0 * _2157) - _2158) - _2160); + _2174 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_2158 - _2160), ((2.0 * _2157) - _2158) - _2160); } float _2179; if (_2174 < 0.0) @@ -909,7 +909,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1337 = 57.2957763671875 * atan2(sqrt(3.0) * (_1321 - _1323), ((2.0 * _1320) - _1321) - _1323); + _1337 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1321 - _1323), ((2.0 * _1320) - _1321) - _1323); } float _1342; if (_1337 < 0.0) diff --git a/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag index 67fa21aa..83857f22 100644 --- a/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag +++ b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -178,7 +178,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float _670 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); float _675 = ((2.0 * _659) - (8.0 * _670)) + 4.0; float2 _679 = float2((3.0 * _659) / _675, (2.0 * _670) / _675); - float2 _686 = normalize(float2(_659, _670)); + float2 _686 = fast::normalize(float2(_659, _670)); float _691 = _659 + (((-_686.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; @@ -275,7 +275,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1016 = 57.2957763671875 * atan2(sqrt(3.0) * (_1000 - _1002), ((2.0 * _999) - _1000) - _1002); + _1016 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1000 - _1002), ((2.0 * _999) - _1000) - _1002); } float _1021; if (_1016 < 0.0) @@ -485,7 +485,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _2278 = 57.2957763671875 * atan2(sqrt(3.0) * (_2262 - _2264), ((2.0 * _2261) - _2262) - _2264); + _2278 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_2262 - _2264), ((2.0 * _2261) - _2262) - _2264); } float _2283; if (_2278 < 0.0) @@ -958,7 +958,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa } else { - _1443 = 57.2957763671875 * atan2(sqrt(3.0) * (_1427 - _1429), ((2.0 * _1426) - _1427) - _1429); + _1443 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1427 - _1429), ((2.0 * _1426) - _1427) - _1429); } float _1448; if (_1443 < 0.0) diff --git a/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag index 9404aa66..834517a8 100644 --- a/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag +++ b/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -462,7 +462,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { _215 = in.in_var_TEXCOORD7; } - float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-1.0, -1.5, 3.0) / float3(sqrt(12.25)), normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-1.0, -1.5, 3.0) / float3(sqrt(12.25)), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); float3 _246; if (View.View_OutOfBoundsMask > 0.0) { diff --git a/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert index 6384e1b9..5b124f06 100644 --- a/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert +++ b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -296,7 +296,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float4 _145 = AttributesTexture.sample(AttributesTextureSampler, _133, level(0.0)); float _146 = _137.w; float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; - float3 _160 = normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); + float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 0.0 : (-0.5))) * float2(2.0)) * (((CurveTexture.sample(CurveTextureSampler, (EmitterUniforms.EmitterUniforms_MiscCurve.xy + (EmitterUniforms.EmitterUniforms_MiscCurve.zw * float2(_146))), level(0.0)) * EmitterUniforms.EmitterUniforms_MiscScale) + EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); float3 _239 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _137.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _137.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); @@ -312,8 +312,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buff float3 _279 = cross(_265, float3(0.0, 0.0, 1.0)); float3 _284 = _279 / float3(sqrt(fast::max(dot(_279, _279), 0.00999999977648258209228515625))); float3 _286 = float3(fast::clamp((_261 * EmitterUniforms.EmitterUniforms_CameraFacingBlend[1]) - EmitterUniforms.EmitterUniforms_CameraFacingBlend[2], 0.0, 1.0)); - _335 = normalize(mix(_251, _284, _286)); - _336 = normalize(mix(_259, cross(_265, _284), _286)); + _335 = fast::normalize(mix(_251, _284, _286)); + _336 = fast::normalize(mix(_259, cross(_265, _284), _286)); } else { diff --git a/spirv_msl.cpp b/spirv_msl.cpp index acd71ace..2df30421 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -5719,8 +5719,9 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplReflectScalar: // Metal does not support scalar versions of these functions. + // Ensure fast-math is disabled to match Vulkan results. statement("template"); - statement("inline T spvReflect(T i, T n)"); + statement("[[clang::optnone]] T spvReflect(T i, T n)"); begin_scope(); statement("return i - T(2) * i * n * n;"); end_scope(); @@ -8781,7 +8782,8 @@ const char *CompilerMSL::get_memory_order(uint32_t) return "memory_order_relaxed"; } -// Override for MSL-specific extension syntax instructions +// Override for MSL-specific extension syntax instructions. +// In some cases, deliberately select either the fast or precise versions of the MSL functions to match Vulkan math precision results. void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) { auto op = static_cast(eop); @@ -8793,8 +8795,17 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, switch (op) { + case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "fast::sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "fast::cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "precise::tanh"); + break; case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2"); break; case GLSLstd450InverseSqrt: emit_unary_func_op(result_type, id, args[0], "rsqrt"); @@ -9018,25 +9029,20 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; case GLSLstd450Length: - // MSL does not support scalar versions here. + // MSL does not support scalar versions, so use abs(). if (expression_type(args[0]).vecsize == 1) - { - // Equivalent to abs(). emit_unary_func_op(result_type, id, args[0], "abs"); - } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; case GLSLstd450Normalize: // MSL does not support scalar versions here. + // Returns -1 or 1 for valid input, sign() does the job. if (expression_type(args[0]).vecsize == 1) - { - // Returns -1 or 1 for valid input, sign() does the job. emit_unary_func_op(result_type, id, args[0], "sign"); - } else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + emit_unary_func_op(result_type, id, args[0], "fast::normalize"); break; case GLSLstd450Reflect: