From 31be74a853b92d98893e2c35d2319cb227afc9b0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Mar 2022 11:04:45 +0100 Subject: [PATCH] Add relax_nan_checks options. Makes codegen from typical D3D emulation SPIR-V more readable. Also makes cross compilation with NotEqual more sensible. It's very rare to actually need the strict NaN-checks in practice. Also, glslang now emits UnordNotEqual by default it seems, so give up trying to assume OrdNotEqual. Harmonize for UnordNotEqual as the sane default. --- CMakeLists.txt | 2 +- main.cpp | 5 + .../comp/nmin-max-clamp.relax-nan.asm.comp | 30 ++ .../unord-relational-op.relax-nan.asm.frag | 22 ++ .../comp/mat3-row-maj-read-write-const.comp | 3 +- .../comp/threadgroup-boolean-workaround.comp | 2 +- .../comp/nmin-max-clamp.relax-nan.asm.comp | 42 +++ reference/opt/shaders/comp/cfg.comp | 2 +- .../asm/frag/unordered-compare.asm.frag | 1 + .../frag/unordered-compare.relax-nan.asm.frag | 52 ++++ ...sm30.vert => empty-shader.nofxc.sm30.vert} | 0 .../comp/nmin-max-clamp.relax-nan.asm.comp | 27 ++ .../frag/fp16.desktop.invalid.frag | 4 +- .../min-max-clamp.relax-nan.invalid.asm.frag | 69 +++++ .../asm/frag/unord-relational-op.asm.frag | 9 +- .../unord-relational-op.relax-nan.asm.frag | 59 ++++ .../comp/mat3-row-maj-read-write-const.comp | 2 +- .../comp/threadgroup-boolean-workaround.comp | 2 +- .../asm/frag/unordered-compare.asm.frag | 5 +- .../frag/unordered-compare.relax-nan.asm.frag | 34 ++ .../frag/fp16.invalid.desktop.frag | 4 +- .../comp/nmin-max-clamp.relax-nan.asm.comp | 39 +++ reference/shaders/comp/cfg.comp | 10 +- .../asm/frag/unordered-compare.asm.frag | 2 + .../frag/unordered-compare.relax-nan.asm.frag | 179 +++++++++++ ...sm30.vert => empty-shader.nofxc.sm30.vert} | 0 .../comp/nmin-max-clamp.relax-nan.asm.comp | 203 ++++++++++++ .../min-max-clamp.relax-nan.invalid.asm.frag | 293 ++++++++++++++++++ .../asm/frag/unord-relational-op.asm.frag | 2 + .../unord-relational-op.relax-nan.asm.frag | 207 +++++++++++++ .../asm/frag/unordered-compare.asm.frag | 2 + .../frag/unordered-compare.relax-nan.asm.frag | 179 +++++++++++ .../comp/nmin-max-clamp.relax-nan.asm.comp | 203 ++++++++++++ spirv_cross_c.cpp | 3 + spirv_cross_c.h | 4 +- spirv_glsl.cpp | 74 ++++- spirv_glsl.hpp | 12 + spirv_hlsl.cpp | 4 + spirv_msl.cpp | 12 +- test_shaders.py | 6 + 40 files changed, 1777 insertions(+), 33 deletions(-) create mode 100644 reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp create mode 100644 reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag create mode 100644 reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp create mode 100644 reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag rename reference/shaders-hlsl-no-opt/vert/{empty-shader.sm30.vert => empty-shader.nofxc.sm30.vert} (100%) create mode 100644 reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp create mode 100644 reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag create mode 100644 reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag create mode 100644 reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag create mode 100644 reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp create mode 100644 shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag rename shaders-hlsl-no-opt/vert/{empty-shader.sm30.vert => empty-shader.nofxc.sm30.vert} (100%) create mode 100644 shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp create mode 100644 shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag create mode 100644 shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag create mode 100644 shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag create mode 100644 shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6860c5bc..d8ea49a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -332,7 +332,7 @@ if (SPIRV_CROSS_STATIC) endif() set(spirv-cross-abi-major 0) -set(spirv-cross-abi-minor 48) +set(spirv-cross-abi-minor 49) set(spirv-cross-abi-patch 0) if (SPIRV_CROSS_SHARED) diff --git a/main.cpp b/main.cpp index fe197376..9124ecaf 100644 --- a/main.cpp +++ b/main.cpp @@ -669,6 +669,7 @@ struct CLIArguments bool emit_line_directives = false; bool enable_storage_image_qualifier_deduction = true; bool force_zero_initialized_variables = false; + bool relax_nan_checks = false; uint32_t force_recompile_max_debug_iterations = 3; SmallVector msl_discrete_descriptor_sets; SmallVector msl_device_argument_buffers; @@ -919,6 +920,7 @@ static void print_help_common() "\t[--mask-stage-output-builtin ]:\n" "\t\tIf a stage output variable with matching builtin is active, " "optimize away the variable if it can affect cross-stage linking correctness.\n" + "\t[--relax-nan-checks]:\n\t\tRelax NaN checks for N{Clamp,Min,Max} and ordered vs. unordered compare instructions.\n" ); // clang-format on } @@ -1292,6 +1294,7 @@ static string compile_iteration(const CLIArguments &args, std::vector opts.emit_line_directives = args.emit_line_directives; opts.enable_storage_image_qualifier_deduction = args.enable_storage_image_qualifier_deduction; opts.force_zero_initialized_variables = args.force_zero_initialized_variables; + opts.relax_nan_checks = args.relax_nan_checks; opts.force_recompile_max_debug_iterations = args.force_recompile_max_debug_iterations; compiler->set_common_options(opts); @@ -1689,6 +1692,8 @@ static int main_inner(int argc, char *argv[]) args.force_recompile_max_debug_iterations = parser.next_uint(); }); + cbs.add("--relax-nan-checks", [&](CLIParser &) { args.relax_nan_checks = true; }); + cbs.default_handler = [&args](const char *value) { args.input = value; }; cbs.add("-", [&args](CLIParser &) { args.input = "-"; }); cbs.error_handler = [] { print_help(); }; diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000..9f51eff1 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,30 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + for (int _139 = 0; _139 < 2; ) + { + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60))))); + _139++; + continue; + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000..aee290f5 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +constant float a_tmp [[function_constant(1)]]; +constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0; +constant float b_tmp [[function_constant(2)]]; +constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(a + b); + return out; +} + diff --git a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp index 47c83afe..cf26178e 100644 --- a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp +++ b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -12,8 +12,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device model_t& model [[buffer(0)]]) { - float _38 = (transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x; - if ((isunordered(_38, 0.0) || _38 != 0.0)) + if ((transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x != 0.0) { model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0))); } diff --git a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp index 8b80929a..c1eccf27 100644 --- a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp +++ b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -13,7 +13,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { threadgroup short4 foo[4]; - foo[gl_LocalInvocationIndex] = short4((isunordered(_23.values[gl_GlobalInvocationID.x], float4(10.0)) || _23.values[gl_GlobalInvocationID.x] != float4(10.0))); + foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0)); threadgroup_barrier(mem_flags::mem_threadgroup); _23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u])); } diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000..32d8e025 --- /dev/null +++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,42 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; +} _4; + +void main() +{ + _4.a1 = min(_4.b1, _4.c1); + _4.a2 = min(_4.b2, _4.c2); + _4.a3 = min(_4.b3, _4.c3); + _4.a4 = min(_4.b4, _4.c4); + _4.a1 = max(_4.b1, _4.c1); + _4.a2 = max(_4.b2, _4.c2); + _4.a3 = max(_4.b3, _4.c3); + _4.a4 = max(_4.b4, _4.c4); + _4.a1 = clamp(_4.a1, _4.b1, _4.c1); + _4.a2 = clamp(_4.a2, _4.b2, _4.c2); + _4.a3 = clamp(_4.a3, _4.b3, _4.c3); + _4.a4 = clamp(_4.a4, _4.b4, _4.c4); + for (int _139 = 0; _139 < 2; ) + { + _4.a2 = min(_4.b2, _4.c2); + _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y); + _139++; + continue; + } +} + diff --git a/reference/opt/shaders/comp/cfg.comp b/reference/opt/shaders/comp/cfg.comp index 97cdbc76..af207378 100644 --- a/reference/opt/shaders/comp/cfg.comp +++ b/reference/opt/shaders/comp/cfg.comp @@ -10,7 +10,7 @@ float _188; void main() { - if (!(_11.data == 0.0)) + if (_11.data != 0.0) { _11.data = 10.0; } diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag index f18e8e77..021333cc 100644 --- a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -21,6 +21,7 @@ float4 test_vector() bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w)); bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq); } diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000..0172c20b --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,52 @@ +static float4 A; +static float4 B; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 A : TEXCOORD0; + float4 B : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float4 test_vector() +{ + bool4 le = bool4(A.x < B.x, A.y < B.y, A.z < B.z, A.w < B.w); + bool4 leq = bool4(A.x <= B.x, A.y <= B.y, A.z <= B.z, A.w <= B.w); + bool4 ge = bool4(A.x > B.x, A.y > B.y, A.z > B.z, A.w > B.w); + bool4 geq = bool4(A.x >= B.x, A.y >= B.y, A.z >= B.z, A.w >= B.w); + bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); + bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq); +} + +float test_scalar() +{ + bool le = A.x < B.x; + bool leq = A.x <= B.x; + bool ge = A.x > B.x; + bool geq = A.x >= B.x; + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void frag_main() +{ + FragColor = test_vector() + test_scalar().xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + A = stage_input.A; + B = stage_input.B; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert similarity index 100% rename from reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert rename to reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000..88f53a4c --- /dev/null +++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,27 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + for (int i = 0; i < 2; i++, _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60)))))) + { + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag index 9eb0f595..16182ae2 100644 --- a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag @@ -77,7 +77,7 @@ void test_conversions() half one = test_result(); int a = int(one); uint b = uint(one); - bool c = (isunordered(one, half(0.0)) || one != half(0.0)); + bool c = one != half(0.0); float d = float(one); half a2 = half(a); half b2 = half(b); @@ -152,7 +152,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) btmp = v4 > v4; btmp = v4 >= v4; btmp = v4 == v4; - btmp = (isunordered(v4, v4) || v4 != v4); + btmp = v4 != v4; res = dfdx(v4); res = dfdy(v4); res = dfdx(v4); diff --git a/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag new file mode 100644 index 00000000..7835e013 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag @@ -0,0 +1,69 @@ +#include +#include + +using namespace metal; + +struct main0_in +{ + float v1 [[user(locn0)]]; + float2 v2 [[user(locn1)]]; + float3 v3 [[user(locn2)]]; + float4 v4 [[user(locn3)]]; + half h1 [[user(locn4)]]; + half2 h2 [[user(locn5)]]; + half3 h3 [[user(locn6)]]; + half4 h4 [[user(locn7)]]; +}; + +fragment void main0(main0_in in [[stage_in]]) +{ + float res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + res = fast::clamp(in.v1, in.v1, in.v1); + res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + res = fast::clamp(in.v1, in.v1, in.v1); + float2 res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + float3 res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + float4 res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + half hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + half2 hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + half3 hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + half4 hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); + hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); +} + diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag index 8df57c55..624408c4 100644 --- a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag +++ b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag @@ -29,25 +29,26 @@ fragment main0_out main0(main0_in in [[stage_in]]) float t0 = a; float t1 = b; bool c1 = (isunordered(a, b) || a == b); - bool c2 = (isunordered(a, b) || a != b); + c1 = a != b; + bool c2 = a != b; bool c3 = (isunordered(a, b) || a < b); bool c4 = (isunordered(a, b) || a > b); bool c5 = (isunordered(a, b) || a <= b); bool c6 = (isunordered(a, b) || a >= b); bool2 c7 = (isunordered(in.c, in.d) || in.c == in.d); - bool2 c8 = (isunordered(in.c, in.d) || in.c != in.d); + bool2 c8 = in.c != in.d; bool2 c9 = (isunordered(in.c, in.d) || in.c < in.d); bool2 c10 = (isunordered(in.c, in.d) || in.c > in.d); bool2 c11 = (isunordered(in.c, in.d) || in.c <= in.d); bool2 c12 = (isunordered(in.c, in.d) || in.c >= in.d); bool3 c13 = (isunordered(in.e, in.f) || in.e == in.f); - bool3 c14 = (isunordered(in.e, in.f) || in.e != in.f); + bool3 c14 = in.e != in.f; bool3 c15 = (isunordered(in.e, in.f) || in.e < in.f); bool3 c16 = (isunordered(in.e, in.f) || in.e > in.f); bool3 c17 = (isunordered(in.e, in.f) || in.e <= in.f); bool3 c18 = (isunordered(in.e, in.f) || in.e >= in.f); bool4 c19 = (isunordered(in.g, in.h) || in.g == in.h); - bool4 c20 = (isunordered(in.g, in.h) || in.g != in.h); + bool4 c20 = in.g != in.h; bool4 c21 = (isunordered(in.g, in.h) || in.g < in.h); bool4 c22 = (isunordered(in.g, in.h) || in.g > in.h); bool4 c23 = (isunordered(in.g, in.h) || in.g <= in.h); diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000..48482806 --- /dev/null +++ b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,59 @@ +#include +#include + +using namespace metal; + +constant float a_tmp [[function_constant(1)]]; +constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0; +constant float b_tmp [[function_constant(2)]]; +constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 c [[user(locn2)]]; + float2 d [[user(locn3)]]; + float3 e [[user(locn4)]]; + float3 f [[user(locn5)]]; + float4 g [[user(locn6)]]; + float4 h [[user(locn7)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float t0 = a; + float t1 = b; + bool c1 = a == b; + c1 = a != b; + bool c2 = a != b; + bool c3 = a < b; + bool c4 = a > b; + bool c5 = a <= b; + bool c6 = a >= b; + bool2 c7 = in.c == in.d; + bool2 c8 = in.c != in.d; + bool2 c9 = in.c < in.d; + bool2 c10 = in.c > in.d; + bool2 c11 = in.c <= in.d; + bool2 c12 = in.c >= in.d; + bool3 c13 = in.e == in.f; + bool3 c14 = in.e != in.f; + bool3 c15 = in.e < in.f; + bool3 c16 = in.e > in.f; + bool3 c17 = in.e <= in.f; + bool3 c18 = in.e >= in.f; + bool4 c19 = in.g == in.h; + bool4 c20 = in.g != in.h; + bool4 c21 = in.g < in.h; + bool4 c22 = in.g > in.h; + bool4 c23 = in.g <= in.h; + bool4 c24 = in.g >= in.h; + out.FragColor = float4(t0 + t1); + return out; +} + diff --git a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp index 54fb89bc..3de0ef44 100644 --- a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp +++ b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -14,7 +14,7 @@ kernel void main0(device model_t& model [[buffer(0)]]) { float3x3 mtx_cm = transpose(model.mtx_rm); float3x3 mtx1 = mtx_cm * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)); - if ((isunordered(mtx1[0].x, 0.0) || mtx1[0].x != 0.0)) + if (mtx1[0].x != 0.0) { model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0))); } diff --git a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp index d01b1351..754f7357 100644 --- a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp +++ b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -15,7 +15,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); static inline __attribute__((always_inline)) void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& v_23, thread uint3& gl_GlobalInvocationID) { - foo[gl_LocalInvocationIndex] = short4((isunordered(v_23.values[gl_GlobalInvocationID.x], float4(10.0)) || v_23.values[gl_GlobalInvocationID.x] != float4(10.0))); + foo[gl_LocalInvocationIndex] = short4(v_23.values[gl_GlobalInvocationID.x] != float4(10.0)); threadgroup_barrier(mem_flags::mem_threadgroup); v_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u])); } diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag index 21aadfb1..61122bbd 100644 --- a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -11,7 +11,8 @@ vec4 test_vector() bvec4 ge = not(lessThanEqual(A, B)); bvec4 geq = not(lessThan(A, B)); bvec4 eq = not(notEqual(A, B)); - bvec4 neq = not(equal(A, B)); + bvec4 neq = notEqual(A, B); + neq = notEqual(A, B); return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq); } @@ -22,7 +23,7 @@ float test_scalar() bool ge = !(A.x <= B.x); bool geq = !(A.x < B.x); bool eq = !(A.x != B.x); - bool neq = !(A.x == B.x); + bool neq = A.x != B.x; return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); } diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000..24db7c9f --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(location = 0) in vec4 A; +layout(location = 1) in vec4 B; +layout(location = 0) out vec4 FragColor; + +vec4 test_vector() +{ + bvec4 le = lessThan(A, B); + bvec4 leq = lessThanEqual(A, B); + bvec4 ge = greaterThan(A, B); + bvec4 geq = greaterThanEqual(A, B); + bvec4 eq = equal(A, B); + bvec4 neq = notEqual(A, B); + neq = notEqual(A, B); + return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq); +} + +float test_scalar() +{ + bool le = A.x < B.x; + bool leq = A.x <= B.x; + bool ge = A.x > B.x; + bool geq = A.x >= B.x; + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void main() +{ + FragColor = test_vector() + vec4(test_scalar()); +} + diff --git a/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag index 55f5235e..faf79b2b 100644 --- a/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag +++ b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag @@ -50,7 +50,7 @@ void test_conversions() float16_t one = test_result(); int a = int(one); uint b = uint(one); - bool c = !(one == float16_t(0.0)); + bool c = one != float16_t(0.0); float d = float(one); double e = double(one); float16_t a2 = float16_t(a); @@ -126,7 +126,7 @@ void test_builtins() btmp = greaterThan(v4, v4); btmp = greaterThanEqual(v4, v4); btmp = equal(v4, v4); - btmp = not(equal(v4, v4)); + btmp = notEqual(v4, v4); res = dFdx(v4); res = dFdy(v4); res = dFdxFine(v4); diff --git a/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000..449a87d0 --- /dev/null +++ b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,39 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; +} _4; + +void main() +{ + _4.a1 = min(_4.b1, _4.c1); + _4.a2 = min(_4.b2, _4.c2); + _4.a3 = min(_4.b3, _4.c3); + _4.a4 = min(_4.b4, _4.c4); + _4.a1 = max(_4.b1, _4.c1); + _4.a2 = max(_4.b2, _4.c2); + _4.a3 = max(_4.b3, _4.c3); + _4.a4 = max(_4.b4, _4.c4); + _4.a1 = clamp(_4.a1, _4.b1, _4.c1); + _4.a2 = clamp(_4.a2, _4.b2, _4.c2); + _4.a3 = clamp(_4.a3, _4.b3, _4.c3); + _4.a4 = clamp(_4.a4, _4.b4, _4.c4); + for (int i = 0; i < 2; i++, _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y)) + { + _4.a2 = min(_4.b2, _4.c2); + } +} + diff --git a/reference/shaders/comp/cfg.comp b/reference/shaders/comp/cfg.comp index a91c8732..77ad312c 100644 --- a/reference/shaders/comp/cfg.comp +++ b/reference/shaders/comp/cfg.comp @@ -8,7 +8,7 @@ layout(binding = 0, std430) buffer SSBO void test() { - if (!(_11.data == 0.0)) + if (_11.data != 0.0) { float tmp = 10.0; _11.data = tmp; @@ -18,12 +18,12 @@ void test() float tmp_1 = 15.0; _11.data = tmp_1; } - if (!(_11.data == 0.0)) + if (_11.data != 0.0) { float e; - if (!(_11.data == 5.0)) + if (_11.data != 5.0) { - if (!(_11.data == 6.0)) + if (_11.data != 6.0) { e = 10.0; } @@ -70,7 +70,7 @@ void test() float m; do { - } while (!(m == 20.0)); + } while (m != 20.0); _11.data = m; } diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag index 4ad8fc5a..2e5e0309 100644 --- a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -89,6 +89,8 @@ %38 = OpLoad %v4float %B %39 = OpFUnordEqual %v4bool %37 %38 OpStore %eq %39 + %ordered = OpFOrdNotEqual %v4bool %37 %38 + OpStore %neq %ordered %41 = OpLoad %v4float %A %42 = OpLoad %v4float %B %43 = OpFUnordNotEqual %v4bool %41 %42 diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000..2e5e0309 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %ordered = OpFOrdNotEqual %v4bool %37 %38 + OpStore %neq %ordered + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert similarity index 100% rename from shaders-hlsl-no-opt/vert/empty-shader.sm30.vert rename to shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000..6c060eed --- /dev/null +++ b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + OpMemberDecorate %SSBO 5 Offset 56 + OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %45 = OpLoad %v2float %44 + %46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %89 = OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 = OpLoad %float %131 + %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag new file mode 100644 index 00000000..ad566615 --- /dev/null +++ b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag @@ -0,0 +1,293 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 205 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpExtension "SPV_AMD_gpu_shader_half_float" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4 + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpName %main "main" + OpName %res "res" + OpName %res2 "res2" + OpName %res3 "res3" + OpName %res4 "res4" + OpName %hres "hres" + OpName %hres2 "hres2" + OpName %hres3 "hres3" + OpName %hres4 "hres4" + OpName %v1 "v1" + OpName %v2 "v2" + OpName %v3 "v3" + OpName %v4 "v4" + OpName %h1 "h1" + OpName %h2 "h2" + OpName %h3 "h3" + OpName %h4 "h4" + OpDecorate %v1 Location 0 + OpDecorate %v2 Location 1 + OpDecorate %v3 Location 2 + OpDecorate %v4 Location 3 + OpDecorate %h1 Location 4 + OpDecorate %h2 Location 5 + OpDecorate %h3 Location 6 + OpDecorate %h4 Location 7 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Function_v3float = OpTypePointer Function %v3float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Function_half = OpTypePointer Function %half +%_ptr_Input_half = OpTypePointer Input %half +%_ptr_Function_v2half = OpTypePointer Function %v2half +%_ptr_Input_v2half = OpTypePointer Input %v2half +%_ptr_Function_v3half = OpTypePointer Function %v3half +%_ptr_Input_v3half = OpTypePointer Input %v3half +%_ptr_Function_v4half = OpTypePointer Function %v4half +%_ptr_Input_v4half = OpTypePointer Input %v4half + %v1 = OpVariable %_ptr_Input_float Input + %v2 = OpVariable %_ptr_Input_v2float Input + %v3 = OpVariable %_ptr_Input_v3float Input + %v4 = OpVariable %_ptr_Input_v4float Input + %h1 = OpVariable %_ptr_Input_half Input + %h2 = OpVariable %_ptr_Input_v2half Input + %h3 = OpVariable %_ptr_Input_v3half Input + %h4 = OpVariable %_ptr_Input_v4half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %res = OpVariable %_ptr_Function_float Function + %46 = OpLoad %float %v1 + %47 = OpLoad %float %v1 + %48 = OpExtInst %float %1 FMin %46 %47 + OpStore %res %48 + %49 = OpLoad %float %v1 + %50 = OpLoad %float %v1 + %51 = OpExtInst %float %1 FMax %49 %50 + OpStore %res %51 + %52 = OpLoad %float %v1 + %53 = OpLoad %float %v1 + %54 = OpLoad %float %v1 + %55 = OpExtInst %float %1 FClamp %52 %53 %54 + OpStore %res %55 + %56 = OpLoad %float %v1 + %57 = OpLoad %float %v1 + %58 = OpExtInst %float %1 NMin %56 %57 + OpStore %res %58 + %59 = OpLoad %float %v1 + %60 = OpLoad %float %v1 + %61 = OpExtInst %float %1 NMax %59 %60 + OpStore %res %61 + %62 = OpLoad %float %v1 + %63 = OpLoad %float %v1 + %64 = OpLoad %float %v1 + %65 = OpExtInst %float %1 NClamp %62 %63 %64 + OpStore %res %65 + %res2 = OpVariable %_ptr_Function_v2float Function + %66 = OpLoad %v2float %v2 + %67 = OpLoad %v2float %v2 + %68 = OpExtInst %v2float %1 FMin %66 %67 + OpStore %res2 %68 + %69 = OpLoad %v2float %v2 + %70 = OpLoad %v2float %v2 + %71 = OpExtInst %v2float %1 FMax %69 %70 + OpStore %res2 %71 + %72 = OpLoad %v2float %v2 + %73 = OpLoad %v2float %v2 + %74 = OpLoad %v2float %v2 + %75 = OpExtInst %v2float %1 FClamp %72 %73 %74 + OpStore %res2 %75 + %76 = OpLoad %v2float %v2 + %77 = OpLoad %v2float %v2 + %78 = OpExtInst %v2float %1 NMin %76 %77 + OpStore %res2 %78 + %79 = OpLoad %v2float %v2 + %80 = OpLoad %v2float %v2 + %81 = OpExtInst %v2float %1 NMax %79 %80 + OpStore %res2 %81 + %82 = OpLoad %v2float %v2 + %83 = OpLoad %v2float %v2 + %84 = OpLoad %v2float %v2 + %85 = OpExtInst %v2float %1 NClamp %82 %83 %84 + OpStore %res2 %85 + %res3 = OpVariable %_ptr_Function_v3float Function + %86 = OpLoad %v3float %v3 + %87 = OpLoad %v3float %v3 + %88 = OpExtInst %v3float %1 FMin %86 %87 + OpStore %res3 %88 + %89 = OpLoad %v3float %v3 + %90 = OpLoad %v3float %v3 + %91 = OpExtInst %v3float %1 FMax %89 %90 + OpStore %res3 %91 + %92 = OpLoad %v3float %v3 + %93 = OpLoad %v3float %v3 + %94 = OpLoad %v3float %v3 + %95 = OpExtInst %v3float %1 FClamp %92 %93 %94 + OpStore %res3 %95 + %96 = OpLoad %v3float %v3 + %97 = OpLoad %v3float %v3 + %98 = OpExtInst %v3float %1 NMin %96 %97 + OpStore %res3 %98 + %99 = OpLoad %v3float %v3 + %100 = OpLoad %v3float %v3 + %101 = OpExtInst %v3float %1 NMax %99 %100 + OpStore %res3 %101 + %102 = OpLoad %v3float %v3 + %103 = OpLoad %v3float %v3 + %104 = OpLoad %v3float %v3 + %105 = OpExtInst %v3float %1 NClamp %102 %103 %104 + OpStore %res3 %105 + %res4 = OpVariable %_ptr_Function_v4float Function + %106 = OpLoad %v4float %v4 + %107 = OpLoad %v4float %v4 + %108 = OpExtInst %v4float %1 FMin %106 %107 + OpStore %res4 %108 + %109 = OpLoad %v4float %v4 + %110 = OpLoad %v4float %v4 + %111 = OpExtInst %v4float %1 FMax %109 %110 + OpStore %res4 %111 + %112 = OpLoad %v4float %v4 + %113 = OpLoad %v4float %v4 + %114 = OpLoad %v4float %v4 + %115 = OpExtInst %v4float %1 FClamp %112 %113 %114 + OpStore %res4 %115 + %116 = OpLoad %v4float %v4 + %117 = OpLoad %v4float %v4 + %118 = OpExtInst %v4float %1 NMin %116 %117 + OpStore %res4 %118 + %119 = OpLoad %v4float %v4 + %120 = OpLoad %v4float %v4 + %121 = OpExtInst %v4float %1 NMax %119 %120 + OpStore %res4 %121 + %122 = OpLoad %v4float %v4 + %123 = OpLoad %v4float %v4 + %124 = OpLoad %v4float %v4 + %125 = OpExtInst %v4float %1 NClamp %122 %123 %124 + OpStore %res4 %125 + %hres = OpVariable %_ptr_Function_half Function + %126 = OpLoad %half %h1 + %127 = OpLoad %half %h1 + %128 = OpExtInst %half %1 FMin %126 %127 + OpStore %hres %128 + %129 = OpLoad %half %h1 + %130 = OpLoad %half %h1 + %131 = OpExtInst %half %1 FMax %129 %130 + OpStore %hres %131 + %132 = OpLoad %half %h1 + %133 = OpLoad %half %h1 + %134 = OpLoad %half %h1 + %135 = OpExtInst %half %1 FClamp %132 %133 %134 + OpStore %hres %135 + %136 = OpLoad %half %h1 + %137 = OpLoad %half %h1 + %138 = OpExtInst %half %1 NMin %136 %137 + OpStore %hres %138 + %139 = OpLoad %half %h1 + %140 = OpLoad %half %h1 + %141 = OpExtInst %half %1 NMax %139 %140 + OpStore %hres %141 + %142 = OpLoad %half %h1 + %143 = OpLoad %half %h1 + %144 = OpLoad %half %h1 + %145 = OpExtInst %half %1 NClamp %142 %143 %144 + OpStore %hres %145 + %hres2 = OpVariable %_ptr_Function_v2half Function + %146 = OpLoad %v2half %h2 + %147 = OpLoad %v2half %h2 + %148 = OpExtInst %v2half %1 FMin %146 %147 + OpStore %hres2 %148 + %149 = OpLoad %v2half %h2 + %150 = OpLoad %v2half %h2 + %151 = OpExtInst %v2half %1 FMax %149 %150 + OpStore %hres2 %151 + %152 = OpLoad %v2half %h2 + %153 = OpLoad %v2half %h2 + %154 = OpLoad %v2half %h2 + %155 = OpExtInst %v2half %1 FClamp %152 %153 %154 + OpStore %hres2 %155 + %156 = OpLoad %v2half %h2 + %157 = OpLoad %v2half %h2 + %158 = OpExtInst %v2half %1 NMin %156 %157 + OpStore %hres2 %158 + %159 = OpLoad %v2half %h2 + %160 = OpLoad %v2half %h2 + %161 = OpExtInst %v2half %1 NMax %159 %160 + OpStore %hres2 %161 + %162 = OpLoad %v2half %h2 + %163 = OpLoad %v2half %h2 + %164 = OpLoad %v2half %h2 + %165 = OpExtInst %v2half %1 NClamp %162 %163 %164 + OpStore %hres2 %165 + %hres3 = OpVariable %_ptr_Function_v3half Function + %166 = OpLoad %v3half %h3 + %167 = OpLoad %v3half %h3 + %168 = OpExtInst %v3half %1 FMin %166 %167 + OpStore %hres3 %168 + %169 = OpLoad %v3half %h3 + %170 = OpLoad %v3half %h3 + %171 = OpExtInst %v3half %1 FMax %169 %170 + OpStore %hres3 %171 + %172 = OpLoad %v3half %h3 + %173 = OpLoad %v3half %h3 + %174 = OpLoad %v3half %h3 + %175 = OpExtInst %v3half %1 FClamp %172 %173 %174 + OpStore %hres3 %175 + %176 = OpLoad %v3half %h3 + %177 = OpLoad %v3half %h3 + %178 = OpExtInst %v3half %1 NMin %176 %177 + OpStore %hres3 %178 + %179 = OpLoad %v3half %h3 + %180 = OpLoad %v3half %h3 + %181 = OpExtInst %v3half %1 NMax %179 %180 + OpStore %hres3 %181 + %182 = OpLoad %v3half %h3 + %183 = OpLoad %v3half %h3 + %184 = OpLoad %v3half %h3 + %185 = OpExtInst %v3half %1 NClamp %182 %183 %184 + OpStore %hres3 %185 + %hres4 = OpVariable %_ptr_Function_v4half Function + %186 = OpLoad %v4half %h4 + %187 = OpLoad %v4half %h4 + %188 = OpExtInst %v4half %1 FMin %186 %187 + OpStore %hres4 %188 + %189 = OpLoad %v4half %h4 + %190 = OpLoad %v4half %h4 + %191 = OpExtInst %v4half %1 FMax %189 %190 + OpStore %hres4 %191 + %192 = OpLoad %v4half %h4 + %193 = OpLoad %v4half %h4 + %194 = OpLoad %v4half %h4 + %195 = OpExtInst %v4half %1 FClamp %192 %193 %194 + OpStore %hres4 %195 + %196 = OpLoad %v4half %h4 + %197 = OpLoad %v4half %h4 + %198 = OpExtInst %v4half %1 NMin %196 %197 + OpStore %hres4 %198 + %199 = OpLoad %v4half %h4 + %200 = OpLoad %v4half %h4 + %201 = OpExtInst %v4half %1 NMax %199 %200 + OpStore %hres4 %201 + %202 = OpLoad %v4half %h4 + %203 = OpLoad %v4half %h4 + %204 = OpLoad %v4half %h4 + %205 = OpExtInst %v4half %1 NClamp %202 %203 %204 + OpStore %hres4 %205 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/unord-relational-op.asm.frag b/shaders-msl/asm/frag/unord-relational-op.asm.frag index 3e4cd6c2..824c0512 100644 --- a/shaders-msl/asm/frag/unord-relational-op.asm.frag +++ b/shaders-msl/asm/frag/unord-relational-op.asm.frag @@ -114,6 +114,8 @@ OpStore %t1 %b %15 = OpFUnordEqual %bool %a %b OpStore %c1 %15 + %ordered = OpFOrdNotEqual %bool %a %b + OpStore %c1 %ordered %17 = OpFUnordNotEqual %bool %a %b OpStore %c2 %17 %19 = OpFUnordLessThan %bool %a %b diff --git a/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000..824c0512 --- /dev/null +++ b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,207 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 122 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c %d %e %f %g %h %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 460 + OpName %main "main" + OpName %t0 "t0" + OpName %a "a" + OpName %t1 "t1" + OpName %b "b" + OpName %c1 "c1" + OpName %c2 "c2" + OpName %c3 "c3" + OpName %c4 "c4" + OpName %c5 "c5" + OpName %c6 "c6" + OpName %c7 "c7" + OpName %c "c" + OpName %d "d" + OpName %c8 "c8" + OpName %c9 "c9" + OpName %c10 "c10" + OpName %c11 "c11" + OpName %c12 "c12" + OpName %c13 "c13" + OpName %e "e" + OpName %f "f" + OpName %c14 "c14" + OpName %c15 "c15" + OpName %c16 "c16" + OpName %c17 "c17" + OpName %c18 "c18" + OpName %c19 "c19" + OpName %g "g" + OpName %h "h" + OpName %c20 "c20" + OpName %c21 "c21" + OpName %c22 "c22" + OpName %c23 "c23" + OpName %c24 "c24" + OpName %FragColor "FragColor" + OpDecorate %a SpecId 1 + OpDecorate %b SpecId 2 + OpDecorate %c Location 2 + OpDecorate %d Location 3 + OpDecorate %e Location 4 + OpDecorate %f Location 5 + OpDecorate %g Location 6 + OpDecorate %h Location 7 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %a = OpSpecConstant %float 1 + %b = OpSpecConstant %float 2 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %v2bool = OpTypeVector %bool 2 +%_ptr_Function_v2bool = OpTypePointer Function %v2bool + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %c = OpVariable %_ptr_Input_v2float Input + %d = OpVariable %_ptr_Input_v2float Input + %v3bool = OpTypeVector %bool 3 +%_ptr_Function_v3bool = OpTypePointer Function %v3bool + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float + %e = OpVariable %_ptr_Input_v3float Input + %f = OpVariable %_ptr_Input_v3float Input + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %g = OpVariable %_ptr_Input_v4float Input + %h = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %t0 = OpVariable %_ptr_Function_float Function + %t1 = OpVariable %_ptr_Function_float Function + %c1 = OpVariable %_ptr_Function_bool Function + %c2 = OpVariable %_ptr_Function_bool Function + %c3 = OpVariable %_ptr_Function_bool Function + %c4 = OpVariable %_ptr_Function_bool Function + %c5 = OpVariable %_ptr_Function_bool Function + %c6 = OpVariable %_ptr_Function_bool Function + %c7 = OpVariable %_ptr_Function_v2bool Function + %c8 = OpVariable %_ptr_Function_v2bool Function + %c9 = OpVariable %_ptr_Function_v2bool Function + %c10 = OpVariable %_ptr_Function_v2bool Function + %c11 = OpVariable %_ptr_Function_v2bool Function + %c12 = OpVariable %_ptr_Function_v2bool Function + %c13 = OpVariable %_ptr_Function_v3bool Function + %c14 = OpVariable %_ptr_Function_v3bool Function + %c15 = OpVariable %_ptr_Function_v3bool Function + %c16 = OpVariable %_ptr_Function_v3bool Function + %c17 = OpVariable %_ptr_Function_v3bool Function + %c18 = OpVariable %_ptr_Function_v3bool Function + %c19 = OpVariable %_ptr_Function_v4bool Function + %c20 = OpVariable %_ptr_Function_v4bool Function + %c21 = OpVariable %_ptr_Function_v4bool Function + %c22 = OpVariable %_ptr_Function_v4bool Function + %c23 = OpVariable %_ptr_Function_v4bool Function + %c24 = OpVariable %_ptr_Function_v4bool Function + OpStore %t0 %a + OpStore %t1 %b + %15 = OpFUnordEqual %bool %a %b + OpStore %c1 %15 + %ordered = OpFOrdNotEqual %bool %a %b + OpStore %c1 %ordered + %17 = OpFUnordNotEqual %bool %a %b + OpStore %c2 %17 + %19 = OpFUnordLessThan %bool %a %b + OpStore %c3 %19 + %21 = OpFUnordGreaterThan %bool %a %b + OpStore %c4 %21 + %23 = OpFUnordLessThanEqual %bool %a %b + OpStore %c5 %23 + %25 = OpFUnordGreaterThanEqual %bool %a %b + OpStore %c6 %25 + %32 = OpLoad %v2float %c + %34 = OpLoad %v2float %d + %35 = OpFUnordEqual %v2bool %32 %34 + OpStore %c7 %35 + %37 = OpLoad %v2float %c + %38 = OpLoad %v2float %d + %39 = OpFUnordNotEqual %v2bool %37 %38 + OpStore %c8 %39 + %41 = OpLoad %v2float %c + %42 = OpLoad %v2float %d + %43 = OpFUnordLessThan %v2bool %41 %42 + OpStore %c9 %43 + %45 = OpLoad %v2float %c + %46 = OpLoad %v2float %d + %47 = OpFUnordGreaterThan %v2bool %45 %46 + OpStore %c10 %47 + %49 = OpLoad %v2float %c + %50 = OpLoad %v2float %d + %51 = OpFUnordLessThanEqual %v2bool %49 %50 + OpStore %c11 %51 + %53 = OpLoad %v2float %c + %54 = OpLoad %v2float %d + %55 = OpFUnordGreaterThanEqual %v2bool %53 %54 + OpStore %c12 %55 + %62 = OpLoad %v3float %e + %64 = OpLoad %v3float %f + %65 = OpFUnordEqual %v3bool %62 %64 + OpStore %c13 %65 + %67 = OpLoad %v3float %e + %68 = OpLoad %v3float %f + %69 = OpFUnordNotEqual %v3bool %67 %68 + OpStore %c14 %69 + %71 = OpLoad %v3float %e + %72 = OpLoad %v3float %f + %73 = OpFUnordLessThan %v3bool %71 %72 + OpStore %c15 %73 + %75 = OpLoad %v3float %e + %76 = OpLoad %v3float %f + %77 = OpFUnordGreaterThan %v3bool %75 %76 + OpStore %c16 %77 + %79 = OpLoad %v3float %e + %80 = OpLoad %v3float %f + %81 = OpFUnordLessThanEqual %v3bool %79 %80 + OpStore %c17 %81 + %83 = OpLoad %v3float %e + %84 = OpLoad %v3float %f + %85 = OpFUnordGreaterThanEqual %v3bool %83 %84 + OpStore %c18 %85 + %92 = OpLoad %v4float %g + %94 = OpLoad %v4float %h + %95 = OpFUnordEqual %v4bool %92 %94 + OpStore %c19 %95 + %97 = OpLoad %v4float %g + %98 = OpLoad %v4float %h + %99 = OpFUnordNotEqual %v4bool %97 %98 + OpStore %c20 %99 + %101 = OpLoad %v4float %g + %102 = OpLoad %v4float %h + %103 = OpFUnordLessThan %v4bool %101 %102 + OpStore %c21 %103 + %105 = OpLoad %v4float %g + %106 = OpLoad %v4float %h + %107 = OpFUnordGreaterThan %v4bool %105 %106 + OpStore %c22 %107 + %109 = OpLoad %v4float %g + %110 = OpLoad %v4float %h + %111 = OpFUnordLessThanEqual %v4bool %109 %110 + OpStore %c23 %111 + %113 = OpLoad %v4float %g + %114 = OpLoad %v4float %h + %115 = OpFUnordGreaterThanEqual %v4bool %113 %114 + OpStore %c24 %115 + %118 = OpLoad %float %t0 + %119 = OpLoad %float %t1 + %120 = OpFAdd %float %118 %119 + %121 = OpCompositeConstruct %v4float %120 %120 %120 %120 + OpStore %FragColor %121 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag index 4ad8fc5a..15286e08 100644 --- a/shaders-no-opt/asm/frag/unordered-compare.asm.frag +++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -93,6 +93,8 @@ %42 = OpLoad %v4float %B %43 = OpFUnordNotEqual %v4bool %41 %42 OpStore %neq %43 + %ordered = OpFOrdNotEqual %v4bool %41 %42 + OpStore %neq %ordered %44 = OpLoad %v4bool %le %49 = OpSelect %v4float %44 %48 %47 %50 = OpLoad %v4bool %leq diff --git a/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000..15286e08 --- /dev/null +++ b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %ordered = OpFOrdNotEqual %v4bool %41 %42 + OpStore %neq %ordered + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000..6c060eed --- /dev/null +++ b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + OpMemberDecorate %SSBO 5 Offset 56 + OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %45 = OpLoad %v2float %44 + %46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %89 = OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 = OpLoad %float %131 + %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index db98de15..4a62b635 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -475,6 +475,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT: options->glsl.ovr_multiview_view_count = value; break; + case SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS: + options->glsl.relax_nan_checks = value != 0; + break; #endif #if SPIRV_CROSS_C_API_HLSL diff --git a/spirv_cross_c.h b/spirv_cross_c.h index a590c805..a35a5d65 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -40,7 +40,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 48 +#define SPVC_C_API_VERSION_MINOR 49 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -677,6 +677,8 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index bacf709c..4c465133 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -7307,6 +7307,63 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool return farg_str; } +Op CompilerGLSL::get_remapped_spirv_op(Op op) const +{ + if (options.relax_nan_checks) + { + switch (op) + { + case OpFUnordLessThan: + op = OpFOrdLessThan; + break; + case OpFUnordLessThanEqual: + op = OpFOrdLessThanEqual; + break; + case OpFUnordGreaterThan: + op = OpFOrdGreaterThan; + break; + case OpFUnordGreaterThanEqual: + op = OpFOrdGreaterThanEqual; + break; + case OpFUnordEqual: + op = OpFOrdEqual; + break; + case OpFOrdNotEqual: + op = OpFUnordNotEqual; + break; + + default: + break; + } + } + + return op; +} + +GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const +{ + // Relax to non-NaN aware opcodes. + if (options.relax_nan_checks) + { + switch (std450_op) + { + case GLSLstd450NClamp: + std450_op = GLSLstd450FClamp; + break; + case GLSLstd450NMin: + std450_op = GLSLstd450FMin; + break; + case GLSLstd450NMax: + std450_op = GLSLstd450FMax; + break; + default: + break; + } + } + + return std450_op; +} + void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) { auto op = static_cast(eop); @@ -7319,6 +7376,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { // FP fiddling @@ -10094,6 +10153,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + opcode = get_remapped_spirv_op(opcode); + switch (opcode) { // Dealing with memory @@ -11220,7 +11281,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + case OpFUnordNotEqual: { + // GLSL is fuzzy on what to do with ordered vs unordered not equal. + // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, + // but this means we have no easy way of implementing ordered not equal. if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(notEqual); else @@ -12540,7 +12605,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; case OpFUnordEqual: - case OpFUnordNotEqual: case OpFUnordLessThan: case OpFUnordGreaterThan: case OpFUnordLessThanEqual: @@ -12563,10 +12627,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) comp_op = "notEqual"; break; - case OpFUnordNotEqual: - comp_op = "equal"; - break; - case OpFUnordLessThan: comp_op = "greaterThanEqual"; break; @@ -12599,10 +12659,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) comp_op = " != "; break; - case OpFUnordNotEqual: - comp_op = " == "; - break; - case OpFUnordLessThan: comp_op = " >= "; break; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index b2073abd..b892e0c3 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -138,6 +138,13 @@ public: // what happens on legacy GLSL targets for blocks and structs. bool force_flattened_io_blocks = false; + // For opcodes where we have to perform explicit additional nan checks, very ugly code is generated. + // If we opt-in, ignore these requirements. + // In opcodes like NClamp/NMin/NMax and FP compare, ignore NaN behavior. + // Use FClamp/FMin/FMax semantics for clamps and lets implementation choose ordered or unordered + // compares. + bool relax_nan_checks = false; + // If non-zero, controls layout(num_views = N) in; in GL_OVR_multiview2. uint32_t ovr_multiview_view_count = 0; @@ -362,6 +369,11 @@ protected: virtual void emit_instruction(const Instruction &instr); void emit_block_instructions(SPIRBlock &block); + + // For relax_nan_checks. + GLSLstd450 get_remapped_glsl_op(GLSLstd450 std450_op) const; + spv::Op get_remapped_spirv_op(spv::Op op) const; + virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op, diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 4e4e4ca7..646fce33 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -3524,6 +3524,8 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { case GLSLstd450InverseSqrt: @@ -4792,6 +4794,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + opcode = get_remapped_spirv_op(opcode); + switch (opcode) { case OpAccessChain: diff --git a/spirv_msl.cpp b/spirv_msl.cpp index db5ef5d1..4eeb3b7f 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -7632,6 +7632,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto ops = stream(instruction); auto opcode = static_cast(instruction.op); + opcode = get_remapped_spirv_op(opcode); + // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); @@ -7674,6 +7676,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + // TODO: Should probably negate the == result here. + // Typically OrdNotEqual comes from GLSL which itself does not really specify what + // happens with NaN. + // Consider fixing this if we run into real issues. MSL_BOP(!=); break; @@ -7730,7 +7736,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; case OpFUnordNotEqual: - MSL_UNORD_BOP(!=); + // not equal in MSL generates une opcodes to begin with. + // Since unordered not equal is how it works in C, just inherit that behavior. + MSL_BOP(!=); break; case OpFUnordGreaterThan: @@ -8993,6 +9001,8 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { case GLSLstd450Sinh: diff --git a/test_shaders.py b/test_shaders.py index 640baa23..49038939 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -350,6 +350,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): if '.mask-clip-distance.' in shader: msl_args.append('--mask-stage-output-builtin') msl_args.append('ClipDistance') + if '.relax-nan.' in shader: + msl_args.append('--relax-nan-checks') subprocess.check_call(msl_args) @@ -474,6 +476,8 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati hlsl_args.append('--hlsl-enable-16bit-types') if '.flatten-matrix-vertex-input.' in shader: hlsl_args.append('--hlsl-flatten-matrix-vertex-input-semantics') + if '.relax-nan.' in shader: + hlsl_args.append('--relax-nan-checks') subprocess.check_call(hlsl_args) @@ -576,6 +580,8 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl extra_args += ['--force-zero-initialized-variables'] if '.force-flattened-io.' in shader: extra_args += ['--glsl-force-flattened-io-blocks'] + if '.relax-nan.' in shader: + extra_args.append('--relax-nan-checks') spirv_cross_path = paths.spirv_cross