From 14a4b087fbaa06d2780668816baa3fbdffd56e83 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 14 Oct 2019 13:48:22 +0200 Subject: [PATCH 1/4] GLSL: Support unordered floating point compare. There is no direct way to express this, so invert boolean results to force any NaN -> true. glslang emits Ordered compare instructions everywhere, and the GLSL spec is not clear on this, so assume this is fine. --- .../asm/frag/unordered-compare.asm.frag | 33 ++++ .../asm/frag/unordered-compare.asm.frag | 177 ++++++++++++++++++ spirv_glsl.cpp | 102 ++++++++-- 3 files changed, 296 insertions(+), 16 deletions(-) create mode 100644 reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag create mode 100644 shaders-no-opt/asm/frag/unordered-compare.asm.frag diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000..2cc81295 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,33 @@ +#version 450 + +layout(location = 0) in vec4 A; +layout(location = 1) in vec4 B; +layout(location = 0) out vec4 FragColor; + +vec4 test_vector() +{ + bvec4 le = not(greaterThanEqual(A, B)); + bvec4 leq = not(greaterThan(A, B)); + bvec4 ge = not(lessThanEqual(A, B)); + bvec4 geq = not(lessThan(A, B)); + bvec4 eq = not(notEqual(A, B)); + bvec4 neq = not(equal(A, B)); + return ((((mix(vec4(0.0), vec4(1.0), le) + mix(vec4(0.0), vec4(1.0), leq)) + mix(vec4(0.0), vec4(1.0), ge)) + mix(vec4(0.0), vec4(1.0), geq)) + mix(vec4(0.0), vec4(1.0), eq)) + mix(vec4(0.0), vec4(1.0), neq); +} + +float test_scalar() +{ + bool le = !(A.x >= B.x); + bool leq = !(A.x > B.x); + bool ge = !(A.x <= B.x); + bool geq = !(A.x < B.x); + bool eq = !(A.x != B.x); + bool neq = !(A.x == B.x); + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void main() +{ + FragColor = test_vector() + vec4(test_scalar()); +} + diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000..4ad8fc5a --- /dev/null +++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,177 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 0dd234eb..34c95d87 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -10073,28 +10073,98 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; case OpFUnordEqual: - GLSL_BFOP(unsupported_FUnordEqual); - break; - case OpFUnordNotEqual: - GLSL_BFOP(unsupported_FUnordNotEqual); - break; - case OpFUnordLessThan: - GLSL_BFOP(unsupported_FUnordLessThan); - break; - case OpFUnordGreaterThan: - GLSL_BFOP(unsupported_FUnordGreaterThan); - break; - case OpFUnordLessThanEqual: - GLSL_BFOP(unsupported_FUnordLessThanEqual); - break; - case OpFUnordGreaterThanEqual: - GLSL_BFOP(unsupported_FUnordGreaterThanEqual); + { + // GLSL doesn't specify if floating point comparisons are ordered or unordered, + // but glslang always emits ordered floating point compares for GLSL. + // To get unordered compares, we can test the opposite thing and invert the result. + // This way, we force true when there is any NaN present. + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + string expr; + if (expression_type(op0).vecsize > 1) + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = "notEqual"; + break; + + case OpFUnordNotEqual: + comp_op = "equal"; + break; + + case OpFUnordLessThan: + comp_op = "greaterThanEqual"; + break; + + case OpFUnordLessThanEqual: + comp_op = "greaterThan"; + break; + + case OpFUnordGreaterThan: + comp_op = "lessThanEqual"; + break; + + case OpFUnordGreaterThanEqual: + comp_op = "lessThan"; + break; + + default: + assert(0); + break; + } + + expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); + } + else + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = " != "; + break; + + case OpFUnordNotEqual: + comp_op = " == "; + break; + + case OpFUnordLessThan: + comp_op = " >= "; + break; + + case OpFUnordLessThanEqual: + comp_op = " > "; + break; + + case OpFUnordGreaterThan: + comp_op = " <= "; + break; + + case OpFUnordGreaterThanEqual: + comp_op = " < "; + break; + + default: + assert(0); + break; + } + + expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); + } + + emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(ops[1], op0); + inherit_expression_dependencies(ops[1], op1); break; + } case OpReportIntersectionNV: statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); From b960ae3b7089c216b1647ba07c7f7b2a62f11a90 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 14 Oct 2019 15:04:00 +0200 Subject: [PATCH 2/4] HLSL: Partially implement Unordered compare. We cannot correctly implement unordered equal/ordered not equal without a lot of extra instructions which slows normal code down. --- .../asm/frag/unordered-compare.asm.frag | 51 +++++ .../asm/frag/unordered-compare.asm.frag | 177 ++++++++++++++++++ spirv_glsl.cpp | 10 +- spirv_glsl.hpp | 2 +- spirv_hlsl.cpp | 90 +++++++-- 5 files changed, 314 insertions(+), 16 deletions(-) create mode 100644 reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag create mode 100644 shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000..1143ade7 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,51 @@ +static float4 A; +static float4 B; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 A : TEXCOORD0; + float4 B : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float4 test_vector() +{ + bool4 le = bool4(!(A.x >= B.x), !(A.y >= B.y), !(A.z >= B.z), !(A.w >= B.w)); + bool4 leq = bool4(!(A.x > B.x), !(A.y > B.y), !(A.z > B.z), !(A.w > B.w)); + bool4 ge = bool4(!(A.x <= B.x), !(A.y <= B.y), !(A.z <= B.z), !(A.w <= B.w)); + bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w)); + bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); + bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + return ((((float4(le.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, le.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, le.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, le.w ? 1.0f.xxxx.w : 0.0f.xxxx.w) + float4(leq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, leq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, leq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, leq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(ge.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, ge.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, ge.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, ge.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(geq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, geq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, geq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, geq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(eq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, eq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, eq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, eq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w)) + float4(neq.x ? 1.0f.xxxx.x : 0.0f.xxxx.x, neq.y ? 1.0f.xxxx.y : 0.0f.xxxx.y, neq.z ? 1.0f.xxxx.z : 0.0f.xxxx.z, neq.w ? 1.0f.xxxx.w : 0.0f.xxxx.w); +} + +float test_scalar() +{ + bool le = !(A.x >= B.x); + bool leq = !(A.x > B.x); + bool ge = !(A.x <= B.x); + bool geq = !(A.x < B.x); + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void frag_main() +{ + FragColor = test_vector() + test_scalar().xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + A = stage_input.A; + B = stage_input.B; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000..4ad8fc5a --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,177 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 34c95d87..e7fb3fe3 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -4159,7 +4159,7 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_ } void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) + const char *op, bool negate) { auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); @@ -4168,11 +4168,15 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. + if (negate) + expr += "!("; expr += to_extract_component_expression(op0, i); expr += ' '; expr += op; expr += ' '; expr += to_extract_component_expression(op1, i); + if (negate) + expr += ")"; if (i + 1 < type.vecsize) expr += ", "; @@ -8838,7 +8842,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &type = get(result_type); if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false); else GLSL_BOP(||); break; @@ -8852,7 +8856,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &type = get(result_type); if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false); else GLSL_BOP(&&); break; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index e989a790..ddce638b 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -471,7 +471,7 @@ protected: void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); - void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, bool negate); void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 1946096e..8241d3b6 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -4194,7 +4194,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false); else HLSL_BOP_CAST(==, int_type); break; @@ -4202,12 +4202,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalEqual: case OpFOrdEqual: + case OpFUnordEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. + // HACK: FUnordEqual will be implemented as FOrdEqual. + auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false); else HLSL_BOP(==); break; @@ -4219,7 +4226,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false); else HLSL_BOP_CAST(!=, int_type); break; @@ -4227,12 +4234,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + case OpFUnordNotEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + + // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. + // We would need to do something like not(UnordEqual), but that cannot be expressed either. + // Adding a lot of NaN checks would be a breaking change from perspective of performance. + // SPIR-V will generally use isnan() checks when this even matters. + // HACK: FOrdNotEqual will be implemented as FUnordEqual. + auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false); else HLSL_BOP(!=); break; @@ -4246,7 +4264,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false); else HLSL_BOP_CAST(>, type); break; @@ -4258,12 +4276,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false); else HLSL_BOP(>); break; } + case OpFUnordGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpUGreaterThanEqual: case OpSGreaterThanEqual: { @@ -4272,7 +4302,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false); else HLSL_BOP_CAST(>=, type); break; @@ -4284,12 +4314,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false); else HLSL_BOP(>=); break; } + case OpFUnordGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThan: case OpSLessThan: { @@ -4298,7 +4340,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false); else HLSL_BOP_CAST(<, type); break; @@ -4310,12 +4352,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false); else HLSL_BOP(<); break; } + case OpFUnordLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThanEqual: case OpSLessThanEqual: { @@ -4324,7 +4378,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false); else HLSL_BOP_CAST(<=, type); break; @@ -4336,12 +4390,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false); else HLSL_BOP(<=); break; } + case OpFUnordLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpImageQueryLod: emit_texture_op(instruction); break; From 3bf9fa7ed6b11fca301a60c0076d4caab4d4e782 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 14 Oct 2019 15:23:38 +0200 Subject: [PATCH 3/4] GLSL: Deal correctly with bitwidth on integer compares. --- spirv_glsl.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index e7fb3fe3..ca420d5c 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -8913,7 +8913,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpUGreaterThan: case OpSGreaterThan: { - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThan, type); else @@ -8933,7 +8933,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpUGreaterThanEqual: case OpSGreaterThanEqual: { - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThanEqual, type); else @@ -8953,7 +8953,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpULessThan: case OpSLessThan: { - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThan, type); else @@ -8973,7 +8973,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpULessThanEqual: case OpSLessThanEqual: { - auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThanEqual, type); else From a9be92569f4c1f5ee813c04f220921583a7dcaf3 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 14 Oct 2019 16:08:39 +0200 Subject: [PATCH 4/4] HLSL: Fix unrolled S/G LE/LT/GE/GT opcodes. Need to bitcast the unrolled expressions as well. --- .../asm/comp/bitcast_icmp.asm.comp | 28 +++++ .../asm/comp/bitcast_icmp.asm.comp | 29 +++++ .../shaders/asm/comp/bitcast_icmp.asm.comp | 27 +++++ .../asm/comp/bitcast_icmp.asm.comp | 28 +++++ .../asm/comp/bitcast_icmp.asm.comp | 29 +++++ .../shaders/asm/comp/bitcast_icmp.asm.comp | 27 +++++ shaders-hlsl/asm/comp/bitcast_icmp.asm.comp | 101 ++++++++++++++++++ shaders-msl/asm/comp/bitcast_icmp.asm.comp | 101 ++++++++++++++++++ shaders/asm/comp/bitcast_icmp.asm.comp | 101 ++++++++++++++++++ spirv_glsl.cpp | 34 +++++- spirv_glsl.hpp | 3 +- spirv_hlsl.cpp | 41 +++---- 12 files changed, 523 insertions(+), 26 deletions(-) create mode 100644 reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/shaders/asm/comp/bitcast_icmp.asm.comp create mode 100644 shaders-hlsl/asm/comp/bitcast_icmp.asm.comp create mode 100644 shaders-msl/asm/comp/bitcast_icmp.asm.comp create mode 100644 shaders/asm/comp/bitcast_icmp.asm.comp diff --git a/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..35143a48 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,28 @@ +RWByteAddressBuffer _5 : register(u0); +RWByteAddressBuffer _6 : register(u1); + +void comp_main() +{ + bool4 _31 = bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w); + bool4 _32 = bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w); + bool4 _33 = bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)); + bool4 _34 = bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)); + bool4 _35 = bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w); + bool4 _36 = bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w); + bool4 _37 = bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)); + bool4 _38 = bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)); + _6.Store4(0, uint4(_31.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _31.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _31.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _31.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_32.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _32.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _32.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _32.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_33.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _33.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _33.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _33.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_34.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _34.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _34.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _34.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_35.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _35.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _35.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _35.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_36.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _36.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _36.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _36.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_37.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _37.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _37.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _37.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_38.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _38.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _38.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _38.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..31c71daa --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]]) +{ + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) < _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) <= _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 < uint4(_5._m0)); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 <= uint4(_5._m0)); + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) > _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) >= _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 > uint4(_5._m0)); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 >= uint4(_5._m0)); +} + diff --git a/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..bed3b90a --- /dev/null +++ b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3_5 +{ + ivec4 _m0; + uvec4 _m1; +} _5; + +layout(binding = 1, std430) restrict buffer _4_6 +{ + uvec4 _m0; + ivec4 _m1; +} _6; + +void main() +{ + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(_5._m1, uvec4(_5._m0))); +} + diff --git a/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..35143a48 --- /dev/null +++ b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,28 @@ +RWByteAddressBuffer _5 : register(u0); +RWByteAddressBuffer _6 : register(u1); + +void comp_main() +{ + bool4 _31 = bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w); + bool4 _32 = bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w); + bool4 _33 = bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)); + bool4 _34 = bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)); + bool4 _35 = bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w); + bool4 _36 = bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w); + bool4 _37 = bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)); + bool4 _38 = bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)); + _6.Store4(0, uint4(_31.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _31.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _31.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _31.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_32.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _32.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _32.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _32.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_33.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _33.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _33.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _33.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_34.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _34.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _34.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _34.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_35.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _35.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _35.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _35.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_36.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _36.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _36.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _36.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_37.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _37.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _37.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _37.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(_38.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _38.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _38.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _38.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..31c71daa --- /dev/null +++ b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]]) +{ + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) < _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) <= _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 < uint4(_5._m0)); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 <= uint4(_5._m0)); + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) > _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) >= _5._m0); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 > uint4(_5._m0)); + _6._m0 = select(uint4(0u), uint4(1u), _5._m1 >= uint4(_5._m0)); +} + diff --git a/reference/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..bed3b90a --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3_5 +{ + ivec4 _m0; + uvec4 _m1; +} _5; + +layout(binding = 1, std430) restrict buffer _4_6 +{ + uvec4 _m0; + ivec4 _m1; +} _6; + +void main() +{ + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(_5._m1, uvec4(_5._m0))); +} + diff --git a/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..b7b4e0b2 --- /dev/null +++ b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..b7b4e0b2 --- /dev/null +++ b/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_icmp.asm.comp b/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000..b7b4e0b2 --- /dev/null +++ b/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index ca420d5c..9fd4ff05 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -4159,8 +4159,18 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_ } void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op, bool negate) + const char *op, bool negate, SPIRType::BaseType expected_type) { + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + SPIRType target_type0 = type0; + SPIRType target_type1 = type1; + target_type0.basetype = expected_type; + target_type1.basetype = expected_type; + target_type0.vecsize = 1; + target_type1.vecsize = 1; + auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; @@ -4170,11 +4180,21 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result // that these expressions are properly flushed to temporaries if needed. if (negate) expr += "!("; - expr += to_extract_component_expression(op0, i); + + if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) + expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); + else + expr += to_extract_component_expression(op0, i); + expr += ' '; expr += op; expr += ' '; - expr += to_extract_component_expression(op1, i); + + if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) + expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); + else + expr += to_extract_component_expression(op1, i); + if (negate) expr += ")"; @@ -7820,6 +7840,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst case OpSLessThanEqual: case OpSGreaterThan: case OpSGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: return expression_type(ops[2]).width; default: @@ -8842,7 +8866,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &type = get(result_type); if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); else GLSL_BOP(||); break; @@ -8856,7 +8880,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &type = get(result_type); if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); else GLSL_BOP(&&); break; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index ddce638b..6f59bd82 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -471,7 +471,8 @@ protected: void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); - void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, bool negate); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + bool negate, SPIRType::BaseType expected_type); void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 8241d3b6..4d4e276c 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -4032,6 +4032,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); switch (opcode) { @@ -4194,7 +4195,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP_CAST(==, int_type); break; @@ -4214,7 +4215,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP(==); break; @@ -4226,7 +4227,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP_CAST(!=, int_type); break; @@ -4250,7 +4251,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP(!=); break; @@ -4261,10 +4262,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); else HLSL_BOP_CAST(>, type); break; @@ -4276,7 +4277,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); else HLSL_BOP(>); break; @@ -4288,7 +4289,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); else CompilerGLSL::emit_instruction(instruction); break; @@ -4300,9 +4301,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); else HLSL_BOP_CAST(>=, type); break; @@ -4314,7 +4315,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); else HLSL_BOP(>=); break; @@ -4326,7 +4327,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); else CompilerGLSL::emit_instruction(instruction); break; @@ -4338,9 +4339,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); else HLSL_BOP_CAST(<, type); break; @@ -4352,7 +4353,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); else HLSL_BOP(<); break; @@ -4364,7 +4365,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); else CompilerGLSL::emit_instruction(instruction); break; @@ -4376,9 +4377,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); else HLSL_BOP_CAST(<=, type); break; @@ -4390,7 +4391,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); else HLSL_BOP(<=); break; @@ -4402,7 +4403,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); else CompilerGLSL::emit_instruction(instruction); break;