GLSL: Support f16x2 <-> f32 bitcast.

There is no native formulation, so introduce a concept of a "complex" bitcast to handle odd-ball cases which have no native unary operation.
2020-04-21 23:27:33 +02:00 · 2020-04-21 23:27:33 +02:00 · 5e5d1c27ce
commit 5e5d1c27ce
parent c58839bfd4
11 changed files with 235 additions and 2 deletions
--- a/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp
+++ b/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp
@ -0,0 +1,21 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    half2 a;
+    float b;
+    float c;
+    half2 d;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
+
+kernel void main0(device SSBO& _4 [[buffer(0)]])
+{
+    _4.b = as_type<float>(_4.a);
+    _4.d = as_type<half2>(_4.c);
+}
+
--- a/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp
+++ b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp
@ -0,0 +1,24 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    f16vec2 a;
+    float b;
+    float c;
+    f16vec2 d;
+} _4;
+
+void main()
+{
+    _4.b = uintBitsToFloat(packFloat2x16(_4.a));
+    _4.d = unpackFloat2x16(floatBitsToUint(_4.c));
+}
+
--- a/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk
+++ b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk
@ -0,0 +1,25 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO
+{
+    f16vec2 a;
+    float b;
+    float c;
+    f16vec2 d;
+} _4;
+
+void main()
+{
+    _4.b = uintBitsToFloat(packFloat2x16(_4.a));
+    _4.d = unpackFloat2x16(floatBitsToUint(_4.c));
+}
+
--- a/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp
+++ b/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp
@ -0,0 +1,63 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 8
+; Bound: 33
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability StorageBuffer16BitAccess
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types"
+               OpName %main "main"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "a"
+               OpMemberName %SSBO 1 "b"
+               OpMemberName %SSBO 2 "c"
+               OpMemberName %SSBO 3 "d"
+               OpName %_ ""
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 4
+               OpMemberDecorate %SSBO 2 Offset 8
+               OpMemberDecorate %SSBO 3 Offset 12
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+               OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+      %float = OpTypeFloat 32
+       %SSBO = OpTypeStruct %v2half %float %float %v2half
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %uint = OpTypeInt 32 0
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+      %int_3 = OpConstant %int 3
+      %int_2 = OpConstant %int 2
+     %v3uint = OpTypeVector %uint 3
+     %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0
+         %17 = OpLoad %v2half %16
+         %20 = OpBitcast %float %17
+         %22 = OpAccessChain %_ptr_Uniform_float %_ %int_1
+               OpStore %22 %20
+         %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2
+         %26 = OpLoad %float %25
+         %28 = OpBitcast %v2half %26
+         %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3
+               OpStore %29 %28
+               OpReturn
+               OpFunctionEnd
--- a/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp
+++ b/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp
@ -0,0 +1,63 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 8
+; Bound: 33
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability StorageBuffer16BitAccess
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types"
+               OpName %main "main"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "a"
+               OpMemberName %SSBO 1 "b"
+               OpMemberName %SSBO 2 "c"
+               OpMemberName %SSBO 3 "d"
+               OpName %_ ""
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 4
+               OpMemberDecorate %SSBO 2 Offset 8
+               OpMemberDecorate %SSBO 3 Offset 12
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+               OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+      %float = OpTypeFloat 32
+       %SSBO = OpTypeStruct %v2half %float %float %v2half
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %uint = OpTypeInt 32 0
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+      %int_3 = OpConstant %int 3
+      %int_2 = OpConstant %int 2
+     %v3uint = OpTypeVector %uint 3
+     %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0
+         %17 = OpLoad %v2half %16
+         %20 = OpBitcast %float %17
+         %22 = OpAccessChain %_ptr_Uniform_float %_ %int_1
+               OpStore %22 %20
+         %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2
+         %26 = OpLoad %float %25
+         %28 = OpBitcast %v2half %26
+         %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3
+               OpStore %29 %28
+               OpReturn
+               OpFunctionEnd
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -4636,6 +4636,26 @@ SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_o
 	return expected_type;
 }

+bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
+{
+	// Some bitcasts may require complex casting sequences, and are implemented here.
+	// Otherwise a simply unary function will do with bitcast_glsl_op.
+
+	auto &output_type = get<SPIRType>(result_type);
+	auto &input_type = expression_type(op0);
+	string expr;
+
+	if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
+		expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
+	else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && input_type.vecsize == 2)
+		expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
+	else
+		return false;
+
+	emit_op(result_type, id, expr, should_forward(op0));
+	return true;
+}
+
 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
 {
@ -9574,8 +9594,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t arg = ops[2];

-		auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
-		emit_unary_func_op(result_type, id, arg, op.c_str());
+		if (!emit_complex_bitcast(result_type, id, arg))
+		{
+			auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
+			emit_unary_func_op(result_type, id, arg, op.c_str());
+		}
 		break;
 	}

--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -514,6 +514,8 @@ protected:
 	SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type,
 	                                  uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type);

+	virtual bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0);
+
 	std::string to_ternary_expression(const SPIRType &result_type, uint32_t select, uint32_t true_value,
 	                                  uint32_t false_value);

--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -3216,6 +3216,11 @@ void CompilerHLSL::emit_uniform(const SPIRVariable &var)
 		emit_legacy_uniform(var);
 }

+bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
+{
+	return false;
+}
+
 string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@ -216,6 +216,7 @@ private:
 	std::string layout_for_member(const SPIRType &type, uint32_t index) override;
 	std::string to_interpolation_qualifiers(const Bitset &flags) override;
 	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
+	bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) override;
 	std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
 	std::string to_sampler_expression(uint32_t id);
 	std::string to_resource_binding(const SPIRVariable &var);
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@ -11346,6 +11346,11 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in
 	}
 }

+bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
+{
+	return false;
+}
+
 // Returns an MSL string identifying the name of a SPIR-V builtin.
 // Output builtins are qualified with the name of the stage out structure.
 string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@ -635,6 +635,7 @@ protected:
 	bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override;

 	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
+	bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override;
 	bool skip_argument(uint32_t id) const override;
 	std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override;
 	std::string to_qualifiers_glsl(uint32_t id) override;