Handle more cases with FP16 and texture sampling.

2019-06-27 15:04:22 +02:00 · 2019-06-27 15:04:22 +02:00 · c76b99b711
commit c76b99b711
parent 656d129c00
15 changed files with 392 additions and 9 deletions
--- a/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static min16float4 FragColor;
+static min16float2 UV;
+
+struct SPIRV_Cross_Input
+{
+    min16float2 UV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    min16float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    UV = stage_input.UV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    half4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    half2 UV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
+    return out;
+}
+
--- a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
+++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@ -0,0 +1,19 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
+++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
@ -0,0 +1,20 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(set = 0, binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static min16float4 FragColor;
+static min16float2 UV;
+
+struct SPIRV_Cross_Input
+{
+    min16float2 UV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    min16float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    UV = stage_input.UV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    half4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    half2 UV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
+    return out;
+}
+
--- a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
+++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@ -0,0 +1,19 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
+++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
@ -0,0 +1,20 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(set = 0, binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
--- a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
+++ b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -4602,7 +4602,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)

 	SmallVector<uint32_t> inherited_expressions;

-	uint32_t result_type = ops[0];
+	uint32_t result_type_id = ops[0];
 	uint32_t id = ops[1];
 	uint32_t img = ops[2];
 	uint32_t coord = ops[3];
@ -4613,6 +4613,8 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	bool fetch = false;
 	const uint32_t *opt = nullptr;

+	auto &result_type = get<SPIRType>(result_type_id);
+
 	inherited_expressions.push_back(coord);

 	switch (op)
@ -4771,14 +4773,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 			image_is_depth = true;

 		if (image_is_depth)
-			expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
+			expr = remap_swizzle(result_type, 1, expr);
+	}
+
+	if (!backend.support_small_type_sampling_result && result_type.width < 32)
+	{
+		// Explicitly value-cast (narrow) to the expected type, since we cannot rely on narrowing to happen automatically.
+		// Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
+		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
 	}

 	// Deals with reads from MSL. We might need to downconvert to fewer components.
 	if (op == OpImageRead)
-		expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
+		expr = remap_swizzle(result_type, 4, expr);

-	emit_op(result_type, id, expr, forward);
+	emit_op(result_type_id, id, expr, forward);
 	for (auto &inherit : inherited_expressions)
 		inherit_expression_dependencies(id, inherit);

@ -9878,7 +9887,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B

 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 {
-	return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags);
+	auto &type = expression_type(id);
+	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
+	{
+		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
+		auto &result_type = get<SPIRType>(type.image.type);
+		if (result_type.width < 32)
+			return "mediump ";
+	}
+	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
 }

 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@ -10094,15 +10112,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
 	switch (imagetype.basetype)
 	{
 	case SPIRType::Int:
+	case SPIRType::Short:
+	case SPIRType::SByte:
 		res = "i";
 		break;
 	case SPIRType::UInt:
+	case SPIRType::UShort:
+	case SPIRType::UByte:
 		res = "u";
 		break;
 	default:
 		break;
 	}

+	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
+	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
+
 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
 		return res + "subpassInput" + (type.image.ms ? "MS" : "");

--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -404,6 +404,7 @@ protected:
 		bool array_is_value_type = true;
 		bool comparison_image_samples_scalar = false;
 		bool native_pointers = false;
+		bool support_small_type_sampling_result = false;
 	} backend;

 	void emit_struct(SPIRType &type);
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@ -708,6 +708,7 @@ string CompilerMSL::compile()
 	backend.comparison_image_samples_scalar = true;
 	backend.native_pointers = true;
 	backend.nonuniform_qualifier = "";
+	backend.support_small_type_sampling_result = true;

 	capture_output_to_buffer = msl_options.capture_output_to_buffer;
 	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@ -4765,6 +4766,21 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool
 	return fname;
 }

+string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
+{
+	SPIRType t;
+	t.basetype = SPIRType::Float;
+	t.vecsize = components;
+	t.columns = 1;
+	return join(type_to_glsl_constructor(t), "(", expr, ")");
+}
+
+static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
+{
+	// Double is not supported to begin with, but it doesn't hurt to check for completeness.
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
+}
+
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
 string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
                                     uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
@ -4803,6 +4819,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 		if (is_fetch)
 			tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 1);

 		alt_coord_component = 1;
 		break;
@ -4838,6 +4856,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 		if (is_fetch)
 			tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 2);

 		alt_coord_component = 2;
 		break;
@ -4848,6 +4868,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 		if (is_fetch)
 			tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);

 		alt_coord_component = 3;
 		break;
@ -4865,6 +4887,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 				tex_coords = enclose_expression(tex_coords) + ".xyz";
 		}

+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);
+
 		alt_coord_component = 3;
 		break;

@ -4895,7 +4920,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 	// If projection, use alt coord as divisor
 	if (is_proj)
-		tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	{
+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
+		else
+			tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	}

 	if (!farg_str.empty())
 		farg_str += ", ";
@ -4929,11 +4959,18 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		forward = forward && should_forward(dref);
 		farg_str += ", ";

+		auto &dref_type = expression_type(dref);
+
+		string dref_expr;
 		if (is_proj)
-			farg_str +=
-			    to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
+			dref_expr = join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
 		else
-			farg_str += to_expression(dref);
+			dref_expr = to_expression(dref);
+
+		if (sampling_type_needs_f32_conversion(dref_type))
+			dref_expr = convert_to_f32(dref_expr, 1);
+
+		farg_str += dref_expr;

 		if (msl_options.is_macos() && (grad_x || grad_y))
 		{
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@ -544,6 +544,8 @@ protected:

 	void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);

+	std::string convert_to_f32(const std::string &expr, uint32_t components);
+
 	Options msl_options;
 	std::set<SPVFuncImpl> spv_function_implementations;
 	std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;