Merge pull request #1046 from KhronosGroup/texture-fp16-coord

MSL: Fix sampling with FP16 coordinates.
This commit is contained in:
Hans-Kristian Arntzen 2019-06-27 15:22:57 +02:00 committed by GitHub
commit d1bdb6d491
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 405 additions and 10 deletions

View File

@ -0,0 +1,29 @@
// Fragment shader that samples a 2D texture using a min16float2 coordinate
// and writes the result as a min16float4 colour.

// Texture bound to register t0 and its sampler bound to register s0.
Texture2D<float4> uTexture : register(t0);
SamplerState _uTexture_sampler : register(s0);
// File-scope statics that carry the stage input and output between main() and frag_main().
static min16float4 FragColor;
static min16float2 UV;
// Stage input: the texture coordinate, read from TEXCOORD0.
struct SPIRV_Cross_Input
{
min16float2 UV : TEXCOORD0;
};
// Stage output: the colour written to SV_Target0.
struct SPIRV_Cross_Output
{
min16float4 FragColor : SV_Target0;
};
// Shader body: samples uTexture at UV and converts the float4 sample to min16float4.
void frag_main()
{
FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
}
// Entry point: copies the stage input into the static UV, runs frag_main(),
// then returns the static FragColor in the output struct.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
UV = stage_input.UV;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@ -0,0 +1,22 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fragment output: half-precision colour written to colour attachment 0.
struct main0_out
{
half4 FragColor [[color(0)]];
};
// Fragment input: half-precision texture coordinate at user location 0.
struct main0_in
{
half2 UV [[user(locn0)]];
};
// Fragment entry point. Samples uTexture (texture slot 0) with uTextureSmplr (sampler slot 0).
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
{
main0_out out = {};
// The half2 coordinate is explicitly widened to float2 before sampling;
// the float sample is then narrowed to half4 for the output.
out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
return out;
}

View File

@ -0,0 +1,19 @@
#version 450
// Enable an FP16 extension: GL_AMD_gpu_shader_half_float is tried first, then
// GL_NV_gpu_shader5; compilation fails with #error if neither is defined.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#else
#error No extension available for FP16.
#endif
// Combined image sampler at binding 0.
layout(binding = 0) uniform sampler2D uTexture;
// Half-precision fragment output and input, each at location 0.
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
// Sample uTexture at the f16vec2 coordinate and convert the result to f16vec4.
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@ -0,0 +1,20 @@
#version 450
// Enable an FP16 arithmetic extension: GL_AMD_gpu_shader_half_float is tried first, then
// GL_EXT_shader_explicit_arithmetic_types_float16; compilation fails with #error if neither is defined.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#else
#error No extension available for FP16.
#endif
// 16-bit storage is required unconditionally.
#extension GL_EXT_shader_16bit_storage : require
// Combined image sampler at descriptor set 0, binding 0.
layout(set = 0, binding = 0) uniform sampler2D uTexture;
// Half-precision fragment output and input, each at location 0.
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
// Sample uTexture at the f16vec2 coordinate and convert the result to f16vec4.
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@ -30,16 +30,23 @@ void frag_main()
case 0:
{
_30 = 3;
j = _30;
_31 = 0;
j = _31;
break;
}
default:
{
j = _30;
_31 = 0;
j = _31;
break;
}
case 1:
case 11:
{
j = _31;
break;
}
case 2:
{
@ -59,6 +66,8 @@ void frag_main()
}
case 4:
{
i = 0;
break;
}
case 5:
{

View File

@ -0,0 +1,29 @@
// Fragment shader that samples a 2D texture using a min16float2 coordinate
// and writes the result as a min16float4 colour.

// Texture bound to register t0 and its sampler bound to register s0.
Texture2D<float4> uTexture : register(t0);
SamplerState _uTexture_sampler : register(s0);
// File-scope statics that carry the stage input and output between main() and frag_main().
static min16float4 FragColor;
static min16float2 UV;
// Stage input: the texture coordinate, read from TEXCOORD0.
struct SPIRV_Cross_Input
{
min16float2 UV : TEXCOORD0;
};
// Stage output: the colour written to SV_Target0.
struct SPIRV_Cross_Output
{
min16float4 FragColor : SV_Target0;
};
// Shader body: samples uTexture at UV and converts the float4 sample to min16float4.
void frag_main()
{
FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
}
// Entry point: copies the stage input into the static UV, runs frag_main(),
// then returns the static FragColor in the output struct.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
UV = stage_input.UV;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@ -0,0 +1,22 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fragment output: half-precision colour written to colour attachment 0.
struct main0_out
{
half4 FragColor [[color(0)]];
};
// Fragment input: half-precision texture coordinate at user location 0.
struct main0_in
{
half2 UV [[user(locn0)]];
};
// Fragment entry point. Samples uTexture (texture slot 0) with uTextureSmplr (sampler slot 0).
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
{
main0_out out = {};
// The half2 coordinate is explicitly widened to float2 before sampling;
// the float sample is then narrowed to half4 for the output.
out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
return out;
}

View File

@ -0,0 +1,19 @@
#version 450
// Enable an FP16 extension: GL_AMD_gpu_shader_half_float is tried first, then
// GL_NV_gpu_shader5; compilation fails with #error if neither is defined.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#else
#error No extension available for FP16.
#endif
// Combined image sampler at binding 0.
layout(binding = 0) uniform sampler2D uTexture;
// Half-precision fragment output and input, each at location 0.
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
// Sample uTexture at the f16vec2 coordinate and convert the result to f16vec4.
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@ -0,0 +1,20 @@
#version 450
// Enable an FP16 arithmetic extension: GL_AMD_gpu_shader_half_float is tried first, then
// GL_EXT_shader_explicit_arithmetic_types_float16; compilation fails with #error if neither is defined.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#else
#error No extension available for FP16.
#endif
// 16-bit storage is required unconditionally.
#extension GL_EXT_shader_16bit_storage : require
// Combined image sampler at descriptor set 0, binding 0.
layout(set = 0, binding = 0) uniform sampler2D uTexture;
// Half-precision fragment output and input, each at location 0.
layout(location = 0) out f16vec4 FragColor;
layout(location = 0) in f16vec2 UV;
void main()
{
// Sample uTexture at the f16vec2 coordinate and convert the result to f16vec4.
FragColor = f16vec4(texture(uTexture, UV));
}

View File

@ -0,0 +1,46 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 25
; Schema: 0
; Fragment shader module: samples %uTexture with a 16-bit float coordinate
; and stores the sample, converted to 16-bit floats, in %FragColor.
; Capabilities and extension needed for 16-bit input/output variables.
OpCapability Shader
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
; Fragment entry point "main" with interface variables %FragColor and %UV.
OpEntryPoint Fragment %main "main" %FragColor %UV
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
OpName %main "main"
OpName %FragColor "FragColor"
OpName %uTexture "uTexture"
OpName %UV "UV"
; Output and input both use location 0; the texture is at set 0, binding 0.
OpDecorate %FragColor Location 0
OpDecorate %uTexture DescriptorSet 0
OpDecorate %uTexture Binding 0
OpDecorate %UV Location 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
; 16-bit and 32-bit float scalars and their 4-component vectors.
%half = OpTypeFloat 16
%float = OpTypeFloat 32
%v4half = OpTypeVector %half 4
%v4float = OpTypeVector %float 4
%_ptr_Output_v4half = OpTypePointer Output %v4half
%FragColor = OpVariable %_ptr_Output_v4half Output
; 2D image whose sampled type is the 32-bit float, wrapped in a sampled-image type.
%11 = OpTypeImage %float 2D 0 0 0 1 Unknown
%12 = OpTypeSampledImage %11
%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
%uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
; The coordinate input is a 2-component 16-bit float vector.
%v2half = OpTypeVector %half 2
%_ptr_Input_v2half = OpTypePointer Input %v2half
%UV = OpVariable %_ptr_Input_v2half Input
%main = OpFunction %void None %3
%5 = OpLabel
; Load the sampled image and the 16-bit coordinate.
%15 = OpLoad %12 %uTexture
%19 = OpLoad %v2half %UV
; Sample using the 16-bit coordinate directly; the result type is the 32-bit float4.
%23 = OpImageSampleImplicitLod %v4float %15 %19
; Convert the sample to 16-bit floats and store it in the output.
%24 = OpFConvert %v4half %23
OpStore %FragColor %24
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,46 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 25
; Schema: 0
; Fragment shader module: samples %uTexture with a 16-bit float coordinate
; and stores the sample, converted to 16-bit floats, in %FragColor.
; Capabilities and extension needed for 16-bit input/output variables.
OpCapability Shader
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
; Fragment entry point "main" with interface variables %FragColor and %UV.
OpEntryPoint Fragment %main "main" %FragColor %UV
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
OpName %main "main"
OpName %FragColor "FragColor"
OpName %uTexture "uTexture"
OpName %UV "UV"
; Output and input both use location 0; the texture is at set 0, binding 0.
OpDecorate %FragColor Location 0
OpDecorate %uTexture DescriptorSet 0
OpDecorate %uTexture Binding 0
OpDecorate %UV Location 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
; 16-bit and 32-bit float scalars and their 4-component vectors.
%half = OpTypeFloat 16
%float = OpTypeFloat 32
%v4half = OpTypeVector %half 4
%v4float = OpTypeVector %float 4
%_ptr_Output_v4half = OpTypePointer Output %v4half
%FragColor = OpVariable %_ptr_Output_v4half Output
; 2D image whose sampled type is the 32-bit float, wrapped in a sampled-image type.
%11 = OpTypeImage %float 2D 0 0 0 1 Unknown
%12 = OpTypeSampledImage %11
%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
%uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
; The coordinate input is a 2-component 16-bit float vector.
%v2half = OpTypeVector %half 2
%_ptr_Input_v2half = OpTypePointer Input %v2half
%UV = OpVariable %_ptr_Input_v2half Input
%main = OpFunction %void None %3
%5 = OpLabel
; Load the sampled image and the 16-bit coordinate.
%15 = OpLoad %12 %uTexture
%19 = OpLoad %v2half %UV
; Sample using the 16-bit coordinate directly; the result type is the 32-bit float4.
%23 = OpImageSampleImplicitLod %v4float %15 %19
; Convert the sample to 16-bit floats and store it in the output.
%24 = OpFConvert %v4half %23
OpStore %FragColor %24
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,46 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 25
; Schema: 0
; Fragment shader module: samples %uTexture with a 16-bit float coordinate
; and stores the sample, converted to 16-bit floats, in %FragColor.
; Capabilities and extension needed for 16-bit input/output variables.
OpCapability Shader
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
; Fragment entry point "main" with interface variables %FragColor and %UV.
OpEntryPoint Fragment %main "main" %FragColor %UV
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 450
OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
OpName %main "main"
OpName %FragColor "FragColor"
OpName %uTexture "uTexture"
OpName %UV "UV"
; Output and input both use location 0; the texture is at set 0, binding 0.
OpDecorate %FragColor Location 0
OpDecorate %uTexture DescriptorSet 0
OpDecorate %uTexture Binding 0
OpDecorate %UV Location 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
; 16-bit and 32-bit float scalars and their 4-component vectors.
%half = OpTypeFloat 16
%float = OpTypeFloat 32
%v4half = OpTypeVector %half 4
%v4float = OpTypeVector %float 4
%_ptr_Output_v4half = OpTypePointer Output %v4half
%FragColor = OpVariable %_ptr_Output_v4half Output
; 2D image whose sampled type is the 32-bit float, wrapped in a sampled-image type.
%11 = OpTypeImage %float 2D 0 0 0 1 Unknown
%12 = OpTypeSampledImage %11
%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
%uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
; The coordinate input is a 2-component 16-bit float vector.
%v2half = OpTypeVector %half 2
%_ptr_Input_v2half = OpTypePointer Input %v2half
%UV = OpVariable %_ptr_Input_v2half Input
%main = OpFunction %void None %3
%5 = OpLabel
; Load the sampled image and the 16-bit coordinate.
%15 = OpLoad %12 %uTexture
%19 = OpLoad %v2half %UV
; Sample using the 16-bit coordinate directly; the result type is the 32-bit float4.
%23 = OpImageSampleImplicitLod %v4float %15 %19
; Convert the sample to 16-bit floats and store it in the output.
%24 = OpFConvert %v4half %23
OpStore %FragColor %24
OpReturn
OpFunctionEnd

View File

@ -4602,7 +4602,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
SmallVector<uint32_t> inherited_expressions;
uint32_t result_type = ops[0];
uint32_t result_type_id = ops[0];
uint32_t id = ops[1];
uint32_t img = ops[2];
uint32_t coord = ops[3];
@ -4613,6 +4613,8 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
bool fetch = false;
const uint32_t *opt = nullptr;
auto &result_type = get<SPIRType>(result_type_id);
inherited_expressions.push_back(coord);
switch (op)
@ -4771,14 +4773,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
image_is_depth = true;
if (image_is_depth)
expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
expr = remap_swizzle(result_type, 1, expr);
}
if (!backend.support_small_type_sampling_result && result_type.width < 32)
{
// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
// Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
}
// Deals with reads from MSL. We might need to downconvert to fewer components.
if (op == OpImageRead)
expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
expr = remap_swizzle(result_type, 4, expr);
emit_op(result_type, id, expr, forward);
emit_op(result_type_id, id, expr, forward);
for (auto &inherit : inherited_expressions)
inherit_expression_dependencies(id, inherit);
@ -9878,7 +9887,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
// Returns the precision qualifier string to emit for the expression/variable `id`.
//
// When precision qualifiers are in use (the backend allows them or we target ES) and `id`
// is an image or sampled image whose sampled type is narrower than 32 bits, "mediump " is
// returned regardless of the decoration flags. Otherwise the qualifiers are derived from
// the decoration flags recorded for `id`.
//
// The stale unconditional `return flags_to_qualifiers_glsl(expression_type(id), ...)` that
// used to be the first statement has been dropped: it made the mediump path unreachable.
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
	auto &type = expression_type(id);
	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
	{
		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
		auto &result_type = get<SPIRType>(type.image.type);
		if (result_type.width < 32)
			return "mediump ";
	}
	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
}
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@ -10094,15 +10112,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
switch (imagetype.basetype)
{
case SPIRType::Int:
case SPIRType::Short:
case SPIRType::SByte:
res = "i";
break;
case SPIRType::UInt:
case SPIRType::UShort:
case SPIRType::UByte:
res = "u";
break;
default:
break;
}
// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
return res + "subpassInput" + (type.image.ms ? "MS" : "");
@ -11677,7 +11702,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
}
auto &case_block = get<SPIRBlock>(target_block);
if (i + 1 < num_blocks &&
if (backend.support_case_fallthrough && i + 1 < num_blocks &&
execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
{
// We will fall through here, so just terminate the block chain early.

View File

@ -404,6 +404,8 @@ protected:
bool array_is_value_type = true;
bool comparison_image_samples_scalar = false;
bool native_pointers = false;
bool support_small_type_sampling_result = false;
bool support_case_fallthrough = true;
} backend;
void emit_struct(SPIRType &type);

View File

@ -4700,6 +4700,7 @@ string CompilerHLSL::compile()
backend.can_declare_arrays_inline = false;
backend.can_return_array = false;
backend.nonuniform_qualifier = "NonUniformResourceIndex";
backend.support_case_fallthrough = false;
fixup_type_alias();
reorder_type_alias();

View File

@ -708,6 +708,7 @@ string CompilerMSL::compile()
backend.comparison_image_samples_scalar = true;
backend.native_pointers = true;
backend.nonuniform_qualifier = "";
backend.support_small_type_sampling_result = true;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@ -4765,6 +4766,21 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool
return fname;
}
// Wraps `expr` in a constructor call for a 32-bit float type with `components`
// vector components (single column), yielding "<type>(<expr>)".
string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
{
	SPIRType float_type;
	float_type.basetype = SPIRType::Float;
	float_type.vecsize = components;
	float_type.columns = 1;

	// Build "<constructor>(<expr>)" piece by piece.
	string converted = type_to_glsl_constructor(float_type);
	converted += "(";
	converted += expr;
	converted += ")";
	return converted;
}
// Tells whether a value of `type` has to be converted to 32-bit float before it is
// used as a sampling argument. Only Half and Double base types qualify.
static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
{
	switch (type.basetype)
	{
	case SPIRType::Half:
	// Double is not supported in the first place; it is listed only for completeness.
	case SPIRType::Double:
		return true;

	default:
		return false;
	}
}
// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
@ -4803,6 +4819,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
else if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 1);
alt_coord_component = 1;
break;
@ -4838,6 +4856,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
else if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 2);
alt_coord_component = 2;
break;
@ -4848,6 +4868,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
else if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 3);
alt_coord_component = 3;
break;
@ -4865,6 +4887,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
tex_coords = enclose_expression(tex_coords) + ".xyz";
}
if (sampling_type_needs_f32_conversion(coord_type))
tex_coords = convert_to_f32(tex_coords, 3);
alt_coord_component = 3;
break;
@ -4895,7 +4920,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
// If projection, use alt coord as divisor
if (is_proj)
tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
{
if (sampling_type_needs_f32_conversion(coord_type))
tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
else
tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
}
if (!farg_str.empty())
farg_str += ", ";
@ -4929,11 +4959,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
forward = forward && should_forward(dref);
farg_str += ", ";
auto &dref_type = expression_type(dref);
string dref_expr;
if (is_proj)
farg_str +=
to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
dref_expr =
join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
else
farg_str += to_expression(dref);
dref_expr = to_expression(dref);
if (sampling_type_needs_f32_conversion(dref_type))
dref_expr = convert_to_f32(dref_expr, 1);
farg_str += dref_expr;
if (msl_options.is_macos() && (grad_x || grad_y))
{

View File

@ -544,6 +544,8 @@ protected:
void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);
std::string convert_to_f32(const std::string &expr, uint32_t components);
Options msl_options;
std::set<SPVFuncImpl> spv_function_implementations;
std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;