Merge pull request #2331 from aitor-lunarg/force-depth-passthrough
MSL: Add option to force depth write in fragment shaders
This commit is contained in:
commit
d47a140735
7
main.cpp
7
main.cpp
@ -681,6 +681,7 @@ struct CLIArguments
|
||||
bool msl_replace_recursive_inputs = false;
|
||||
bool msl_readwrite_texture_fences = true;
|
||||
bool msl_agx_manual_cube_grad_fixup = false;
|
||||
bool msl_input_attachment_is_ds_attachment = false;
|
||||
const char *msl_combined_sampler_suffix = nullptr;
|
||||
bool glsl_emit_push_constant_as_ubo = false;
|
||||
bool glsl_emit_ubo_as_plain_uniforms = false;
|
||||
@ -873,6 +874,10 @@ static void print_help_msl()
|
||||
"\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n"
|
||||
"\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n"
|
||||
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
|
||||
"\t[--msl-input-attachment-is-ds-attachment]:\n\t\tAdds a simple depth passthrough in fragment shaders when they do not modify the depth value.\n"
|
||||
"\t\tRequired to force Metal to write to the depth/stencil attachment post fragment execution.\n"
|
||||
"\t\tOtherwise, Metal may optimize the write to pre fragment execution which goes against the Vulkan spec.\n"
|
||||
"\t\tOnly required if an input attachment and depth/stencil attachment reference the same resource.\n"
|
||||
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
|
||||
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
|
||||
"\t\tRequires an Apple GPU.\n"
|
||||
@ -1257,6 +1262,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
|
||||
msl_opts.ios_support_base_vertex_instance = true;
|
||||
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
|
||||
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
|
||||
msl_opts.input_attachment_is_ds_attachment = args.msl_input_attachment_is_ds_attachment;
|
||||
msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
|
||||
msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
|
||||
msl_comp->set_msl_options(msl_opts);
|
||||
@ -1823,6 +1829,7 @@ static int main_inner(int argc, char *argv[])
|
||||
[&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; });
|
||||
cbs.add("--msl-replace-recursive-inputs",
|
||||
[&args](CLIParser &) { args.msl_replace_recursive_inputs = true; });
|
||||
cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; });
|
||||
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
|
||||
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
|
||||
auto old_name = parser.next_string();
|
||||
|
@ -0,0 +1,17 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 color [[color(0)]];
|
||||
};
|
||||
|
||||
[[ early_fragment_tests ]] fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,19 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 color [[color(0)]];
|
||||
float gl_FragDepth [[depth(any)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
|
||||
out.gl_FragDepth = 1.0;
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,19 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 color [[color(0)]];
|
||||
float gl_FragDepth [[depth(any)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
|
||||
out.gl_FragDepth = gl_FragCoord.z;
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 color [[color(0)]];
|
||||
};
|
||||
|
||||
[[ early_fragment_tests ]] fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,19 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 color [[color(0)]];
|
||||
float gl_FragDepth [[depth(any)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
|
||||
out.gl_FragDepth = 1.0;
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,19 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 color [[color(0)]];
|
||||
float gl_FragDepth [[depth(any)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
|
||||
out.gl_FragDepth = gl_FragCoord.z;
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,12 @@
|
||||
#version 450
|
||||
|
||||
layout(early_fragment_tests) in;
|
||||
|
||||
layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;
|
||||
|
||||
layout (location = 0) out vec4 color;
|
||||
|
||||
void main()
|
||||
{
|
||||
color = subpassLoad(inputDepth);
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
#version 450
|
||||
|
||||
layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;
|
||||
|
||||
layout (location = 0) out vec4 color;
|
||||
|
||||
void main()
|
||||
{
|
||||
color = subpassLoad(inputDepth);
|
||||
gl_FragDepth = 1.0f;
|
||||
}
|
@ -0,0 +1,10 @@
|
||||
#version 450
|
||||
|
||||
layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;
|
||||
|
||||
layout (location = 0) out vec4 color;
|
||||
|
||||
void main()
|
||||
{
|
||||
color = subpassLoad(inputDepth);
|
||||
}
|
@ -271,11 +271,14 @@ void CompilerMSL::build_implicit_builtins()
|
||||
active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
|
||||
bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
|
||||
bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
|
||||
bool force_frag_depth_passthrough =
|
||||
get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input &&
|
||||
msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment;
|
||||
|
||||
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
|
||||
need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
|
||||
needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation ||
|
||||
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size)
|
||||
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough)
|
||||
{
|
||||
bool has_frag_coord = false;
|
||||
bool has_sample_id = false;
|
||||
@ -292,6 +295,7 @@ void CompilerMSL::build_implicit_builtins()
|
||||
bool has_helper_invocation = false;
|
||||
bool has_local_invocation_index = false;
|
||||
bool has_workgroup_size = false;
|
||||
bool has_frag_depth = false;
|
||||
uint32_t workgroup_id_type = 0;
|
||||
|
||||
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
|
||||
@ -312,6 +316,13 @@ void CompilerMSL::build_implicit_builtins()
|
||||
mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self);
|
||||
does_shader_write_sample_mask = true;
|
||||
}
|
||||
|
||||
if (force_frag_depth_passthrough && builtin == BuiltInFragDepth)
|
||||
{
|
||||
builtin_frag_depth_id = var.self;
|
||||
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self);
|
||||
has_frag_depth = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (var.storage != StorageClassInput)
|
||||
@ -902,6 +913,36 @@ void CompilerMSL::build_implicit_builtins()
|
||||
builtin_workgroup_size_id = var_id;
|
||||
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
|
||||
}
|
||||
|
||||
if (!has_frag_depth && force_frag_depth_passthrough)
|
||||
{
|
||||
uint32_t offset = ir.increase_bound_by(3);
|
||||
uint32_t type_id = offset;
|
||||
uint32_t type_ptr_id = offset + 1;
|
||||
uint32_t var_id = offset + 2;
|
||||
|
||||
// Create gl_FragDepth
|
||||
SPIRType float_type { OpTypeFloat };
|
||||
float_type.basetype = SPIRType::Float;
|
||||
float_type.width = 32;
|
||||
float_type.vecsize = 1;
|
||||
set<SPIRType>(type_id, float_type);
|
||||
|
||||
SPIRType float_type_ptr_in = float_type;
|
||||
float_type_ptr_in.op = spv::OpTypePointer;
|
||||
float_type_ptr_in.pointer = true;
|
||||
float_type_ptr_in.pointer_depth++;
|
||||
float_type_ptr_in.parent_type = type_id;
|
||||
float_type_ptr_in.storage = StorageClassOutput;
|
||||
|
||||
auto &ptr_in_type = set<SPIRType>(type_ptr_id, float_type_ptr_in);
|
||||
ptr_in_type.self = type_id;
|
||||
set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
|
||||
set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth);
|
||||
builtin_frag_depth_id = var_id;
|
||||
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id);
|
||||
active_output_builtins.set(BuiltInFragDepth);
|
||||
}
|
||||
}
|
||||
|
||||
if (needs_swizzle_buffer_def)
|
||||
@ -1571,6 +1612,8 @@ string CompilerMSL::compile()
|
||||
add_active_interface_variable(builtin_dispatch_base_id);
|
||||
if (builtin_sample_mask_id)
|
||||
add_active_interface_variable(builtin_sample_mask_id);
|
||||
if (builtin_frag_depth_id)
|
||||
add_active_interface_variable(builtin_frag_depth_id);
|
||||
|
||||
// Create structs to hold input, output and uniform variables.
|
||||
// Do output first to ensure out. is declared at top of entry function.
|
||||
@ -1869,8 +1912,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
|
||||
{
|
||||
uint32_t base_id = ops[0];
|
||||
if (global_var_ids.find(base_id) != global_var_ids.end())
|
||||
{
|
||||
added_arg_ids.insert(base_id);
|
||||
|
||||
if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id)
|
||||
writes_to_depth = true;
|
||||
}
|
||||
|
||||
uint32_t rvalue_id = ops[1];
|
||||
if (global_var_ids.find(rvalue_id) != global_var_ids.end())
|
||||
added_arg_ids.insert(rvalue_id);
|
||||
@ -14513,16 +14561,33 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
||||
}
|
||||
}
|
||||
else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment &&
|
||||
is_builtin_variable(var) && active_output_builtins.get(bi_type) &&
|
||||
bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask())
|
||||
is_builtin_variable(var) && active_output_builtins.get(bi_type))
|
||||
{
|
||||
// If the additional fixed sample mask was set, we need to adjust the sample_mask
|
||||
// output to reflect that. If the shader outputs the sample_mask itself too, we need
|
||||
// to AND the two masks to get the final one.
|
||||
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
|
||||
entry_func.fixup_hooks_out.push_back([=]() {
|
||||
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
|
||||
});
|
||||
switch (bi_type)
|
||||
{
|
||||
case BuiltInSampleMask:
|
||||
if (has_additional_fixed_sample_mask())
|
||||
{
|
||||
// If the additional fixed sample mask was set, we need to adjust the sample_mask
|
||||
// output to reflect that. If the shader outputs the sample_mask itself too, we need
|
||||
// to AND the two masks to get the final one.
|
||||
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
|
||||
entry_func.fixup_hooks_out.push_back([=]() {
|
||||
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
|
||||
});
|
||||
}
|
||||
break;
|
||||
case BuiltInFragDepth:
|
||||
if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth)
|
||||
{
|
||||
entry_func.fixup_hooks_out.push_back([=]() {
|
||||
statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;");
|
||||
});
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -529,6 +529,13 @@ public:
|
||||
// with side effects. Provided as an option hoping Metal will fix this issue in the future.
|
||||
bool force_fragment_with_side_effects_execution = false;
|
||||
|
||||
// If set, adds a depth pass through statement to circumvent the following issue:
|
||||
// When the same depth/stencil is used as input and depth/stencil attachment, we need to
|
||||
// force Metal to perform the depth/stencil write after fragment execution. Otherwise,
|
||||
// Metal will write to the depth attachment before fragment execution. This happens
|
||||
// if the fragment does not modify the depth value.
|
||||
bool input_attachment_is_ds_attachment = false;
|
||||
|
||||
bool is_ios() const
|
||||
{
|
||||
return platform == iOS;
|
||||
@ -1094,6 +1101,7 @@ protected:
|
||||
uint32_t builtin_stage_input_size_id = 0;
|
||||
uint32_t builtin_local_invocation_index_id = 0;
|
||||
uint32_t builtin_workgroup_size_id = 0;
|
||||
uint32_t builtin_frag_depth_id = 0;
|
||||
uint32_t swizzle_buffer_id = 0;
|
||||
uint32_t buffer_size_buffer_id = 0;
|
||||
uint32_t view_mask_buffer_id = 0;
|
||||
@ -1190,6 +1198,7 @@ protected:
|
||||
bool needs_subgroup_size = false;
|
||||
bool needs_sample_id = false;
|
||||
bool needs_helper_invocation = false;
|
||||
bool writes_to_depth = false;
|
||||
std::string qual_pos_var_name;
|
||||
std::string stage_in_var_name = "in";
|
||||
std::string stage_out_var_name = "out";
|
||||
|
@ -388,6 +388,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
|
||||
msl_args.append('--msl-runtime-array-rich-descriptor')
|
||||
if '.replace-recursive-inputs.' in shader:
|
||||
msl_args.append('--msl-replace-recursive-inputs')
|
||||
if '.input-attachment-is-ds-attachment.' in shader:
|
||||
msl_args.append('--msl-input-attachment-is-ds-attachment')
|
||||
if '.mask-location-0.' in shader:
|
||||
msl_args.append('--mask-stage-output-location')
|
||||
msl_args.append('0')
|
||||
|
Loading…
Reference in New Issue
Block a user