Merge pull request #2331 from aitor-lunarg/force-depth-passthrough

MSL: Add option to force depth write in fragment shaders
This commit is contained in:
Hans-Kristian Arntzen 2024-05-24 17:11:06 +02:00 committed by GitHub
commit d47a140735
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 236 additions and 10 deletions

View File

@ -681,6 +681,7 @@ struct CLIArguments
bool msl_replace_recursive_inputs = false;
bool msl_readwrite_texture_fences = true;
bool msl_agx_manual_cube_grad_fixup = false;
bool msl_input_attachment_is_ds_attachment = false;
const char *msl_combined_sampler_suffix = nullptr;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
@ -873,6 +874,10 @@ static void print_help_msl()
"\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n"
"\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n"
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
"\t[--msl-input-attachment-is-ds-attachment]:\n\t\tAdds a simple depth passthrough in fragment shaders when they do not modify the depth value.\n"
"\t\tRequired to force Metal to write to the depth/stencil attachment post fragment execution.\n"
"\t\tOtherwise, Metal may optimize the write to pre fragment execution which goes against the Vulkan spec.\n"
"\t\tOnly required if an input attachment and depth/stencil attachment reference the same resource.\n"
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
"\t\tRequires an Apple GPU.\n"
@ -1257,6 +1262,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
msl_opts.input_attachment_is_ds_attachment = args.msl_input_attachment_is_ds_attachment;
msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
msl_comp->set_msl_options(msl_opts);
@ -1823,6 +1829,7 @@ static int main_inner(int argc, char *argv[])
[&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; });
cbs.add("--msl-replace-recursive-inputs",
[&args](CLIParser &) { args.msl_replace_recursive_inputs = true; });
cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@ -0,0 +1,17 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 color [[color(0)]];
};
[[ early_fragment_tests ]] fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
return out;
}

View File

@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = 1.0;
return out;
}

View File

@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = gl_FragCoord.z;
return out;
}

View File

@ -0,0 +1,17 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 color [[color(0)]];
};
[[ early_fragment_tests ]] fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
return out;
}

View File

@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = 1.0;
return out;
}

View File

@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};
fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = gl_FragCoord.z;
return out;
}

View File

@ -0,0 +1,12 @@
#version 450
layout(early_fragment_tests) in;
layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;
layout (location = 0) out vec4 color;
void main()
{
color = subpassLoad(inputDepth);
}

View File

@ -0,0 +1,11 @@
#version 450
layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;
layout (location = 0) out vec4 color;
void main()
{
color = subpassLoad(inputDepth);
gl_FragDepth = 1.0f;
}

View File

@ -0,0 +1,10 @@
#version 450
layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;
layout (location = 0) out vec4 color;
void main()
{
color = subpassLoad(inputDepth);
}

View File

@ -271,11 +271,14 @@ void CompilerMSL::build_implicit_builtins()
active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
bool force_frag_depth_passthrough =
get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input &&
msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment;
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation ||
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size)
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough)
{
bool has_frag_coord = false;
bool has_sample_id = false;
@ -292,6 +295,7 @@ void CompilerMSL::build_implicit_builtins()
bool has_helper_invocation = false;
bool has_local_invocation_index = false;
bool has_workgroup_size = false;
bool has_frag_depth = false;
uint32_t workgroup_id_type = 0;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
@ -312,6 +316,13 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self);
does_shader_write_sample_mask = true;
}
if (force_frag_depth_passthrough && builtin == BuiltInFragDepth)
{
builtin_frag_depth_id = var.self;
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self);
has_frag_depth = true;
}
}
if (var.storage != StorageClassInput)
@ -902,6 +913,36 @@ void CompilerMSL::build_implicit_builtins()
builtin_workgroup_size_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
}
if (!has_frag_depth && force_frag_depth_passthrough)
{
uint32_t offset = ir.increase_bound_by(3);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
uint32_t var_id = offset + 2;
// Create gl_FragDepth
SPIRType float_type { OpTypeFloat };
float_type.basetype = SPIRType::Float;
float_type.width = 32;
float_type.vecsize = 1;
set<SPIRType>(type_id, float_type);
SPIRType float_type_ptr_in = float_type;
float_type_ptr_in.op = spv::OpTypePointer;
float_type_ptr_in.pointer = true;
float_type_ptr_in.pointer_depth++;
float_type_ptr_in.parent_type = type_id;
float_type_ptr_in.storage = StorageClassOutput;
auto &ptr_in_type = set<SPIRType>(type_ptr_id, float_type_ptr_in);
ptr_in_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth);
builtin_frag_depth_id = var_id;
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id);
active_output_builtins.set(BuiltInFragDepth);
}
}
if (needs_swizzle_buffer_def)
@ -1571,6 +1612,8 @@ string CompilerMSL::compile()
add_active_interface_variable(builtin_dispatch_base_id);
if (builtin_sample_mask_id)
add_active_interface_variable(builtin_sample_mask_id);
if (builtin_frag_depth_id)
add_active_interface_variable(builtin_frag_depth_id);
// Create structs to hold input, output and uniform variables.
// Do output first to ensure out. is declared at top of entry function.
@ -1869,8 +1912,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
{
uint32_t base_id = ops[0];
if (global_var_ids.find(base_id) != global_var_ids.end())
{
added_arg_ids.insert(base_id);
if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id)
writes_to_depth = true;
}
uint32_t rvalue_id = ops[1];
if (global_var_ids.find(rvalue_id) != global_var_ids.end())
added_arg_ids.insert(rvalue_id);
@ -14513,16 +14561,33 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
}
}
else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment &&
is_builtin_variable(var) && active_output_builtins.get(bi_type) &&
bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask())
is_builtin_variable(var) && active_output_builtins.get(bi_type))
{
// If the additional fixed sample mask was set, we need to adjust the sample_mask
// output to reflect that. If the shader outputs the sample_mask itself too, we need
// to AND the two masks to get the final one.
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
});
switch (bi_type)
{
case BuiltInSampleMask:
if (has_additional_fixed_sample_mask())
{
// If the additional fixed sample mask was set, we need to adjust the sample_mask
// output to reflect that. If the shader outputs the sample_mask itself too, we need
// to AND the two masks to get the final one.
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
});
}
break;
case BuiltInFragDepth:
if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth)
{
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;");
});
}
break;
default:
break;
}
}
});
}

View File

@ -529,6 +529,13 @@ public:
// with side effects. Provided as an option hoping Metal will fix this issue in the future.
bool force_fragment_with_side_effects_execution = false;
// If set, adds a depth pass through statement to circumvent the following issue:
// When the same depth/stencil is used as input and depth/stencil attachment, we need to
// force Metal to perform the depth/stencil write after fragment execution. Otherwise,
// Metal will write to the depth attachment before fragment execution. This happens
// if the fragment does not modify the depth value.
bool input_attachment_is_ds_attachment = false;
bool is_ios() const
{
return platform == iOS;
@ -1094,6 +1101,7 @@ protected:
uint32_t builtin_stage_input_size_id = 0;
uint32_t builtin_local_invocation_index_id = 0;
uint32_t builtin_workgroup_size_id = 0;
uint32_t builtin_frag_depth_id = 0;
uint32_t swizzle_buffer_id = 0;
uint32_t buffer_size_buffer_id = 0;
uint32_t view_mask_buffer_id = 0;
@ -1190,6 +1198,7 @@ protected:
bool needs_subgroup_size = false;
bool needs_sample_id = false;
bool needs_helper_invocation = false;
bool writes_to_depth = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";

View File

@ -388,6 +388,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('--msl-runtime-array-rich-descriptor')
if '.replace-recursive-inputs.' in shader:
msl_args.append('--msl-replace-recursive-inputs')
if '.input-attachment-is-ds-attachment.' in shader:
msl_args.append('--msl-input-attachment-is-ds-attachment')
if '.mask-location-0.' in shader:
msl_args.append('--mask-stage-output-location')
msl_args.append('0')