diff --git a/main.cpp b/main.cpp index d8aff152..907cf1c2 100644 --- a/main.cpp +++ b/main.cpp @@ -677,6 +677,7 @@ struct CLIArguments bool msl_check_discarded_frag_stores = false; bool msl_sample_dref_lod_array_as_grad = false; bool msl_runtime_array_rich_descriptor = false; + bool msl_replace_recursive_inputs = false; const char *msl_combined_sampler_suffix = nullptr; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; @@ -867,6 +868,7 @@ static void print_help_msl() "\t\tUses same values as Metal MTLArgumentBuffersTier enumeration (0 = Tier1, 1 = Tier2).\n" "\t\tNOTE: Setting this value no longer enables msl-argument-buffers implicitly.\n" "\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n" + "\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n" "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n" "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" "\t\tEmits [[color(N)]] inputs in fragment stage.\n" @@ -1233,6 +1235,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad; msl_opts.ios_support_base_vertex_instance = true; msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor; + msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); @@ -1792,6 +1795,8 @@ static int main_inner(int argc, char *argv[]) }); cbs.add("--msl-runtime-array-rich-descriptor", [&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; }); + cbs.add("--msl-replace-recursive-inputs", + [&args](CLIParser &) { args.msl_replace_recursive_inputs = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/reference/opt/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp b/reference/opt/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp new file mode 100644 index 00000000..9dd3a7ff --- /dev/null +++ b/reference/opt/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct recurs_1; + +struct recurs +{ + int m1; + device recurs_1* m2; +}; + +struct recurs_1 +{ + int m1; + device recurs_1* m2; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device void* nums_vp [[buffer(0)]], texture2d tex [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + device auto& nums = *(device recurs*)nums_vp; + int rslt = 0; + rslt = nums.m1; + int _28 = nums.m1 + nums.m2->m1; + rslt = _28; + int _37 = _28 + nums.m2->m2->m1; + rslt = _37; + tex.write(uint4(uint(_37), 0u, 0u, 1u), uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp b/reference/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp new file mode 100644 index 00000000..939619c5 --- /dev/null +++ b/reference/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct recurs; + +struct recurs +{ + int m1; + device recurs* m2; +}; + +struct recurs_1 +{ + int m1; + device recurs_1* m2; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device void* nums_vp [[buffer(0)]], texture2d tex [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + device auto& nums = *(device recurs*)nums_vp; + int rslt = 0; + rslt += nums.m1; + rslt += nums.m2->m1; + rslt += nums.m2->m2->m1; + tex.write(uint4(uint(rslt), 0u, 0u, 1u), uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp b/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp new file mode 100644 index 00000000..ce776525 --- /dev/null +++ b/shaders-msl/comp/metal3_1_regression_patch.replace-recursive-inputs.msl3.comp @@ -0,0 +1,21 @@ +#version 450 +#extension GL_EXT_buffer_reference2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer recurs; +layout(buffer_reference, buffer_reference_align = 16, set = 0, binding = 1, std140) buffer recurs +{ + int m1; + recurs m2; +} nums; + +layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D tex; + +void main() +{ + int rslt = 0; + rslt += nums.m1; + rslt += nums.m2.m1; + rslt += nums.m2.m2.m1; + imageStore(tex, ivec2(gl_GlobalInvocationID.xy), uvec4(rslt, 0u, 0u, 1u)); +} diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 0da24e7b..88539550 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -5465,6 +5465,36 @@ void Compiler::analyze_interlocked_resource_usage() } } +// Helper function +bool Compiler::check_internal_recursion(const SPIRType &type, std::unordered_set &checked_ids) +{ + if (type.basetype != SPIRType::Struct) + return false; + + if (checked_ids.count(type.self)) + return true; + + // Recurse into struct members + bool is_recursive = false; + checked_ids.insert(type.self); + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t mbr_idx = 0; !is_recursive && mbr_idx < mbr_cnt; mbr_idx++) + { + uint32_t mbr_type_id = type.member_types[mbr_idx]; + auto &mbr_type = get(mbr_type_id); + is_recursive |= check_internal_recursion(mbr_type, checked_ids); + } + checked_ids.erase(type.self); + return is_recursive; +} + +// Return whether the struct type contains a structural recursion nested somewhere within its content. +bool Compiler::type_contains_recursion(const SPIRType &type) +{ + std::unordered_set checked_ids; + return check_internal_recursion(type, checked_ids); +} + bool Compiler::type_is_array_of_pointers(const SPIRType &type) const { if (!type_is_top_level_array(type)) diff --git a/spirv_cross.hpp b/spirv_cross.hpp index 8b85f7c5..b1fca07f 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -1145,6 +1145,8 @@ protected: bool has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const; void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); + bool check_internal_recursion(const SPIRType &type, std::unordered_set &checked_ids); + bool type_contains_recursion(const SPIRType &type); bool type_is_array_of_pointers(const SPIRType &type) const; bool type_is_top_level_physical_pointer(const SPIRType &type) const; bool type_is_top_level_pointer(const SPIRType &type) const; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 9f9fcfc3..5605d172 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13262,8 +13262,13 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) { if (!ep_args.empty()) ep_args += ", "; - ep_args += - get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name; + ep_args += get_argument_address_space(var) + " "; + + if (recursive_inputs.count(type.self)) + ep_args += string("void* ") + to_restrict(var_id, true) + r.name + "_vp"; + else + ep_args += type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; if (interlocked_resources.count(var_id)) ep_args += ", raster_order_group(0)"; @@ -13446,6 +13451,19 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); } } + + if (msl_options.replace_recursive_inputs && type_contains_recursion(type) && + (var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer)) + { + recursive_inputs.insert(type.self); + entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { + auto addr_space = get_argument_address_space(var); + auto var_name = to_name(var_id); + statement(addr_space, " auto& ", to_restrict(var_id, true), var_name, + " = *(", addr_space, " ", type_to_glsl(type), "*)", var_name, "_vp;"); + }); + } }); // Builtin variables diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 26167f67..dc149530 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -505,6 +505,13 @@ public: // Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this. bool readwrite_texture_fences = true; + // Metal 3.1 introduced a Metal regression bug which causes infinite recursion during + // Metal's analysis of an entry point input structure that is itself recursive. Enabling + // this option will replace the recursive input declaration with a alternate variable of + // type void*, and then cast to the correct type at the top of the entry point function. + // The bug has been reported to Apple, and will hopefully be fixed in future releases. + bool replace_recursive_inputs = false; + bool is_ios() const { return platform == iOS; @@ -1194,6 +1201,7 @@ protected: SmallVector buffer_aliases_discrete; std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations std::unordered_set pull_model_inputs; + std::unordered_set recursive_inputs; SmallVector entry_point_bindings; diff --git a/test_shaders.py b/test_shaders.py index 887cb5b7..5dd400bd 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -359,6 +359,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('--msl-decoration-binding') if '.rich-descriptor.' in shader: msl_args.append('--msl-runtime-array-rich-descriptor') + if '.replace-recursive-inputs.' in shader: + msl_args.append('--msl-replace-recursive-inputs') if '.mask-location-0.' in shader: msl_args.append('--mask-stage-output-location') msl_args.append('0')