Merge pull request #2218 from KhronosGroup/pr-2217

Merge PR 2217
This commit is contained in:
Hans-Kristian Arntzen 2023-10-16 12:19:24 +02:00 committed by GitHub
commit 2de1265fca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 152 additions and 2 deletions

View File

@ -677,6 +677,7 @@ struct CLIArguments
bool msl_check_discarded_frag_stores = false;
bool msl_sample_dref_lod_array_as_grad = false;
bool msl_runtime_array_rich_descriptor = false;
bool msl_replace_recursive_inputs = false;
const char *msl_combined_sampler_suffix = nullptr;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
@ -867,6 +868,7 @@ static void print_help_msl()
"\t\tUses same values as Metal MTLArgumentBuffersTier enumeration (0 = Tier1, 1 = Tier2).\n"
"\t\tNOTE: Setting this value no longer enables msl-argument-buffers implicitly.\n"
"\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n"
"\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n"
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
@ -1233,6 +1235,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad;
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
@ -1792,6 +1795,8 @@ static int main_inner(int argc, char *argv[])
});
cbs.add("--msl-runtime-array-rich-descriptor",
[&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; });
cbs.add("--msl-replace-recursive-inputs",
[&args](CLIParser &) { args.msl_replace_recursive_inputs = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@ -0,0 +1,33 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct recurs_1;
struct recurs
{
int m1;
device recurs_1* m2;
};
struct recurs_1
{
int m1;
device recurs_1* m2;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device void* nums_vp [[buffer(0)]], texture2d<uint, access::write> tex [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
device auto& nums = *(device recurs*)nums_vp;
int rslt = 0;
rslt = nums.m1;
int _28 = nums.m1 + nums.m2->m1;
rslt = _28;
int _37 = _28 + nums.m2->m2->m1;
rslt = _37;
tex.write(uint4(uint(_37), 0u, 0u, 1u), uint2(int2(gl_GlobalInvocationID.xy)));
}

View File

@ -0,0 +1,31 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct recurs;
struct recurs
{
int m1;
device recurs* m2;
};
struct recurs_1
{
int m1;
device recurs_1* m2;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device void* nums_vp [[buffer(0)]], texture2d<uint, access::write> tex [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
device auto& nums = *(device recurs*)nums_vp;
int rslt = 0;
rslt += nums.m1;
rslt += nums.m2->m1;
rslt += nums.m2->m2->m1;
tex.write(uint4(uint(rslt), 0u, 0u, 1u), uint2(int2(gl_GlobalInvocationID.xy)));
}

View File

@ -0,0 +1,21 @@
#version 450
#extension GL_EXT_buffer_reference2 : require
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(buffer_reference) buffer recurs;
layout(buffer_reference, buffer_reference_align = 16, set = 0, binding = 1, std140) buffer recurs
{
int m1;
recurs m2;
} nums;
layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D tex;
void main()
{
int rslt = 0;
rslt += nums.m1;
rslt += nums.m2.m1;
rslt += nums.m2.m2.m1;
imageStore(tex, ivec2(gl_GlobalInvocationID.xy), uvec4(rslt, 0u, 0u, 1u));
}

View File

@ -5465,6 +5465,36 @@ void Compiler::analyze_interlocked_resource_usage()
}
}
// Helper function
bool Compiler::check_internal_recursion(const SPIRType &type, std::unordered_set<uint32_t> &checked_ids)
{
if (type.basetype != SPIRType::Struct)
return false;
if (checked_ids.count(type.self))
return true;
// Recurse into struct members
bool is_recursive = false;
checked_ids.insert(type.self);
uint32_t mbr_cnt = uint32_t(type.member_types.size());
for (uint32_t mbr_idx = 0; !is_recursive && mbr_idx < mbr_cnt; mbr_idx++)
{
uint32_t mbr_type_id = type.member_types[mbr_idx];
auto &mbr_type = get<SPIRType>(mbr_type_id);
is_recursive |= check_internal_recursion(mbr_type, checked_ids);
}
checked_ids.erase(type.self);
return is_recursive;
}
// Return whether the struct type contains a structural recursion nested somewhere within its content.
bool Compiler::type_contains_recursion(const SPIRType &type)
{
std::unordered_set<uint32_t> checked_ids;
return check_internal_recursion(type, checked_ids);
}
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
if (!type_is_top_level_array(type))

View File

@ -1145,6 +1145,8 @@ protected:
bool has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const;
void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration);
bool check_internal_recursion(const SPIRType &type, std::unordered_set<uint32_t> &checked_ids);
bool type_contains_recursion(const SPIRType &type);
bool type_is_array_of_pointers(const SPIRType &type) const;
bool type_is_top_level_physical_pointer(const SPIRType &type) const;
bool type_is_top_level_pointer(const SPIRType &type) const;

View File

@ -13262,8 +13262,13 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
{
if (!ep_args.empty())
ep_args += ", ";
ep_args +=
get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name;
ep_args += get_argument_address_space(var) + " ";
if (recursive_inputs.count(type.self))
ep_args += string("void* ") + to_restrict(var_id, true) + r.name + "_vp";
else
ep_args += type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name;
ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
if (interlocked_resources.count(var_id))
ep_args += ", raster_order_group(0)";
@ -13446,6 +13451,19 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
});
}
}
if (msl_options.replace_recursive_inputs && type_contains_recursion(type) &&
(var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer))
{
recursive_inputs.insert(type.self);
entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
auto addr_space = get_argument_address_space(var);
auto var_name = to_name(var_id);
statement(addr_space, " auto& ", to_restrict(var_id, true), var_name,
" = *(", addr_space, " ", type_to_glsl(type), "*)", var_name, "_vp;");
});
}
});
// Builtin variables

View File

@ -505,6 +505,13 @@ public:
// Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this.
bool readwrite_texture_fences = true;
// Metal 3.1 introduced a Metal regression bug which causes infinite recursion during
// Metal's analysis of an entry point input structure that is itself recursive. Enabling
// this option will replace the recursive input declaration with a alternate variable of
// type void*, and then cast to the correct type at the top of the entry point function.
// The bug has been reported to Apple, and will hopefully be fixed in future releases.
bool replace_recursive_inputs = false;
bool is_ios() const
{
return platform == iOS;
@ -1194,6 +1201,7 @@ protected:
SmallVector<uint32_t> buffer_aliases_discrete;
std::unordered_set<uint32_t> atomic_image_vars; // Emulate texture2D atomic operations
std::unordered_set<uint32_t> pull_model_inputs;
std::unordered_set<uint32_t> recursive_inputs;
SmallVector<SPIRVariable *> entry_point_bindings;

View File

@ -359,6 +359,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('--msl-decoration-binding')
if '.rich-descriptor.' in shader:
msl_args.append('--msl-runtime-array-rich-descriptor')
if '.replace-recursive-inputs.' in shader:
msl_args.append('--msl-replace-recursive-inputs')
if '.mask-location-0.' in shader:
msl_args.append('--mask-stage-output-location')
msl_args.append('0')