MSL: Support dynamic offsets for buffers in argument buffers.

Vulkan has two types of buffer descriptors,
`VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC` and
`VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC`, which allow the client to
offset the buffers by an amount given when the descriptor set is bound
to a pipeline. Metal provides no direct support for this when the buffer
in question is in an argument buffer, so once again we're on our own.
These offsets cannot be stored in, or otherwise associated with, the
argument buffer itself, because they are set at bind time. Different
pipelines may have different offsets set. Therefore, we must use a
separate buffer, not in any argument buffer, to hold these offsets. Then
the shader must manually offset the buffer pointer.

This change fully supports arrays of dynamic buffers, including arrays
of arrays, even though Vulkan forbids the latter. It does not, however,
support runtime arrays: the compiler currently throws an error for
them. Support may be added later.
This commit is contained in:
Chip Davis 2019-09-05 23:14:12 -05:00
parent 537bee3cfa
commit cb35934248
9 changed files with 346 additions and 6 deletions

View File

@ -522,6 +522,7 @@ struct CLIArguments
bool vulkan_glsl_disable_ext_samplerless_texture_functions = false;
bool emit_line_directives = false;
SmallVector<uint32_t> msl_discrete_descriptor_sets;
SmallVector<pair<uint32_t, uint32_t>> msl_dynamic_buffers;
SmallVector<PLSArg> pls_in;
SmallVector<PLSArg> pls_out;
SmallVector<Remap> remaps;
@ -600,6 +601,7 @@ static void print_help()
"\t[--msl-multiview]\n"
"\t[--msl-view-index-from-device-index]\n"
"\t[--msl-dispatch-base]\n"
"\t[--msl-dynamic-buffer <set index> <binding>]\n"
"\t[--hlsl]\n"
"\t[--reflect]\n"
"\t[--shader-model]\n"
@ -764,6 +766,9 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
uint32_t i = 0;
for (auto &v : args.msl_dynamic_buffers)
msl_comp->add_dynamic_buffer(v.first, v.second, i++);
}
else if (args.hlsl)
compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir())));
@ -1086,6 +1091,10 @@ static int main_inner(int argc, char *argv[])
cbs.add("--msl-view-index-from-device-index",
[&args](CLIParser &) { args.msl_view_index_from_device_index = true; });
cbs.add("--msl-dispatch-base", [&args](CLIParser &) { args.msl_dispatch_base = true; });
// --msl-dynamic-buffer <set index> <binding>
// Enables MSL argument buffers and records the (set index, binding) pair in
// args.msl_dynamic_buffers.
cbs.add("--msl-dynamic-buffer", [&args](CLIParser &parser) {
	args.msl_argument_buffers = true;
	// The evaluation order of function arguments is unspecified in C++, so the two
	// operands are read into named locals first. This guarantees that <set index>
	// is consumed before <binding>, matching the order documented in the help text.
	const auto desc_set = parser.next_uint();
	const auto binding = parser.next_uint();
	args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding));
});
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@ -0,0 +1,89 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Element type of the device buffers referenced by spvDescriptorSetBuffer1::baz;
// main0 writes both members.
struct Baz
{
int e;
int f;
};
// Layout of the constant buffer referenced by spvDescriptorSetBuffer0::m_34;
// main0 reads both members.
struct Foo
{
int a;
int b;
};
// Layout of the constant buffer referenced by spvDescriptorSetBuffer0::m_40;
// main0 reads both members.
struct Bar
{
int c;
int d;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u);
// Contents of the argument buffer that main0 receives at buffer(0):
// one pointer to a Foo constant buffer and one to a Bar constant buffer.
struct spvDescriptorSetBuffer0
{
constant Foo* m_34 [[id(0)]];
constant Bar* m_40 [[id(1)]];
};
// Contents of the argument buffer that main0 receives at buffer(1):
// a 3x3x2 array of pointers to device Baz buffers, declared with id(0).
struct spvDescriptorSetBuffer1
{
device Baz* baz [[id(0)]][3][3][2];
};
// Compute entry point. spvDescriptorSet0 and spvDescriptorSet1 are the argument buffers at
// buffer(0) and buffer(1); spvDynamicOffsets (buffer(23)) holds the offsets that are added to
// the buffer pointers below before they are used.
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
// Rebase m_34 by spvDynamicOffsets[0]. The pointer is cast to char* first so the
// addition is performed in bytes, then cast back to the buffer's own type.
constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]);
// Local 3x3x2 array of rebased pointers: every element of spvDescriptorSet1.baz gets its
// own offset, spvDynamicOffsets[1] through [18], with the last array index varying fastest.
device Baz* baz[3][3][2] =
{
{
{
(device Baz* )((device char* )spvDescriptorSet1.baz[0][0][0] + spvDynamicOffsets[1]),
(device Baz* )((device char* )spvDescriptorSet1.baz[0][0][1] + spvDynamicOffsets[2]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[0][1][0] + spvDynamicOffsets[3]),
(device Baz* )((device char* )spvDescriptorSet1.baz[0][1][1] + spvDynamicOffsets[4]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[0][2][0] + spvDynamicOffsets[5]),
(device Baz* )((device char* )spvDescriptorSet1.baz[0][2][1] + spvDynamicOffsets[6]),
},
},
{
{
(device Baz* )((device char* )spvDescriptorSet1.baz[1][0][0] + spvDynamicOffsets[7]),
(device Baz* )((device char* )spvDescriptorSet1.baz[1][0][1] + spvDynamicOffsets[8]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[1][1][0] + spvDynamicOffsets[9]),
(device Baz* )((device char* )spvDescriptorSet1.baz[1][1][1] + spvDynamicOffsets[10]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[1][2][0] + spvDynamicOffsets[11]),
(device Baz* )((device char* )spvDescriptorSet1.baz[1][2][1] + spvDynamicOffsets[12]),
},
},
{
{
(device Baz* )((device char* )spvDescriptorSet1.baz[2][0][0] + spvDynamicOffsets[13]),
(device Baz* )((device char* )spvDescriptorSet1.baz[2][0][1] + spvDynamicOffsets[14]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[2][1][0] + spvDynamicOffsets[15]),
(device Baz* )((device char* )spvDescriptorSet1.baz[2][1][1] + spvDynamicOffsets[16]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[2][2][0] + spvDynamicOffsets[17]),
(device Baz* )((device char* )spvDescriptorSet1.baz[2][2][1] + spvDynamicOffsets[18]),
},
},
};
// The Baz buffer is selected by the invocation's global ID. m_40 is dereferenced straight
// from the argument buffer; no offset is applied to it.
baz[gl_GlobalInvocationID.x][gl_GlobalInvocationID.y][gl_GlobalInvocationID.z]->e = _34.a + (*spvDescriptorSet0.m_40).c;
baz[gl_GlobalInvocationID.x][gl_GlobalInvocationID.y][gl_GlobalInvocationID.z]->f = _34.b * (*spvDescriptorSet0.m_40).d;
}

View File

@ -0,0 +1,90 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Element type of the device buffers referenced by spvDescriptorSetBuffer1::baz;
// main0 writes both members.
struct Baz
{
int e;
int f;
};
// Layout of the constant buffer referenced by spvDescriptorSetBuffer0::m_34;
// main0 reads both members.
struct Foo
{
int a;
int b;
};
// Layout of the constant buffer referenced by spvDescriptorSetBuffer0::m_40;
// main0 reads both members.
struct Bar
{
int c;
int d;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u);
// Contents of the argument buffer that main0 receives at buffer(0):
// one pointer to a Foo constant buffer and one to a Bar constant buffer.
struct spvDescriptorSetBuffer0
{
constant Foo* m_34 [[id(0)]];
constant Bar* m_40 [[id(1)]];
};
// Contents of the argument buffer that main0 receives at buffer(1):
// a 3x3x2 array of pointers to device Baz buffers, declared with id(0).
struct spvDescriptorSetBuffer1
{
device Baz* baz [[id(0)]][3][3][2];
};
// Compute entry point. spvDescriptorSet0 and spvDescriptorSet1 are the argument buffers at
// buffer(0) and buffer(1); spvDynamicOffsets (buffer(23)) holds the offsets that are added to
// the buffer pointers below before they are used.
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
// Rebase m_34 by spvDynamicOffsets[0]. The pointer is cast to char* first so the
// addition is performed in bytes, then cast back to the buffer's own type.
constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]);
// Local 3x3x2 array of rebased pointers: every element of spvDescriptorSet1.baz gets its
// own offset, spvDynamicOffsets[1] through [18], with the last array index varying fastest.
device Baz* baz[3][3][2] =
{
{
{
(device Baz* )((device char* )spvDescriptorSet1.baz[0][0][0] + spvDynamicOffsets[1]),
(device Baz* )((device char* )spvDescriptorSet1.baz[0][0][1] + spvDynamicOffsets[2]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[0][1][0] + spvDynamicOffsets[3]),
(device Baz* )((device char* )spvDescriptorSet1.baz[0][1][1] + spvDynamicOffsets[4]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[0][2][0] + spvDynamicOffsets[5]),
(device Baz* )((device char* )spvDescriptorSet1.baz[0][2][1] + spvDynamicOffsets[6]),
},
},
{
{
(device Baz* )((device char* )spvDescriptorSet1.baz[1][0][0] + spvDynamicOffsets[7]),
(device Baz* )((device char* )spvDescriptorSet1.baz[1][0][1] + spvDynamicOffsets[8]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[1][1][0] + spvDynamicOffsets[9]),
(device Baz* )((device char* )spvDescriptorSet1.baz[1][1][1] + spvDynamicOffsets[10]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[1][2][0] + spvDynamicOffsets[11]),
(device Baz* )((device char* )spvDescriptorSet1.baz[1][2][1] + spvDynamicOffsets[12]),
},
},
{
{
(device Baz* )((device char* )spvDescriptorSet1.baz[2][0][0] + spvDynamicOffsets[13]),
(device Baz* )((device char* )spvDescriptorSet1.baz[2][0][1] + spvDynamicOffsets[14]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[2][1][0] + spvDynamicOffsets[15]),
(device Baz* )((device char* )spvDescriptorSet1.baz[2][1][1] + spvDynamicOffsets[16]),
},
{
(device Baz* )((device char* )spvDescriptorSet1.baz[2][2][0] + spvDynamicOffsets[17]),
(device Baz* )((device char* )spvDescriptorSet1.baz[2][2][1] + spvDynamicOffsets[18]),
},
},
};
// The Baz buffer is selected by the invocation's global ID, copied into a local first.
uint3 coords = gl_GlobalInvocationID;
// m_40 is dereferenced straight from the argument buffer; no offset is applied to it.
baz[coords.x][coords.y][coords.z]->e = _34.a + (*spvDescriptorSet0.m_40).c;
baz[coords.x][coords.y][coords.z]->f = _34.b * (*spvDescriptorSet0.m_40).d;
}

View File

@ -0,0 +1,27 @@
#version 450
layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in;
// Unnamed uniform block at set 0, binding 0; main() reads a and b directly.
layout(set = 0, binding = 0) uniform Foo
{
int a;
int b;
};
// Unnamed uniform block at set 0, binding 1; main() reads c and d directly.
layout(set = 0, binding = 1) uniform Bar
{
int c;
int d;
};
// Storage buffer block at set 1, binding 2, instantiated as a 3x3x2 array of buffers.
// The dimensions equal the local workgroup size declared above.
layout(set = 1, binding = 2) buffer Baz
{
int e;
int f;
} baz[3][3][2];
// Writes to the one element of the baz array selected by this invocation's global ID.
void main()
{
uvec3 coords = gl_GlobalInvocationID;
// a/b come from the Foo block and c/d from the Bar block; both blocks are unnamed,
// so their members are referenced without a prefix.
baz[coords.x][coords.y][coords.z].e = a + c;
baz[coords.x][coords.y][coords.z].f = b * d;
}

View File

@ -12483,6 +12483,14 @@ void CompilerGLSL::end_scope()
statement("}");
}
// Leaves the current scope: pops one indentation level, then hands "}" together with
// `trailer` to statement(). Throws if there is no indentation level left to pop.
void CompilerGLSL::end_scope(const string &trailer)
{
	if (indent == 0)
	{
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	}

	--indent;
	statement("}", trailer);
}
void CompilerGLSL::end_scope_decl()
{
if (!indent)

View File

@ -332,6 +332,7 @@ protected:
void begin_scope();
void end_scope();
void end_scope(const std::string &trailer);
void end_scope_decl();
void end_scope_decl(const std::string &decl);

View File

@ -61,6 +61,12 @@ void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
resource_bindings[tuple] = { binding, false };
}
// Registers the buffer at (desc_set, binding) as needing a dynamic offset.
// `index` is stored as the first element of the map entry; the second element
// (the variable ID) is reset to 0 here and filled in elsewhere.
void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index)
{
	const SetBindingPair key = { desc_set, binding };
	auto &entry = buffers_requiring_dynamic_offset[key];
	entry.first = index;
	entry.second = 0;
}
void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
{
if (desc_set < kMaxArgumentBuffers)
@ -548,6 +554,18 @@ void CompilerMSL::build_implicit_builtins()
set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
view_mask_buffer_id = var_id;
}
if (!buffers_requiring_dynamic_offset.empty())
{
uint32_t var_id = build_constant_uint_array_pointer();
set_name(var_id, "spvDynamicOffsets");
// This should never match anything.
set_decoration(var_id, DecorationDescriptorSet, ~(5u));
set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index);
set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
msl_options.dynamic_offsets_buffer_index);
dynamic_offsets_buffer_id = var_id;
}
}
void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
@ -786,6 +804,66 @@ void CompilerMSL::emit_entry_point_declarations()
"(", merge(args), ");");
}
// Emit dynamic buffers here.
// For every buffer registered through add_dynamic_buffer(), declare a local that holds the
// pointer stored in the argument buffer, advanced by the offset(s) read from the dynamic
// offsets buffer. The shader body then refers to that local instead of the argument buffer member.
for (auto &buffer : buffers_requiring_dynamic_offset)
{
	// The variable ID stays 0 when no resource in an argument buffer matched the requested
	// set/binding pair. There is nothing to emit for such an entry, and looking up
	// variable 0 would be invalid, so skip it.
	if (!buffer.second.second)
		continue;

	const auto &var = get<SPIRVariable>(buffer.second.second);
	uint32_t var_id = var.self;
	const auto &type = get_variable_data_type(var);
	string name = to_name(var.self);
	uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet);
	uint32_t arg_id = argument_buffer_ids[desc_set];
	// Index of this buffer's first offset within the dynamic offsets buffer.
	uint32_t base_index = buffer.second.first;

	if (!type.array.empty())
	{
		// This is complicated, because we need to support arrays of arrays.
		// And it's even worse if the outermost dimension is a runtime array, because now
		// all this complicated goop has to go into the shader itself. (FIXME)
		if (!type.array[type.array.size() - 1])
			SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
		else
		{
			statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name,
			          type_to_array_glsl(type), " =");

			// `indices` is a multi-dimensional counter. indices[0] is the innermost dimension
			// and is incremented once per emitted element; j counts the elements emitted so far.
			// `dim` tracks how many nested initializer scopes still have to be (re)opened.
			uint32_t dim = uint32_t(type.array.size());
			uint32_t j = 0;
			for (SmallVector<uint32_t> indices(type.array.size());
			     indices[type.array.size() - 1] < to_array_size_literal(type); j++)
			{
				// Open every scope that is currently closed, down to the innermost one.
				while (dim > 0)
				{
					begin_scope();
					--dim;
				}

				// Build the subscript string, outermost dimension first.
				string arrays;
				for (uint32_t i = uint32_t(type.array.size()); i; --i)
					arrays += join("[", indices[i - 1], "]");
				statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false),
				          ")((", get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")",
				          to_name(arg_id), ".", ensure_valid_name(name, "m"), arrays, " + ",
				          to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),");

				// Advance the counter. Whenever a dimension wraps, close its scope, reset it and
				// carry into the next outer dimension. The outermost dimension is never reset,
				// which is what terminates the enclosing for loop.
				while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1)
				{
					end_scope(",");
					indices[dim++] = 0;
				}
			}
			end_scope_decl();
			statement_no_indent("");
		}
	}
	else
	{
		// Single buffer: bind a reference to the pointee at the offset address.
		statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(",
		          get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((",
		          get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".",
		          ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);");
	}
}
// Emit buffer arrays here.
for (uint32_t array_id : buffer_arrays)
{
@ -794,8 +872,8 @@ void CompilerMSL::emit_entry_point_declarations()
string name = to_name(array_id);
statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
begin_scope();
for (uint32_t i = 0; i < type.array[0]; ++i)
statement(name + "_" + convert_to_string(i) + ",");
for (uint32_t i = 0; i < to_array_size_literal(type); ++i)
statement(name, "_", i, ",");
end_scope_decl();
statement_no_indent("");
}
@ -865,6 +943,8 @@ string CompilerMSL::compile()
active_interface_variables.insert(buffer_size_buffer_id);
if (view_mask_buffer_id)
active_interface_variables.insert(view_mask_buffer_id);
if (dynamic_offsets_buffer_id)
active_interface_variables.insert(dynamic_offsets_buffer_id);
if (builtin_layer_id)
active_interface_variables.insert(builtin_layer_id);
if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2))
@ -4702,7 +4782,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
}
else if (is_array(mbr_type))
{
for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++)
for (uint32_t k = 0; k < to_array_size_literal(mbr_type, 0); k++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr,
@ -4731,7 +4811,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
else // Must be an array
{
assert(is_array(*type));
for (uint32_t j = 0; j < type->array[0]; j++, index++)
for (uint32_t j = 0; j < to_array_size_literal(*type, 0); j++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true);
@ -8173,7 +8253,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
// a NonWritable decoration. So just use discrete arguments for all storage images
// on iOS.
if (!(msl_options.is_ios() && type.basetype == SPIRType::Image && type.image.sampled == 2) &&
var.storage != StorageClassPushConstant)
var.storage != StorageClassPushConstant)
{
uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
if (descriptor_set_is_argument_buffer(desc_set))
@ -8707,7 +8787,7 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
uint32_t binding_stride = 1;
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
binding_stride *= type.array_size_literal[i] ? type.array[i] : get<SPIRConstant>(type.array[i]).scalar();
binding_stride *= to_array_size_literal(type, i);
assert(binding_stride != 0);
@ -11180,6 +11260,9 @@ void CompilerMSL::analyze_argument_buffers()
}
else
{
uint32_t binding = get_decoration(var.self, DecorationBinding);
SetBindingPair pair = { desc_set, binding };
if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
resource.basetype == SPIRType::SampledImage)
{
@ -11188,6 +11271,12 @@ void CompilerMSL::analyze_argument_buffers()
if (resource.plane == 0)
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
}
else if (buffers_requiring_dynamic_offset.count(pair))
{
// Don't set the qualified name here; we'll define a variable holding the corrected buffer address later.
buffer_type.member_types.push_back(var.basetype);
buffers_requiring_dynamic_offset[pair].second = var.self;
}
else
{
// Resources will be declared as pointers not references, so automatically dereference as appropriate.
@ -11213,6 +11302,11 @@ bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const
return desc_set == other.desc_set && binding == other.binding;
}
// Lexicographic ordering: compare descriptor sets first and fall back to the binding
// only when the sets are equal.
bool CompilerMSL::SetBindingPair::operator<(const SetBindingPair &other) const
{
	if (desc_set != other.desc_set)
		return desc_set < other.desc_set;
	return binding < other.binding;
}
bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const
{
return model == other.model && desc_set == other.desc_set && binding == other.binding;

View File

@ -263,6 +263,7 @@ public:
uint32_t shader_tess_factor_buffer_index = 26;
uint32_t buffer_size_buffer_index = 25;
uint32_t view_mask_buffer_index = 24;
uint32_t dynamic_offsets_buffer_index = 23;
uint32_t shader_input_wg_index = 0;
uint32_t device_index = 0;
bool enable_point_size_builtin = true;
@ -397,6 +398,14 @@ public:
// the set/binding combination was used by the MSL code.
void add_msl_resource_binding(const MSLResourceBinding &resource);
// desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
// in this shader. index is the index within the dynamic offset buffer to use. This
// function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC
// or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers
// are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with
// an offset taken from the dynamic offset buffer.
void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index);
// When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets.
// This corresponds to VK_KHR_push_descriptor in Vulkan.
void add_discrete_descriptor_set(uint32_t desc_set);
@ -688,6 +697,7 @@ protected:
uint32_t swizzle_buffer_id = 0;
uint32_t buffer_size_buffer_id = 0;
uint32_t view_mask_buffer_id = 0;
uint32_t dynamic_offsets_buffer_id = 0;
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@ -717,6 +727,7 @@ protected:
uint32_t desc_set;
uint32_t binding;
bool operator==(const SetBindingPair &other) const;
bool operator<(const SetBindingPair &other) const;
};
struct StageSetBinding
@ -779,6 +790,9 @@ protected:
std::unordered_set<uint32_t> buffers_requiring_array_length;
SmallVector<uint32_t> buffer_arrays;
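// Maps (descriptor set, binding) to (index into the dynamic offsets buffer, variable ID).
// The variable ID is 0 until analyze_argument_buffers() finds the matching resource.
// Must be ordered since array is in a specific order.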
std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;
uint32_t argument_buffer_ids[kMaxArgumentBuffers];
uint32_t argument_buffer_discrete_mask = 0;
void analyze_argument_buffers();

View File

@ -209,6 +209,14 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('--msl-view-index-from-device-index')
if '.dispatchbase.' in shader:
msl_args.append('--msl-dispatch-base')
if '.dynamic-buffer.' in shader:
# Arbitrary for testing purposes.
msl_args.append('--msl-dynamic-buffer')
msl_args.append('0')
msl_args.append('0')
msl_args.append('--msl-dynamic-buffer')
msl_args.append('1')
msl_args.append('2')
subprocess.check_call(msl_args)