MSL: Support inline uniform blocks in argument buffers.

Here, the inline uniform block is explicit: we instantiate the buffer
block itself in the argument buffer, instead of a pointer to the buffer.
I just hope this will work with the `MTLArgumentDescriptor` API...

Note that Metal recursively assigns individual members of embedded
structs IDs. This means for automatic assignment that we have to
calculate the binding stride for a given buffer block. For MoltenVK,
we'll simply increment the ID by the size of the inline uniform block.
Then the later IDs will never conflict with the inline uniform block. We
can get away with this because Metal doesn't require that IDs be
contiguous, only monotonically increasing.
This commit is contained in:
Chip Davis 2019-12-16 22:58:16 -06:00
parent f9376058ce
commit fedbc35315
10 changed files with 240 additions and 13 deletions

View File

@ -315,7 +315,7 @@ if (SPIRV_CROSS_STATIC)
endif()
set(spirv-cross-abi-major 0)
set(spirv-cross-abi-minor 23)
set(spirv-cross-abi-minor 24)
set(spirv-cross-abi-patch 0)
if (SPIRV_CROSS_SHARED)

View File

@ -529,6 +529,7 @@ struct CLIArguments
SmallVector<uint32_t> msl_discrete_descriptor_sets;
SmallVector<uint32_t> msl_device_argument_buffers;
SmallVector<pair<uint32_t, uint32_t>> msl_dynamic_buffers;
SmallVector<pair<uint32_t, uint32_t>> msl_inline_uniform_blocks;
SmallVector<PLSArg> pls_in;
SmallVector<PLSArg> pls_out;
SmallVector<Remap> remaps;
@ -612,6 +613,7 @@ static void print_help()
"\t[--msl-view-index-from-device-index]\n"
"\t[--msl-dispatch-base]\n"
"\t[--msl-dynamic-buffer <set index> <binding>]\n"
"\t[--msl-inline-uniform-block <set index> <binding>]\n"
"\t[--msl-decoration-binding]\n"
"\t[--msl-force-active-argument-buffer-resources]\n"
"\t[--hlsl]\n"
@ -812,6 +814,8 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
uint32_t i = 0;
for (auto &v : args.msl_dynamic_buffers)
msl_comp->add_dynamic_buffer(v.first, v.second, i++);
for (auto &v : args.msl_inline_uniform_blocks)
msl_comp->add_inline_uniform_block(v.first, v.second);
}
else if (args.hlsl)
compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir())));
@ -1153,6 +1157,13 @@ static int main_inner(int argc, char *argv[])
cbs.add("--msl-decoration-binding", [&args](CLIParser &) { args.msl_decoration_binding = true; });
cbs.add("--msl-force-active-argument-buffer-resources",
[&args](CLIParser &) { args.msl_force_active_argument_buffer_resources = true; });
cbs.add("--msl-inline-uniform-block", [&args](CLIParser &parser) {
args.msl_argument_buffers = true;
// Make sure next_uint() is called in-order.
uint32_t desc_set = parser.next_uint();
uint32_t binding = parser.next_uint();
args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding));
});
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@ -0,0 +1,53 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef packed_float4 packed_float4x4[4];
struct Baz
{
int f;
int g;
};
struct X
{
int x;
int y;
float z;
};
struct Foo
{
int a;
int b;
packed_float4x4 c;
X x[2];
};
struct Bar
{
int d;
int e;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u);
struct spvDescriptorSetBuffer0
{
Foo m_32 [[id(0)]];
constant Bar* m_38 [[id(12)]];
};
struct spvDescriptorSetBuffer1
{
device Baz* baz [[id(0)]][3];
};
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d;
spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e;
}

View File

@ -0,0 +1,54 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef packed_float4 packed_float4x4[4];
struct Baz
{
int f;
int g;
};
struct X
{
int x;
int y;
float z;
};
struct Foo
{
int a;
int b;
packed_float4x4 c;
X x[2];
};
struct Bar
{
int d;
int e;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u);
struct spvDescriptorSetBuffer0
{
Foo m_32 [[id(0)]];
constant Bar* m_38 [[id(12)]];
};
struct spvDescriptorSetBuffer1
{
device Baz* baz [[id(0)]][3];
};
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint3 coords = gl_GlobalInvocationID;
spvDescriptorSet1.baz[coords.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d;
spvDescriptorSet1.baz[coords.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e;
}

View File

@ -0,0 +1,37 @@
#version 450
#extension GL_EXT_scalar_block_layout : require
layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in;
struct X
{
int x;
int y;
float z;
};
layout(set = 0, binding = 0, scalar) uniform Foo
{
int a;
int b;
mat4 c;
X x[2];
};
layout(set = 0, binding = 1) uniform Bar
{
int d;
int e;
};
layout(set = 1, binding = 2) buffer Baz
{
int f;
int g;
} baz[3];
void main()
{
uvec3 coords = gl_GlobalInvocationID;
baz[coords.x].f = a + d;
baz[coords.x].g = b * e;
}

View File

@ -1029,6 +1029,26 @@ spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigne
#endif
}
spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding)
{
#if SPIRV_CROSS_C_API_MSL
if (compiler->backend != SPVC_BACKEND_MSL)
{
compiler->context->report_error("MSL function used on a non-MSL backend.");
return SPVC_ERROR_INVALID_ARGUMENT;
}
auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
msl.add_inline_uniform_block(desc_set, binding);
return SPVC_SUCCESS;
#else
(void)binding;
(void)desc_set;
compiler->context->report_error("MSL function used on a non-MSL backend.");
return SPVC_ERROR_INVALID_ARGUMENT;
#endif
}
spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set)
{
#if SPIRV_CROSS_C_API_MSL

View File

@ -33,7 +33,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 23
#define SPVC_C_API_VERSION_MINOR 24
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@ -691,6 +691,8 @@ SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_second
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index);
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding);
/*
* Reflect resources.
* Maps almost 1:1 to C++ API.

View File

@ -68,6 +68,12 @@ void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32
buffers_requiring_dynamic_offset[pair] = { index, 0 };
}
void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding)
{
SetBindingPair pair = { desc_set, binding };
inline_uniform_blocks.insert(pair);
}
void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
{
if (desc_set < kMaxArgumentBuffers)
@ -9850,6 +9856,31 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
});
}
// Returns the number of IDs that Metal would assign to an inline uniform block in an argument buffer.
uint32_t CompilerMSL::get_inline_uniform_block_binding_stride(SPIRType &type)
{
// We need this information now.
mark_scalar_layout_structs(type);
unordered_set<uint32_t> aligned_structs;
align_struct(type, aligned_structs);
uint32_t binding_stride = 0;
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
{
uint32_t member_type_id = type.member_types[i];
uint32_t member_binding_stride = 1;
auto &member_type = get<SPIRType>(member_type_id);
if (member_type.basetype == SPIRType::Struct)
member_binding_stride = get_inline_uniform_block_binding_stride(member_type);
else if (member_is_packed_physical_type(type, i) && is_matrix(member_type))
// Packed matrices are represented as arrays, so they get multiple IDs.
member_binding_stride = member_type.columns;
for (uint32_t j = 0; j < uint32_t(member_type.array.size()); j++)
member_binding_stride *= to_array_size_literal(member_type, j);
binding_stride += member_binding_stride;
}
return binding_stride;
}
// Returns the Metal index of the resource of the specified type as used by the specified variable.
uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane)
{
@ -9912,7 +9943,14 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
// If we did not explicitly remap, allocate bindings on demand.
// We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different.
bool allocate_argument_buffer_ids = false;
if (var.storage != StorageClassPushConstant)
allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set);
uint32_t binding_stride = 1;
if (inline_uniform_blocks.count(SetBindingPair{ var_desc_set, var_binding }))
binding_stride = get_inline_uniform_block_binding_stride(get_variable_data_type(var));
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
binding_stride *= to_array_size_literal(type, i);
@ -9922,20 +9960,11 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
// If a binding has not been specified, revert to incrementing resource indices.
uint32_t resource_index;
bool allocate_argument_buffer_ids = false;
uint32_t desc_set = 0;
if (var.storage != StorageClassPushConstant)
{
desc_set = get_decoration(var.self, DecorationDescriptorSet);
allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set);
}
if (allocate_argument_buffer_ids)
{
// Allocate from a flat ID binding space.
resource_index = next_metal_resource_ids[desc_set];
next_metal_resource_ids[desc_set] += binding_stride;
resource_index = next_metal_resource_ids[var_desc_set];
next_metal_resource_ids[var_desc_set] += binding_stride;
}
else
{
@ -12676,6 +12705,12 @@ void CompilerMSL::analyze_argument_buffers()
buffer_type.member_types.push_back(var.basetype);
buffers_requiring_dynamic_offset[pair].second = var.self;
}
else if (inline_uniform_blocks.count(pair))
{
// Put the buffer block itself into the argument buffer.
buffer_type.member_types.push_back(get_variable_data_type_id(var));
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
}
else
{
// Resources will be declared as pointers not references, so automatically dereference as appropriate.

View File

@ -432,6 +432,13 @@ public:
// an offset taken from the dynamic offset buffer.
void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index);
// desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
// in this shader. This function marks that resource as an inline uniform block
// (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers
// are enabled. If so, the buffer block will be directly embedded into the argument
// buffer, instead of being referenced indirectly via pointer.
void add_inline_uniform_block(uint32_t desc_set, uint32_t binding);
// When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets.
// This corresponds to VK_KHR_push_descriptor in Vulkan.
void add_discrete_descriptor_set(uint32_t desc_set);
@ -854,6 +861,9 @@ protected:
// Must be ordered since array is in a specific order.
std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;
std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks;
uint32_t get_inline_uniform_block_binding_stride(SPIRType &type);
uint32_t argument_buffer_ids[kMaxArgumentBuffers];
uint32_t argument_buffer_discrete_mask = 0;
uint32_t argument_buffer_device_storage_mask = 0;

View File

@ -244,6 +244,11 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('--msl-dynamic-buffer')
msl_args.append('1')
msl_args.append('2')
if '.inline-block.' in shader:
# Arbitrary for testing purposes.
msl_args.append('--msl-inline-uniform-block')
msl_args.append('0')
msl_args.append('0')
if '.device-argument-buffer.' in shader:
msl_args.append('--msl-device-argument-buffer')
msl_args.append('0')