MSL: Add option to pad fragment outputs.

If the shader writes fewer components than the render pass expects,
the MSL shader compiler throws an error rather than leaving the missing
components undefined. This hurts portability, so we need to add explicit
padding here.
This commit is contained in:
Hans-Kristian Arntzen 2019-01-14 14:53:47 +01:00
parent b0f0fb7b7c
commit b8033d7525
7 changed files with 209 additions and 9 deletions

View File

@ -492,6 +492,7 @@ struct CLIArguments
bool support_nonzero_baseinstance = true;
bool msl_swizzle_texture_samples = false;
bool msl_ios = false;
bool msl_pad_fragment_output = false;
vector<PLSArg> pls_in;
vector<PLSArg> pls_out;
vector<Remap> remaps;
@ -546,6 +547,7 @@ static void print_help()
"\t[--msl-version <MMmmpp>]\n"
"\t[--msl-swizzle-texture-samples]\n"
"\t[--msl-ios]\n"
"\t[--msl-pad-fragment-output]\n"
"\t[--hlsl]\n"
"\t[--reflect]\n"
"\t[--shader-model]\n"
@ -714,6 +716,7 @@ static int main_inner(int argc, char *argv[])
cbs.add("--no-420pack-extension", [&args](CLIParser &) { args.use_420pack_extension = false; });
cbs.add("--msl-swizzle-texture-samples", [&args](CLIParser &) { args.msl_swizzle_texture_samples = true; });
cbs.add("--msl-ios", [&args](CLIParser &) { args.msl_ios = true; });
cbs.add("--msl-pad-fragment-output", [&args](CLIParser &) { args.msl_pad_fragment_output = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();
@ -843,6 +846,7 @@ static int main_inner(int argc, char *argv[])
msl_opts.swizzle_texture_samples = args.msl_swizzle_texture_samples;
if (args.msl_ios)
msl_opts.platform = CompilerMSL::Options::iOS;
msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output;
msl_comp->set_msl_options(msl_opts);
}
else if (args.hlsl)

View File

@ -0,0 +1,35 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Return type of main0: four float4 members tagged [[color(0)]] through [[color(3)]].
struct main0_out
{
float4 FragColors_0 [[color(0)]];
float4 FragColors_1 [[color(1)]];
float4 FragColor2 [[color(2)]];
float4 FragColor3 [[color(3)]];
};
// [[stage_in]] parameter type of main0: a single float3 tagged [[user(locn0)]].
struct main0_in
{
float3 vColor [[user(locn0)]];
};
// Fragment entry point. Fills narrower local copies of the outputs from in.vColor,
// then widens each local to float4 when storing it into the returned struct.
fragment main0_out main0(main0_in in [[stage_in]])
{
main0_out out = {};
// Locals are narrower than the float4 members of main0_out.
float FragColors[2] = {};
float2 FragColor2 = {};
float3 FragColor3 = {};
FragColors[0] = in.vColor.x;
FragColors[1] = in.vColor.y;
FragColor2 = in.vColor.xz;
FragColor3 = in.vColor.zzz;
// Widening step: scalars go through the float4 constructor, vectors are
// extended by a swizzle that repeats their last component.
out.FragColors_0 = float4(FragColors[0]);
out.FragColors_1 = float4(FragColors[1]);
out.FragColor2 = FragColor2.xyyy;
out.FragColor3 = FragColor3.xyzz;
return out;
}

View File

@ -0,0 +1,42 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Return type of main0: four float4 members tagged [[color(0)]] through [[color(3)]].
struct main0_out
{
float4 FragColors_0 [[color(0)]];
float4 FragColors_1 [[color(1)]];
float4 FragColor2 [[color(2)]];
float4 FragColor3 [[color(3)]];
};
// [[stage_in]] parameter type of main0: a single float3 tagged [[user(locn0)]].
struct main0_in
{
float3 vColor [[user(locn0)]];
};
// Helper that writes the output locals from vColor. All four variables are
// passed in as thread-address-space references rather than accessed as globals.
void set_globals(thread float (&FragColors)[2], thread float3& vColor, thread float2& FragColor2, thread float3& FragColor3)
{
FragColors[0] = vColor.x;
FragColors[1] = vColor.y;
FragColor2 = vColor.xz;
FragColor3 = vColor.zzz;
}
// Fragment entry point. Lets set_globals() fill the narrower local outputs,
// then widens each local to float4 when storing it into the returned struct.
fragment main0_out main0(main0_in in [[stage_in]])
{
main0_out out = {};
// Locals are narrower than the float4 members of main0_out.
float FragColors[2] = {};
float2 FragColor2 = {};
float3 FragColor3 = {};
set_globals(FragColors, in.vColor, FragColor2, FragColor3);
// Widening step: scalars go through the float4 constructor, vectors are
// extended by a swizzle that repeats their last component.
out.FragColors_0 = float4(FragColors[0]);
out.FragColors_1 = float4(FragColors[1]);
out.FragColor2 = FragColor2.xyyy;
out.FragColor3 = FragColor3.xyzz;
return out;
}

View File

@ -0,0 +1,18 @@
#version 450
// Outputs with fewer than four components: a two-element float array starting
// at location 0, a vec2 at location 2 and a vec3 at location 3.
layout(location = 0) out float FragColors[2];
layout(location = 2) out vec2 FragColor2;
layout(location = 3) out vec3 FragColor3;
// Single vec3 input at location 0; every output is derived from it.
layout(location = 0) in vec3 vColor;
// Writes every output directly from components of vColor. The outputs and the
// input are accessed as globals from a function other than main().
void set_globals()
{
FragColors[0] = vColor.x;
FragColors[1] = vColor.y;
FragColor2 = vColor.xz;
FragColor3 = vColor.zzz;
}
// Entry point: delegates all output writes to set_globals().
void main()
{
set_globals();
}

View File

@ -82,6 +82,11 @@ CompilerMSL::CompilerMSL(ParsedIR &&ir_, MSLVertexAttr *p_vtx_attrs, size_t vtx_
resource_bindings.push_back(&p_res_bindings[i]);
}
// Stores the component count to use for the fragment output at `location`.
// A later call for the same location replaces the previously stored count.
void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
{
	auto &component_count = fragment_output_components[location];
	component_count = components;
}
void CompilerMSL::build_implicit_builtins()
{
bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
@ -779,6 +784,27 @@ void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClas
p_va->used_by_shader = true;
}
// Returns the component count registered for `location` through
// set_fragment_output_components(), or 4 when nothing was registered.
uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const
{
	const auto override_itr = fragment_output_components.find(location);
	if (override_itr != fragment_output_components.end())
		return override_itr->second;

	// No explicit override for this location.
	return 4;
}
// Creates a new type that is a copy of `type_id` with its vector size set to
// `components`, and returns the ID of that new type.
uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components)
{
	// Reserve one fresh ID and store a copy of the source type under it.
	const uint32_t extended_id = ir.increase_bound_by(1);
	auto &extended = set<SPIRType>(extended_id, get<SPIRType>(type_id));

	// The copy refers to itself, has no parent type, is not a pointer,
	// and uses the requested vector width.
	extended.self = extended_id;
	extended.parent_type = 0;
	extended.pointer = false;
	extended.vecsize = components;

	return extended_id;
}
void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
SPIRType &ib_type, SPIRVariable &var)
{
@ -793,6 +819,26 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
var.basetype = type_id;
auto &type = get<SPIRType>(type_id);
uint32_t target_components = 0;
uint32_t type_components = type.vecsize;
bool padded_output = false;
// Check if we need to pad fragment output to match a certain number of components.
if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
{
uint32_t locn = get_decoration(var.self, DecorationLocation);
target_components = get_target_components_for_fragment_location(locn);
if (type_components < target_components)
{
// Make a new type here.
type_id = build_extended_vector_type(type_id, target_components);
padded_output = true;
}
}
ib_type.member_types.push_back(get_pointee_type_id(type_id));
// Give the member a name
@ -801,7 +847,20 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
// Update the original variable reference to include the structure reference
string qual_var_name = ib_var_ref + "." + mbr_name;
ir.meta[var.self].decoration.qualified_alias = qual_var_name;
if (padded_output)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
entry_func.add_local_variable(var.self);
vars_needing_early_declaration.push_back(var.self);
entry_func.fixup_hooks_out.push_back([=, &var]() {
SPIRType &padded_type = this->get<SPIRType>(type_id);
statement(qual_var_name, " = ", remap_swizzle(padded_type, type_components, to_name(var.self)), ";");
});
}
else
ir.meta[var.self].decoration.qualified_alias = qual_var_name;
// Copy the variable location from the original variable to the member
if (get_decoration_bitset(var.self).get(DecorationLocation))
@ -890,7 +949,26 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage
{
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
ib_type.member_types.push_back(usable_type->self);
uint32_t target_components = 0;
bool padded_output = false;
uint32_t type_id = usable_type->self;
// Check if we need to pad fragment output to match a certain number of components.
if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
{
uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
target_components = get_target_components_for_fragment_location(locn);
if (usable_type->vecsize < target_components)
{
// Make a new type here.
type_id = build_extended_vector_type(usable_type->self, target_components);
padded_output = true;
}
}
ib_type.member_types.push_back(get_pointee_type_id(type_id));
// Give the member a name
string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m");
@ -930,12 +1008,21 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back(
[=]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); });
[=, &var]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); });
break;
case StorageClassOutput:
entry_func.fixup_hooks_out.push_back(
[=]() { statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];"); });
entry_func.fixup_hooks_out.push_back([=, &var]() {
if (padded_output)
{
auto &padded_type = this->get<SPIRType>(type_id);
statement(ib_var_ref, ".", mbr_name, " = ",
remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")),
";");
}
else
statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];");
});
break;
default:
@ -1053,14 +1140,14 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
switch (storage)
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back([=]() {
entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, ".",
mbr_name, ";");
});
break;
case StorageClassOutput:
entry_func.fixup_hooks_out.push_back([=]() {
entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx),
"[", i, "];");
});
@ -1115,13 +1202,13 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
switch (storage)
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back([=]() {
entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";");
});
break;
case StorageClassOutput:
entry_func.fixup_hooks_out.push_back([=]() {
entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";");
});
break;

View File

@ -168,6 +168,10 @@ public:
bool disable_rasterization = false;
bool swizzle_texture_samples = false;
// Fragment output in MSL must have at least as many components as the render pass.
// Add support for explicitly padding out components.
bool pad_fragment_output_components = false;
bool is_ios()
{
return platform == iOS;
@ -312,6 +316,10 @@ public:
// The remapped sampler must not be an array of samplers.
void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler);
// If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect
// to use for a particular location. The default is 4 if number of components is not overridden.
void set_fragment_output_components(uint32_t location, uint32_t components);
protected:
void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
void emit_instruction(const Instruction &instr) override;
@ -427,6 +435,7 @@ protected:
Options msl_options;
std::set<SPVFuncImpl> spv_function_implementations;
std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
std::unordered_map<uint32_t, uint32_t> fragment_output_components;
std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
@ -449,6 +458,9 @@ protected:
std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers;
std::vector<uint32_t> buffer_arrays;
uint32_t get_target_components_for_fragment_location(uint32_t location) const;
uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components);
// OpcodeHandler that handles several MSL preprocessing operations.
struct OpCodePreprocessor : OpcodeHandler
{

View File

@ -152,6 +152,8 @@ def cross_compile_msl(shader, spirv, opt):
msl_args.append('--msl-swizzle-texture-samples')
if '.ios.' in shader:
msl_args.append('--msl-ios')
if '.pad-fragment.' in shader:
msl_args.append('--msl-pad-fragment-output')
subprocess.check_call(msl_args)