MSL: Add a workaround path to force native arrays for everything.

Hans-Kristian Arntzen 2020-02-24 12:47:14 +01:00
parent f19fdb94d7
commit c9d4f9cd74
8 changed files with 288 additions and 22 deletions
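
For API users, the new toggle is just another member of CompilerMSL::Options. A minimal sketch of driving it from C++ (the include path, the helper name and the way the SPIR-V words are obtained are illustrative and will vary per project):

#include "spirv_msl.hpp"

#include <string>
#include <vector>

// Compile a SPIR-V module to MSL with the native-array workaround enabled.
std::string compile_with_native_arrays(std::vector<uint32_t> spirv_words)
{
	spirv_cross::CompilerMSL compiler(std::move(spirv_words));

	auto opts = compiler.get_msl_options();
	opts.force_native_arrays = true; // Same effect as --msl-force-native-arrays on the CLI.
	compiler.set_msl_options(opts);

	return compiler.compile();
}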

View File

@@ -522,6 +522,7 @@ struct CLIArguments
bool msl_dispatch_base = false;
bool msl_decoration_binding = false;
bool msl_force_active_argument_buffer_resources = false;
bool msl_force_native_arrays = false;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
bool vulkan_glsl_disable_ext_samplerless_texture_functions = false;
@@ -616,6 +617,7 @@ static void print_help()
"\t[--msl-inline-uniform-block <set index> <binding>]\n"
"\t[--msl-decoration-binding]\n"
"\t[--msl-force-active-argument-buffer-resources]\n"
"\t[--msl-force-native-arrays]\n"
"\t[--hlsl]\n"
"\t[--reflect]\n"
"\t[--shader-model]\n"
@@ -806,6 +808,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.dispatch_base = args.msl_dispatch_base;
msl_opts.enable_decoration_binding = args.msl_decoration_binding;
msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources;
msl_opts.force_native_arrays = args.msl_force_native_arrays;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
@@ -1164,6 +1167,9 @@ static int main_inner(int argc, char *argv[])
uint32_t binding = parser.next_uint();
args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding));
});
cbs.add("--msl-force-native-arrays", [&args](CLIParser &) {
args.msl_force_native_arrays = true;
});
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();
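
With the CLI plumbing above in place, the workaround becomes a plain boolean switch. A hypothetical invocation (file names are illustrative):

spirv-cross shader.spv --msl --msl-force-native-arrays --output shader.metal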

View File

@@ -0,0 +1,20 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct BUF
{
int a;
float b;
float c;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device BUF& o [[buffer(0)]])
{
o.a = 4;
o.b = o.c;
}
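
This first reference is presumably the optimizer-processed variant of the test: the constant array accesses have been folded away, so int(c[1][1][1]) collapses to the literal 4 and e[1][0][1] to o.c, leaving only the two buffer stores and no array copies at all.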

View File

@@ -0,0 +1,202 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct BUF
{
int a;
float b;
float c;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
constant float _16[2] = { 1.0, 2.0 };
constant float _19[2] = { 3.0, 4.0 };
constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } };
constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } };
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToThreadGroup2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToThreadGroup2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToThreadGroup2(dst[i], src[i]);
}
}
kernel void main0(device BUF& o [[buffer(0)]])
{
float c[2][2][2];
spvArrayCopyFromConstantToStack3(c.elements, _21);
o.a = int(c[1][1][1]);
float _43[2] = { o.b, o.c };
float _48[2] = { o.b, o.b };
float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } };
float _54[2] = { o.c, o.c };
float _59[2] = { o.c, o.b };
float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } };
float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } };
float d[2][2][2];
spvArrayCopyFromStackToStack3(d.elements, _61);
float e[2][2][2];
spvArrayCopyFromStackToStack3(e.elements, d);
o.b = e[1][0][1];
}
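
The helper family above follows a fixed naming scheme, spvArrayCopyFrom<Src>To<Dst><Rank>: Src and Dst name the address space of the source and destination (Constant, Stack for thread, or ThreadGroup), and Rank is the array dimensionality, with each higher rank recursing into the copy one rank below. The helpers exist because plain C arrays, unlike Metal's array<T, N> template used on the default path, are not assignable. A rough illustration, not part of the generated file:

// Default path: arrays are value types and can simply be assigned.
array<float, 2> a, b;
a = b;

// Forced native arrays: T[N] cannot be assigned, so an element-wise helper is emitted.
float c[2], d[2];
spvArrayCopyFromStackToStack1(c, d);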

View File

@@ -0,0 +1,21 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 0, std430) buffer BUF
{
int a;
float b;
float c;
} o;
void main()
{
const float a[2][2][2] = float[][][](float[][](float[](1.0, 2.0), float[](3.0, 4.0)), float[][](float[](1.0, 2.0), float[](3.0, 4.0)));
float b[2][2][2] = a;
float c[2][2][2] = b;
o.a = int(c[1][1][1]);
float d[2][2][2] = float[][][](float[][](float[](o.b, o.c), float[](o.b, o.b)), float[][](float[](o.c, o.c), float[](o.c, o.b)));
float e[2][2][2] = d;
o.b = e[1][0][1];
}
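
This is the GLSL source the two MSL references are generated from. Under the default options the same arrays stay value types, so the b = a, c = b and e = d copies translate to plain MSL assignments; with the workaround enabled they route through the spvArrayCopy* helpers shown above instead.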

View File

@@ -3697,7 +3697,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c)
{
res = type_to_glsl_constructor(type) + "{ ";
}
else if (backend.use_initializer_list && backend.use_typed_initializer_list && !type.array.empty())
else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && !type.array.empty())
{
res = type_to_glsl_constructor(type) + "({ ";
needs_trailing_tracket = true;
@@ -8686,7 +8686,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// This path cannot be used for arithmetic.
if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
else if (backend.use_typed_initializer_list && !out_type.array.empty())
else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
{
// MSL path. Array constructor is baked into type here, do not use _constructor variant.
constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
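
Both hunks gate the typed initializer-list style on the new backend.array_is_value_type flag: on the default MSL path, array constants and composite constructs are wrapped in a constructor-style expression around the braces, while with forced native arrays they fall back to plain C initializer lists, which is why the reference above emits lines such as constant float _16[2] = { 1.0, 2.0 };.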

View File

@@ -890,7 +890,7 @@ void CompilerMSL::emit_entry_point_declarations()
SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
else
{
use_builtin_array = true;
is_using_builtin_array = true;
statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name,
type_to_array_glsl(type), " =");
@@ -921,7 +921,7 @@ void CompilerMSL::emit_entry_point_declarations()
}
end_scope_decl();
statement_no_indent("");
use_builtin_array = false;
is_using_builtin_array = false;
}
}
else
@@ -979,15 +979,17 @@ string CompilerMSL::compile()
backend.native_row_major_matrix = false;
backend.unsized_array_supported = false;
backend.can_declare_arrays_inline = false;
backend.can_return_array = true; // <-- Allow Metal to use the array<T> template
backend.allow_truncated_access_chain = true;
backend.array_is_value_type = true; // <-- Allow Metal to use the array<T> template to make arrays a value type
backend.comparison_image_samples_scalar = true;
backend.native_pointers = true;
backend.nonuniform_qualifier = "";
backend.support_small_type_sampling_result = true;
backend.supports_empty_struct = true;
// Allow Metal to use the array<T> template unless we force it off.
backend.can_return_array = !msl_options.force_native_arrays;
backend.array_is_value_type = !msl_options.force_native_arrays;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -6728,7 +6730,7 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
// If threadgroup storage qualifiers are *not* used:
// Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
if (lhs_thread && rhs_thread && !use_builtin_array)
if (lhs_thread && rhs_thread && !using_builtin_array())
{
statement(lhs, " = ", to_expression(rhs_id), ";");
}
@@ -8458,9 +8460,9 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
// address space.
// Array of resources should also be declared as builtin arrays.
if (has_member_decoration(type.self, index, DecorationOffset))
use_builtin_array = true;
is_using_builtin_array = true;
else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
use_builtin_array = true;
is_using_builtin_array = true;
if (member_is_packed_physical_type(type, index))
{
@@ -8516,14 +8518,14 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
{
BuiltIn builtin = BuiltInMax;
if (is_member_builtin(type, index, &builtin))
use_builtin_array = true;
is_using_builtin_array = true;
array_type = type_to_array_glsl(physical_type);
}
auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
member_attribute_qualifier(type, index), array_type, ";");
use_builtin_array = false;
is_using_builtin_array = false;
return result;
}
@@ -9400,7 +9402,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL.");
// Allow Metal to use the array<T> template to make arrays a value type
use_builtin_array = true;
is_using_builtin_array = true;
buffer_arrays.push_back(var_id);
for (uint32_t i = 0; i < array_size; ++i)
{
@@ -9413,7 +9415,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += ", raster_order_group(0)";
ep_args += "]]";
}
use_builtin_array = false;
is_using_builtin_array = false;
}
else
{
@@ -9979,9 +9981,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
// Allow Metal to use the array<T> template to make arrays a value type
string address_space = get_argument_address_space(var);
bool builtin = is_builtin_variable(var);
use_builtin_array = builtin;
is_using_builtin_array = builtin;
if (address_space == "threadgroup")
use_builtin_array = true;
is_using_builtin_array = true;
if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
decl += type_to_glsl(type, arg.id);
@@ -9989,7 +9991,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)), arg.id);
else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type))
{
use_builtin_array = true;
is_using_builtin_array = true;
decl += join(type_to_glsl(type, arg.id), "*");
}
else if (is_dynamic_img_sampler)
@@ -10086,7 +10088,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
decl += "* " + to_expression(name_id) + "_atomic";
}
use_builtin_array = false;
is_using_builtin_array = false;
return decl;
}
@@ -10571,7 +10573,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
if (type.vecsize > 1)
type_name += to_string(type.vecsize);
if (type.array.empty() || use_builtin_array)
if (type.array.empty() || using_builtin_array())
{
return type_name;
}
@@ -10607,7 +10609,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type)
}
default:
{
if (use_builtin_array)
if (using_builtin_array())
return CompilerGLSL::type_to_array_glsl(type);
else
return "";
@@ -10620,12 +10622,12 @@ std::string CompilerMSL::variable_decl(const SPIRVariable &variable)
{
if (variable.storage == StorageClassWorkgroup)
{
use_builtin_array = true;
is_using_builtin_array = true;
}
std::string expr = CompilerGLSL::variable_decl(variable);
if (variable.storage == StorageClassWorkgroup)
{
use_builtin_array = false;
is_using_builtin_array = false;
}
return expr;
}
@@ -12710,3 +12712,8 @@ void CompilerMSL::activate_argument_buffer_resources()
active_interface_variables.insert(self);
});
}
bool CompilerMSL::using_builtin_array() const
{
return msl_options.force_native_arrays || is_using_builtin_array;
}
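
The spirv_msl.cpp changes are otherwise mostly mechanical: the member previously called use_builtin_array is renamed to is_using_builtin_array, and every query now goes through the new using_builtin_array() accessor above, which also reports true whenever msl_options.force_native_arrays is set. That single accessor is what makes the workaround force C-style array declarations across the whole backend.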

View File

@@ -312,6 +312,11 @@ public:
// and would otherwise declare a different IAB.
bool force_active_argument_buffer_resources = false;
// Forces the use of plain arrays, which works around certain driver bugs on certain versions
// of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210.
// May reduce performance in scenarios where arrays are copied around as value-types.
bool force_native_arrays = false;
bool is_ios()
{
return platform == iOS;
@@ -827,7 +832,10 @@ protected:
bool has_sampled_images = false;
bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index.
bool use_builtin_array = false; // Force the use of C style array declaration.
bool is_using_builtin_array = false; // Force the use of C style array declaration.
bool using_builtin_array() const;
bool is_rasterization_disabled = false;
bool capture_output_to_buffer = false;
bool needs_swizzle_buffer_def = false;

View File

@@ -254,6 +254,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('0')
msl_args.append('--msl-device-argument-buffer')
msl_args.append('1')
if '.force-native-array.' in shader:
msl_args.append('--msl-force-native-arrays')
subprocess.check_call(msl_args)
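
In the test suite the workaround is opted into purely by file name: any MSL test shader whose path contains the marker .force-native-array. (for example a file named something like array-copy.force-native-array.comp; the name is illustrative) gets --msl-force-native-arrays appended to the spirv-cross invocation.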