MSL: Add a workaround path to force native arrays for everything.

Hans-Kristian Arntzen 2020-02-24 12:47:14 +01:00
parent f19fdb94d7
commit c9d4f9cd74
8 changed files with 288 additions and 22 deletions
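
For API users, the new toggle is just another member of CompilerMSL::Options. A minimal sketch of driving it from C++ (the include path, the helper name and the way the SPIR-V words are obtained are illustrative and will vary per project):

#include "spirv_msl.hpp"

#include <string>
#include <vector>

// Compile a SPIR-V module to MSL with the native-array workaround enabled.
std::string compile_with_native_arrays(std::vector<uint32_t> spirv_words)
{
	spirv_cross::CompilerMSL compiler(std::move(spirv_words));

	auto opts = compiler.get_msl_options();
	opts.force_native_arrays = true; // Same effect as --msl-force-native-arrays on the CLI.
	compiler.set_msl_options(opts);

	return compiler.compile();
}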

View File

@@ -522,6 +522,7 @@ struct CLIArguments
bool msl_dispatch_base = false;
bool msl_decoration_binding = false;
bool msl_force_active_argument_buffer_resources = false;
bool msl_force_native_arrays = false;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
bool vulkan_glsl_disable_ext_samplerless_texture_functions = false;
@@ -616,6 +617,7 @@ static void print_help()
"\t[--msl-inline-uniform-block <set index> <binding>]\n"
"\t[--msl-decoration-binding]\n"
"\t[--msl-force-active-argument-buffer-resources]\n"
"\t[--msl-force-native-arrays]\n"
"\t[--hlsl]\n"
"\t[--reflect]\n"
"\t[--shader-model]\n"
@@ -806,6 +808,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.dispatch_base = args.msl_dispatch_base;
msl_opts.enable_decoration_binding = args.msl_decoration_binding;
msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources;
msl_opts.force_native_arrays = args.msl_force_native_arrays;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
@@ -1164,6 +1167,9 @@ static int main_inner(int argc, char *argv[])
uint32_t binding = parser.next_uint();
args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding));
});
cbs.add("--msl-force-native-arrays", [&args](CLIParser &) {
args.msl_force_native_arrays = true;
});
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();
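
With the CLI plumbing above in place, the workaround becomes a plain boolean switch. A hypothetical invocation (file names are illustrative):

spirv-cross shader.spv --msl --msl-force-native-arrays --output shader.metal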

View File

@@ -0,0 +1,20 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct BUF
{
int a;
float b;
float c;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device BUF& o [[buffer(0)]])
{
o.a = 4;
o.b = o.c;
}
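
This first reference is presumably the optimizer-processed variant of the test: the constant array accesses have been folded away, so int(c[1][1][1]) collapses to the literal 4 and e[1][0][1] to o.c, leaving only the two buffer stores and no array copies at all.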

View File

@@ -0,0 +1,202 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct BUF
{
int a;
float b;
float c;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
constant float _16[2] = { 1.0, 2.0 };
constant float _19[2] = { 3.0, 4.0 };
constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } };
constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } };
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToStack1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B>
inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToThreadGroup1(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromConstantToThreadGroup2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromStackToThreadGroup2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToStack2(dst[i], src[i]);
}
}
template<typename T, uint A, uint B, uint C>
inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C])
{
for (uint i = 0; i < A; i++)
{
spvArrayCopyFromThreadGroupToThreadGroup2(dst[i], src[i]);
}
}
kernel void main0(device BUF& o [[buffer(0)]])
{
float c[2][2][2];
spvArrayCopyFromConstantToStack3(c.elements, _21);
o.a = int(c[1][1][1]);
float _43[2] = { o.b, o.c };
float _48[2] = { o.b, o.b };
float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } };
float _54[2] = { o.c, o.c };
float _59[2] = { o.c, o.b };
float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } };
float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } };
float d[2][2][2];
spvArrayCopyFromStackToStack3(d.elements, _61);
float e[2][2][2];
spvArrayCopyFromStackToStack3(e.elements, d);
o.b = e[1][0][1];
}
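
The helper family above follows a fixed naming scheme, spvArrayCopyFrom<Src>To<Dst><Rank>: Src and Dst name the address space of the source and destination (Constant, Stack for thread, or ThreadGroup), and Rank is the array dimensionality, with each higher rank recursing into the copy one rank below. The helpers exist because plain C arrays, unlike Metal's array<T, N> template used on the default path, are not assignable. A rough illustration, not part of the generated file:

// Default path: arrays are value types and can simply be assigned.
array<float, 2> a, b;
a = b;

// Forced native arrays: T[N] cannot be assigned, so an element-wise helper is emitted.
float c[2], d[2];
spvArrayCopyFromStackToStack1(c, d);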

View File

@@ -0,0 +1,21 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 0, std430) buffer BUF
{
int a;
float b;
float c;
} o;
void main()
{
const float a[2][2][2] = float[][][](float[][](float[](1.0, 2.0), float[](3.0, 4.0)), float[][](float[](1.0, 2.0), float[](3.0, 4.0)));
float b[2][2][2] = a;
float c[2][2][2] = b;
o.a = int(c[1][1][1]);
float d[2][2][2] = float[][][](float[][](float[](o.b, o.c), float[](o.b, o.b)), float[][](float[](o.c, o.c), float[](o.c, o.b)));
float e[2][2][2] = d;
o.b = e[1][0][1];
}
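
This is the GLSL source the two MSL references are generated from. Under the default options the same arrays stay value types, so the b = a, c = b and e = d copies translate to plain MSL assignments; with the workaround enabled they route through the spvArrayCopy* helpers shown above instead.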

View File

@@ -3697,7 +3697,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c)
{
res = type_to_glsl_constructor(type) + "{ ";
}
else if (backend.use_initializer_list && backend.use_typed_initializer_list && !type.array.empty())
else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && !type.array.empty())
{
res = type_to_glsl_constructor(type) + "({ ";
needs_trailing_tracket = true;
@@ -8686,7 +8686,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// This path cannot be used for arithmetic.
if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
else if (backend.use_typed_initializer_list && !out_type.array.empty())
else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
{
// MSL path. Array constructor is baked into type here, do not use _constructor variant.
constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
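
Both hunks gate the typed initializer-list style on the new backend.array_is_value_type flag: on the default MSL path, array constants and composite constructs are wrapped in a constructor-style expression around the braces, while with forced native arrays they fall back to plain C initializer lists, which is why the reference above emits lines such as constant float _16[2] = { 1.0, 2.0 };.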

View File

@@ -890,7 +890,7 @@ void CompilerMSL::emit_entry_point_declarations()
SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
else
{
use_builtin_array = true;
is_using_builtin_array = true;
statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name,
type_to_array_glsl(type), " =");
@@ -921,7 +921,7 @@ void CompilerMSL::emit_entry_point_declarations()
}
end_scope_decl();
statement_no_indent("");
use_builtin_array = false;
is_using_builtin_array = false;
}
}
else
@@ -979,15 +979,17 @@ string CompilerMSL::compile()
backend.native_row_major_matrix = false;
backend.unsized_array_supported = false;
backend.can_declare_arrays_inline = false;
backend.can_return_array = true; // <-- Allow Metal to use the array<T> template
backend.allow_truncated_access_chain = true;
backend.array_is_value_type = true; // <-- Allow Metal to use the array<T> template to make arrays a value type
backend.comparison_image_samples_scalar = true;
backend.native_pointers = true;
backend.nonuniform_qualifier = "";
backend.support_small_type_sampling_result = true;
backend.supports_empty_struct = true;
// Allow Metal to use the array<T> template unless we force it off.
backend.can_return_array = !msl_options.force_native_arrays;
backend.array_is_value_type = !msl_options.force_native_arrays;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -6728,7 +6730,7 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
// If threadgroup storage qualifiers are *not* used:
// Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
if (lhs_thread && rhs_thread && !use_builtin_array)
if (lhs_thread && rhs_thread && !using_builtin_array())
{
statement(lhs, " = ", to_expression(rhs_id), ";");
}
@@ -8458,9 +8460,9 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
// address space.
// Array of resources should also be declared as builtin arrays.
if (has_member_decoration(type.self, index, DecorationOffset))
use_builtin_array = true;
is_using_builtin_array = true;
else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
use_builtin_array = true;
is_using_builtin_array = true;
if (member_is_packed_physical_type(type, index))
{
@@ -8516,14 +8518,14 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
{
BuiltIn builtin = BuiltInMax;
if (is_member_builtin(type, index, &builtin))
use_builtin_array = true;
is_using_builtin_array = true;
array_type = type_to_array_glsl(physical_type);
}
auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
member_attribute_qualifier(type, index), array_type, ";");
use_builtin_array = false;
is_using_builtin_array = false;
return result;
}
@@ -9400,7 +9402,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL.");
// Allow Metal to use the array<T> template to make arrays a value type
use_builtin_array = true;
is_using_builtin_array = true;
buffer_arrays.push_back(var_id);
for (uint32_t i = 0; i < array_size; ++i)
{
@@ -9413,7 +9415,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += ", raster_order_group(0)";
ep_args += "]]";
}
use_builtin_array = false;
is_using_builtin_array = false;
}
else
{
@@ -9979,9 +9981,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
// Allow Metal to use the array<T> template to make arrays a value type
string address_space = get_argument_address_space(var);
bool builtin = is_builtin_variable(var);
use_builtin_array = builtin;
is_using_builtin_array = builtin;
if (address_space == "threadgroup")
use_builtin_array = true;
is_using_builtin_array = true;
if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
decl += type_to_glsl(type, arg.id);
@@ -9989,7 +9991,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)), arg.id);
else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type))
{
use_builtin_array = true;
is_using_builtin_array = true;
decl += join(type_to_glsl(type, arg.id), "*");
}
else if (is_dynamic_img_sampler)
@@ -10086,7 +10088,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
decl += "* " + to_expression(name_id) + "_atomic";
}
use_builtin_array = false;
is_using_builtin_array = false;
return decl;
}
@@ -10571,7 +10573,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
if (type.vecsize > 1)
type_name += to_string(type.vecsize);
if (type.array.empty() || use_builtin_array)
if (type.array.empty() || using_builtin_array())
{
return type_name;
}
@@ -10607,7 +10609,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type)
}
default:
{
if (use_builtin_array)
if (using_builtin_array())
return CompilerGLSL::type_to_array_glsl(type);
else
return "";
@@ -10620,12 +10622,12 @@ std::string CompilerMSL::variable_decl(const SPIRVariable &variable)
{
if (variable.storage == StorageClassWorkgroup)
{
use_builtin_array = true;
is_using_builtin_array = true;
}
std::string expr = CompilerGLSL::variable_decl(variable);
if (variable.storage == StorageClassWorkgroup)
{
use_builtin_array = false;
is_using_builtin_array = false;
}
return expr;
}
@@ -12710,3 +12712,8 @@ void CompilerMSL::activate_argument_buffer_resources()
active_interface_variables.insert(self);
});
}
bool CompilerMSL::using_builtin_array() const
{
return msl_options.force_native_arrays || is_using_builtin_array;
}
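
The spirv_msl.cpp changes are otherwise mostly mechanical: the member previously called use_builtin_array is renamed to is_using_builtin_array, and every query now goes through the new using_builtin_array() accessor above, which also reports true whenever msl_options.force_native_arrays is set. That single accessor is what makes the workaround force C-style array declarations across the whole backend.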

View File

@@ -312,6 +312,11 @@ public:
// and would otherwise declare a different IAB.
bool force_active_argument_buffer_resources = false;
// Forces the use of plain arrays, which works around certain driver bugs on certain versions
// of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210.
// May reduce performance in scenarios where arrays are copied around as value-types.
bool force_native_arrays = false;
bool is_ios()
{
return platform == iOS;
@@ -827,7 +832,10 @@ protected:
bool has_sampled_images = false;
bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index.
bool use_builtin_array = false; // Force the use of C style array declaration.
bool is_using_builtin_array = false; // Force the use of C style array declaration.
bool using_builtin_array() const;
bool is_rasterization_disabled = false;
bool capture_output_to_buffer = false;
bool needs_swizzle_buffer_def = false;

View File

@@ -254,6 +254,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('0')
msl_args.append('--msl-device-argument-buffer')
msl_args.append('1')
if '.force-native-array.' in shader:
msl_args.append('--msl-force-native-arrays')
subprocess.check_call(msl_args)
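
In the test suite the workaround is opted into purely by file name: any MSL test shader whose path contains the marker .force-native-array. (for example a file named something like array-copy.force-native-array.comp; the name is illustrative) gets --msl-force-native-arrays appended to the spirv-cross invocation.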