From e32c474911781b7a2e0cc1e70f85b7404fc71a65 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Fri, 9 Apr 2021 13:23:09 +0200
Subject: [PATCH] MSL: Handle masking of TESC IO block members.

---
 ...te-outputs-block.mask-location-0.msl2.tesc | 41 +++++++++++
 ...te-outputs-block.mask-location-1.msl2.tesc | 41 +++++++++++
 ...te-outputs-block.mask-location-0.msl2.tesc | 49 +++++++++++++
 ...te-outputs-block.mask-location-1.msl2.tesc | 49 +++++++++++++
 ...te-outputs-block.mask-location-0.msl2.tesc | 28 ++++++++
 ...te-outputs-block.mask-location-1.msl2.tesc | 28 ++++++++
 spirv_msl.cpp                                 | 68 ++++++++++++-------
 spirv_msl.hpp                                 |  2 +-
 8 files changed, 281 insertions(+), 25 deletions(-)
 create mode 100644 reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
 create mode 100644 reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
 create mode 100644 reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
 create mode 100644 reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
 create mode 100644 shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
 create mode 100644 shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc

diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
new file mode 100644
index 00000000..a8d1b750
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
@@ -0,0 +1,41 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_a;
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_b;
+};
+
+kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    threadgroup P _11;
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    _11.a = 1.0;
+    patchOut.P_b = 2.0;
+    gl_out[gl_InvocationID].C_a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
new file mode 100644
index 00000000..8af75f7b
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
@@ -0,0 +1,41 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_a;
+    float P_b;
+};
+
+kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    threadgroup C c[4];
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    patchOut.P_a = 1.0;
+    patchOut.P_b = 2.0;
+    c[gl_InvocationID].a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
new file mode 100644
index 00000000..e0641663
--- /dev/null
+++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
@@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_a;
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_b;
+};
+
+static inline __attribute__((always_inline))
+void write_in_function(threadgroup P& _11, device main0_patchOut& patchOut, device main0_out* thread & gl_out, thread uint& gl_InvocationID)
+{
+    _11.a = 1.0;
+    patchOut.P_b = 2.0;
+    gl_out[gl_InvocationID].C_a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
+kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    threadgroup P _11;
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    write_in_function(_11, patchOut, gl_out, gl_InvocationID);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
new file mode 100644
index 00000000..e40fa2d6
--- /dev/null
+++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
@@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_a;
+    float P_b;
+};
+
+static inline __attribute__((always_inline))
+void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID)
+{
+    patchOut.P_a = 1.0;
+    patchOut.P_b = 2.0;
+    c[gl_InvocationID].a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
+kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    threadgroup C c[4];
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    write_in_function(patchOut, c, gl_out, gl_InvocationID);
+}
+
diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
new file mode 100644
index 00000000..955f2c41
--- /dev/null
+++ b/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc
@@ -0,0 +1,28 @@
+#version 450
+
+layout(vertices = 4) out;
+patch out P
+{
+	layout(location = 0) float a;
+	layout(location = 2) float b;
+};
+
+out C
+{
+	layout(location = 1) float a;
+	layout(location = 3) float b;
+} c[];
+
+void write_in_function()
+{
+	a = 1.0;
+	b = 2.0;
+	c[gl_InvocationID].a = 3.0;
+	c[gl_InvocationID].b = 4.0;
+	gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+}
+
+void main()
+{
+	write_in_function();
+}
diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
new file mode 100644
index 00000000..955f2c41
--- /dev/null
+++ b/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc
@@ -0,0 +1,28 @@
+#version 450
+
+layout(vertices = 4) out;
+patch out P
+{
+	layout(location = 0) float a;
+	layout(location = 2) float b;
+};
+
+out C
+{
+	layout(location = 1) float a;
+	layout(location = 3) float b;
+} c[];
+
+void write_in_function()
+{
+	a = 1.0;
+	b = 2.0;
+	c[gl_InvocationID].a = 3.0;
+	c[gl_InvocationID].b = 4.0;
+	gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+}
+
+void main()
+{
+	write_in_function();
+}
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 17cadeea..92716580 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -1774,9 +1774,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 				else
 					name = var.storage == StorageClassInput ? "gl_in" : "gl_out";
 
-				if (var.storage == StorageClassOutput &&
-				    has_decoration(p_type->self, DecorationBlock) &&
-				    is_builtin)
+				if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock))
 				{
 					// If we're redirecting a block, we might still need to access the original block
 					// variable if we're masking some members.
@@ -1787,29 +1785,37 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 
 					if (needs_local_declaration)
 					{
-						// Ensure correct names for the block members if we're actually going to
-						// declare gl_PerVertex.
-						for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++)
+						if (is_builtin)
 						{
-							set_member_name(p_type->self, mbr_idx, builtin_to_glsl(
-									BuiltIn(get_member_decoration(p_type->self, mbr_idx, DecorationBuiltIn)),
-									StorageClassOutput));
-						}
+							// Ensure correct names for the block members if we're actually going to
+							// declare gl_PerVertex.
+							for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++)
+							{
+								set_member_name(p_type->self, mbr_idx, builtin_to_glsl(
+										BuiltIn(get_member_decoration(p_type->self, mbr_idx, DecorationBuiltIn)),
+										StorageClassOutput));
+							}
 
-						if (!stage_out_var_id_masked)
+							if (!stage_out_var_id_masked_builtin)
+							{
+								stage_out_var_id_masked_builtin = ir.increase_bound_by(1);
+								set<SPIRVariable>(stage_out_var_id_masked_builtin, var.basetype, StorageClassOutput);
+								set_name(stage_out_var_id_masked_builtin, name + "_masked");
+								set_name(var.self, name + "_masked");
+
+								// Not required, but looks nicer.
+								set_name(p_type->self, "gl_PerVertex");
+
+								auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
+								entry_func.add_local_variable(stage_out_var_id_masked_builtin);
+							}
+							func.add_parameter(var.basetype, stage_out_var_id_masked_builtin, true);
+						}
+						else
 						{
-							stage_out_var_id_masked = ir.increase_bound_by(1);
-							set<SPIRVariable>(stage_out_var_id_masked, var.basetype, StorageClassOutput);
-							set_name(stage_out_var_id_masked, name + "_masked");
-							set_name(var.self, name + "_masked");
-
-							// Not required, but looks nicer.
-							set_name(p_type->self, "gl_PerVertex");
-
-							auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
-							entry_func.add_local_variable(stage_out_var_id_masked);
+							// Local variable is declared in add_variable_to_interface_block().
+							func.add_parameter(var.basetype, var.self, true);
 						}
-						func.add_parameter(var.basetype, stage_out_var_id_masked, true);
 					}
 				}
 
@@ -3012,6 +3018,8 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
 		}
 		else
 		{
+			bool masked_block = false;
+
 			// Flatten the struct members into the interface struct
 			for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++)
 			{
@@ -3021,6 +3029,8 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
 
 				if (storage == StorageClassOutput && is_stage_output_block_member_masked(var, mbr_idx, meta.strip_array))
 				{
+					masked_block = true;
+
 					// Non-builtin block output variables are just ignored, since they will still access
 					// the block variable as-is. They're just not flattened.
 					if (is_builtin && !meta.strip_array)
@@ -3071,6 +3081,11 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
 					}
 				}
 			}
+
+			// If we're redirecting a block, we might still need to access the original block
+			// variable if we're masking some members.
+			if (masked_block && !needs_local_declaration && !is_builtin_variable(var))
+				emit_local_masked_variable(var);
 		}
 	}
 	else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput &&
@@ -6525,7 +6540,7 @@ void CompilerMSL::emit_specialization_constants_and_structs()
 				is_declarable_struct = false;
 
 			// Special case. Declare builtin struct anyways if we need to emit a threadgroup version of it.
-			if (stage_out_var_id_masked && get<SPIRType>(get<SPIRVariable>(stage_out_var_id_masked).basetype).self == type_id)
+			if (stage_out_var_id_masked_builtin && get<SPIRType>(get<SPIRVariable>(stage_out_var_id_masked_builtin).basetype).self == type_id)
 				is_declarable_struct = true;
 
 			// Align and emit declarable structs...but avoid declaring each more than once.
@@ -13063,8 +13078,13 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable
 		auto model = get_execution_model();
 
 		// Specially masked IO block variable.
-		if (variable.self == stage_out_var_id_masked)
+		// Normally, we will never access IO blocks directly here.
+		// The only scenario which that should occur is with a masked IO block.
+		if (model == ExecutionModelTessellationControl && variable.storage == StorageClassOutput &&
+		    has_decoration(get<SPIRType>(variable.basetype).self, DecorationBlock))
+		{
 			return true;
+		}
 
 		return variable.storage == StorageClassOutput &&
 		       model == ExecutionModelTessellationControl &&
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index c0525f7c..2059774f 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -990,7 +990,7 @@ protected:
 	VariableID patch_stage_out_var_id = 0;
 	VariableID stage_in_ptr_var_id = 0;
 	VariableID stage_out_ptr_var_id = 0;
-	VariableID stage_out_var_id_masked = 0;
+	VariableID stage_out_var_id_masked_builtin = 0;
 
 	// Handle HLSL-style 0-based vertex/instance index.
 	enum class TriState