Fixes to the generation of Metal tessellation shaders from SPIRV so that it works correctly in more complicated cases.

First, when generating from HLSL, a control-flow barrier and a full memory barrier are required before invoking the code that comes from the HLSL patch-function, to ensure that all the temporary values held in thread-local storage for the patch are available.
    Second, the inputs to control and evaluation shaders must be properly forwarded from the global variables in SPIRV to the member variables in the relevant input structure.
    Finally, when arrays of interpolators are used for input or output, we need to add an extra level of array indirection, because Metal works at a different granularity than SPIRV.

    Five parts.
    1. Fix tessellation patch function processing.
    2. Fix loads from tessellation control inputs not being forwarded to the gl_in structure array.
    3. Fix loads from tessellation evaluation inputs not being forwarded to the stage_in structure array.
    4. Workaround SPIRV losing an array indirection in tessellation shaders - not the best solution but enough to keep things progressing.
    5. Apparently gl_TessLevelInner/Outer is special and needs to not be placed into the input array.
This commit is contained in:
Mark Satterthwaite 2019-08-14 10:51:48 -04:00 committed by Lukas Hermanns
parent de6441af88
commit 42b8a62870
4 changed files with 516 additions and 6 deletions

View File

@ -3965,6 +3965,19 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
}
break;
/* UE Change Begin: Metal tessellation likes empty structs which are then constant expressions. */
case SPIRType::Struct:
if (type.member_types.size() == 0)
{
res += "{ }";
}
else
{
SPIRV_CROSS_THROW("Invalid constant struct initialisation missing member initializers.");
}
break;
/* UE Change End: Metal tessellation likes empty structs which are then constant expressions. */
default:
SPIRV_CROSS_THROW("Invalid constant expression basetype.");
}

View File

@ -490,8 +490,10 @@ protected:
SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
bool suppress_usage_tracking = false);
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
AccessChainMeta *meta);
/* UE Change Begin: Storage buffer robustness */
virtual std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
AccessChainMeta *meta);
/* UE Change End: Storage buffer robustness */
std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
AccessChainMeta *meta = nullptr, bool ptr_chain = false);

View File

@ -6122,8 +6122,11 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
if (get_execution_model() == ExecutionModelTessellationControl ||
(mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)))
mem_flags += "mem_flags::mem_device";
if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
MemorySemanticsAtomicCounterMemoryMask))
/* UE Change Begin: Fix tessellation patch function processing */
if (get_execution_model() == ExecutionModelTessellationControl ||
(mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
MemorySemanticsAtomicCounterMemoryMask)))
/* UE Change End: Fix tessellation patch function processing */
{
if (!mem_flags.empty())
mem_flags += " | ";
@ -9468,7 +9471,6 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
address_space = "constant";
}
}
//>>>>>>> 6ecced4b... Rework the way arrays are handled in Metal to remove the array copies as they are unnecessary from Metal 1.2. There were cases where copies were not being inserted and others appeared unncessary, using the template type should allow the 'metal' compiler to do the best possible optimisation. The changes are broken into three stages. 1. Allow Metal to use the array<T> template to make arrays a value type. 2. Force the use of C style array declaration for some cases which cannot be wrapped with a template. 3. Threadgroup arrays can't have a wrapper type. 4. Tweak the code to use unsafe_array in a few more places so that we can handle passing arrays of resources into the shader and then through shaders into sub-functions. 5. Handle packed matrix types inside arrays within structs. 6. Make sure that builtin arguments still retain their array qualifiers when used in leaf functions. 7. Fix declaration of array-of-array constants for Metal so we can use the array<T> template.
}
else if (is_dynamic_img_sampler)
{
@ -11465,6 +11467,43 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
break;
}
/* UE Change Begin: Fix tessellation patch function processing */
case OpLoad:
{
if(compiler.get_execution_model() == ExecutionModelTessellationControl)
{
uint32_t id = args[1];
uint32_t ptr = args[2];
uint32_t source_id = ptr;
auto *var = compiler.maybe_get_backing_variable(source_id);
if (var)
source_id = var->self;
// Only interested in standalone builtin variables.
if (compiler.has_decoration(source_id, DecorationBuiltIn))
{
auto builtin = static_cast<BuiltIn>(compiler.get_decoration(source_id, DecorationBuiltIn));
switch (builtin)
{
case BuiltInInvocationId:
invocation_ids[id] = ptr;
break;
default:
break;
}
}
}
break;
}
case OpControlBarrier:
{
passed_control_barrier = true;
break;
}
/* UE Change End: Fix tessellation patch function processing */
case OpInBoundsAccessChain:
case OpAccessChain:
case OpPtrAccessChain:
@ -11473,6 +11512,54 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
uint32_t result_type = args[0];
uint32_t id = args[1];
uint32_t ptr = args[2];
/* UE Change Begin: Fix tessellation patch function processing */
if(compiler.get_execution_model() == ExecutionModelTessellationControl)
{
uint32_t source_id = args[3];
bool isIndexedByInvocation = variables_indexed_by_invocation.find(ptr) != variables_indexed_by_invocation.end() || invocation_ids.find(source_id) != invocation_ids.end();
if (!isIndexedByInvocation)
{
auto *var = compiler.maybe_get_backing_variable(source_id);
if (var)
source_id = var->self;
// Only interested in standalone builtin variables.
if (compiler.has_decoration(source_id, DecorationBuiltIn))
{
auto builtin = static_cast<BuiltIn>(compiler.get_decoration(source_id, DecorationBuiltIn));
switch (builtin)
{
case BuiltInInvocationId:
isIndexedByInvocation = true;
break;
default:
break;
}
}
}
if (isIndexedByInvocation)
{
if (passed_control_barrier)
{
auto *var = compiler.maybe_get_backing_variable(ptr);
if (var)
{
auto* var_type = compiler.maybe_get<SPIRType>(var->basetype);
var_type->storage = StorageClassWorkgroup;
var->storage = StorageClassWorkgroup;
variables_indexed_by_invocation.erase(ptr);
}
}
else
{
variables_indexed_by_invocation.insert(ptr);
}
}
}
/* UE Change End: Fix tessellation patch function processing */
compiler.set<SPIRExpression>(id, "", result_type, true);
compiler.register_read(id, ptr, true);
compiler.ir.ids[id].set_allow_type_rewrite();
@ -11500,6 +11587,403 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id)
uses_resource_write = true;
}
/* UE Change Begin: Fix loads from tessellation control inputs not being forwarded to the gl_in structure array */
/* UE Change Begin: Fix loads from tessellation evaluation inputs not being forwarded to the stage_in structure array */
std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta)
{
string expr;
bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
if (!chain_only)
{
// We handle transpose explicitly, so don't resolve that here.
auto *e = maybe_get<SPIRExpression>(base);
bool old_transpose = e && e->need_transpose;
if (e)
e->need_transpose = false;
expr = to_enclosed_expression(base, register_expression_read);
if (e)
e->need_transpose = old_transpose;
}
// Start traversing type hierarchy at the proper non-pointer types,
// but keep type_id referencing the original pointer for use below.
uint32_t type_id = expression_type_id(base);
if (!backend.native_pointers)
{
if (ptr_chain)
SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
// Wrapped buffer reference pointer types will need to poke into the internal "value" member before
// continuing the access chain.
if (should_dereference(base))
{
auto &type = get<SPIRType>(type_id);
expr = dereference_expression(type, expr);
}
}
const auto *type = &get_pointee_type(type_id);
auto *var = maybe_get<SPIRVariable>(base);
const auto *var_type = var ? maybe_get<SPIRType>(var->basetype) : nullptr;
bool ssbo = msl_options.enforce_storge_buffer_bounds && var && var_type && (var->storage == StorageClassStorageBuffer || (var_type->basetype == SPIRType::Struct && var->storage == StorageClassUniform && has_decoration(var_type->self, DecorationBufferBlock)));
bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
bool is_invariant = has_decoration(base, DecorationInvariant);
bool pending_array_enclose = false;
bool dimension_flatten = false;
auto* tess_var = maybe_get_backing_variable(base);
bool tess_control_input = (get_execution_model() == ExecutionModelTessellationControl && tess_var && tess_var->storage == StorageClassInput);
bool tess_eval_input = (get_execution_model() == ExecutionModelTessellationEvaluation && tess_var && tess_var->storage == StorageClassInput && expr.find("gl_in") == string::npos) && expr != "gl_TessLevelInner" && expr != "gl_TessLevelOuter";
bool tess_eval_input_array = (get_execution_model() == ExecutionModelTessellationEvaluation && access_chain_is_arrayed && expr.find("gl_in[") != string::npos);
/* UE Change Begin: Workaround SPIRV losing an array indirection in tessellation shaders - not the best solution but enough to keep things progressing. */
bool tess_control_input_array = ((get_execution_model() == ExecutionModelTessellationControl || get_execution_model() == ExecutionModelTessellationEvaluation) && type->array.size() == 2 && type->array[0] >= 1);
uint32_t tess_control_input_array_num = type->array[0];
bool tess_eval_input_array_deref = type && tess_eval_input_array && expr.find("({") == 0;
if (tess_eval_input_array_deref)
{
expr = type_to_glsl(*type) + expr;
}
const auto append_index = [&](uint32_t index) {
std::string name;
if (tess_control_input) {
name = expr;
expr = "gl_in";
}
else if (tess_eval_input && !tess_eval_input_array) {
name = expr;
expr = to_expression(patch_stage_in_var_id) + ".gl_in";
}
expr += "[";
// If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
bool nonuniform_index =
has_decoration(index, DecorationNonUniformEXT) &&
(has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
if (nonuniform_index)
{
expr += backend.nonuniform_qualifier;
expr += "(";
}
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_expression(index, register_expression_read);
if (nonuniform_index)
expr += ")";
if (ssbo)
{
expr += ")";
ssbo = false;
}
expr += "]";
if (tess_eval_input_array)
{
tess_eval_input_array = false;
}
if (tess_control_input || tess_eval_input)
{
expr += ".";
expr += name;
tess_control_input = false;
tess_eval_input = false;
if (tess_control_input_array)
{
name = expr;
expr = "{ ";
for (uint32_t i = 0; i < tess_control_input_array_num; i++) {
if (i > 0)
expr += ", ";
expr += name;
expr += "_";
expr += convert_to_string(i);
}
expr += " }";
}
}
};
/* UE Change End: Workaround SPIRV losing an array indirection in tessellation shaders - not the best solution but enough to keep things progressing. */
for (uint32_t i = 0; i < count; i++)
{
uint32_t index = indices[i];
// Pointer chains
if (ptr_chain && i == 0)
{
// If we are flattening multidimensional arrays, only create opening bracket on first
// array index.
if (options.flatten_multidimensional_arrays)
{
dimension_flatten = type->array.size() >= 1;
pending_array_enclose = dimension_flatten;
if (pending_array_enclose)
expr += "[";
}
if (options.flatten_multidimensional_arrays && dimension_flatten)
{
// If we are flattening multidimensional arrays, do manual stride computation.
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_enclosed_expression(index, register_expression_read);
for (auto j = uint32_t(type->array.size()); j; j--)
{
expr += " * ";
expr += enclose_expression(to_array_size(*type, j - 1));
}
if (type->array.empty())
pending_array_enclose = false;
else
expr += " + ";
if (!pending_array_enclose)
expr += "]";
}
else
{
append_index(index);
}
if (type->basetype == SPIRType::ControlPointArray)
{
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
access_chain_is_arrayed = true;
}
// Arrays
else if (!type->array.empty())
{
// If we are flattening multidimensional arrays, only create opening bracket on first
// array index.
if (options.flatten_multidimensional_arrays && !pending_array_enclose)
{
dimension_flatten = type->array.size() > 1;
pending_array_enclose = dimension_flatten;
if (pending_array_enclose)
expr += "[";
}
assert(type->parent_type);
if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
!has_decoration(type->self, DecorationBlock))
{
// This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
// Normally, these variables live in blocks when compiled from GLSL,
// but HLSL seems to just emit straight arrays here.
// We must pretend this access goes through gl_in/gl_out arrays
// to be able to access certain builtins as arrays.
auto builtin = ir.meta[base].decoration.builtin_type;
switch (builtin)
{
// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
// case BuiltInClipDistance:
case BuiltInPosition:
case BuiltInPointSize:
if (var->storage == StorageClassInput)
expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
else if (var->storage == StorageClassOutput)
expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
else
append_index(index);
break;
default:
append_index(index);
break;
}
}
else if (options.flatten_multidimensional_arrays && dimension_flatten)
{
// If we are flattening multidimensional arrays, do manual stride computation.
auto &parent_type = get<SPIRType>(type->parent_type);
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_enclosed_expression(index, register_expression_read);
for (auto j = uint32_t(parent_type.array.size()); j; j--)
{
expr += " * ";
expr += enclose_expression(to_array_size(parent_type, j - 1));
}
if (parent_type.array.empty())
pending_array_enclose = false;
else
expr += " + ";
if (!pending_array_enclose)
expr += "]";
}
else
{
append_index(index);
}
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
access_chain_is_arrayed = true;
}
// For structs, the index refers to a constant, which indexes into the members.
// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
else if (type->basetype == SPIRType::Struct)
{
if (!index_is_literal)
index = get<SPIRConstant>(index).scalar();
if (index >= type->member_types.size())
SPIRV_CROSS_THROW("Member index is out of bounds!");
BuiltIn builtin;
if (is_member_builtin(*type, index, &builtin))
{
if (access_chain_is_arrayed)
{
expr += ".";
expr += builtin_to_glsl(builtin, type->storage);
}
else
expr = builtin_to_glsl(builtin, type->storage);
}
else
{
// If the member has a qualified name, use it as the entire chain
string qual_mbr_name = get_member_qualified_name(type_id, index);
if (!qual_mbr_name.empty())
expr = qual_mbr_name;
else
expr += to_member_reference(base, *type, index, ptr_chain);
}
if (has_member_decoration(type->self, index, DecorationInvariant))
is_invariant = true;
is_packed = member_is_packed_physical_type(*type, index);
if (member_is_remapped_physical_type(*type, index))
physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
else
physical_type = 0;
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
type = &get<SPIRType>(type->member_types[index]);
}
// Matrix -> Vector
else if (type->columns > 1)
{
// If we have a row-major matrix here, we need to defer any transpose in case this access chain
// is used to store a column. We can resolve it right here and now if we access a scalar directly,
// by flipping indexing order of the matrix.
expr += "[";
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_expression(index, register_expression_read);
expr += "]";
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
// Vector -> Scalar
else if (type->vecsize > 1)
{
string deferred_index;
if (row_major_matrix_needs_conversion)
{
// Flip indexing order.
auto column_index = expr.find_last_of('[');
if (column_index != string::npos)
{
deferred_index = expr.substr(column_index);
expr.resize(column_index);
}
}
if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion)
{
expr += ".";
expr += index_to_swizzle(index);
}
else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
{
auto &c = get<SPIRConstant>(index);
expr += ".";
expr += index_to_swizzle(c.scalar());
}
else if (index_is_literal)
{
// For packed vectors, we can only access them as an array, not by swizzle.
expr += join("[", index, "]");
}
else
{
expr += "[";
expr += to_expression(index, register_expression_read);
expr += "]";
}
expr += deferred_index;
row_major_matrix_needs_conversion = false;
is_packed = false;
physical_type = 0;
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
else if (!backend.allow_truncated_access_chain)
SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
}
if (pending_array_enclose)
{
SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
"but the access chain was terminated in the middle of a multidimensional array. "
"This is not supported.");
}
if (meta)
{
meta->need_transpose = row_major_matrix_needs_conversion;
meta->storage_is_packed = is_packed;
meta->storage_is_invariant = is_invariant;
meta->storage_physical_type = physical_type;
}
return expr;
}
/* UE Change End: Fix loads from tessellation evaluation inputs not being forwarded to the stage_in structure array */
/* UE Change End: Fix loads from tessellation control inputs not being forwarded to the gl_in structure array */
// Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes.
CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args)
{

View File

@ -288,6 +288,10 @@ public:
bool ios_use_framebuffer_fetch_subpasses = true;
/* UE Change End: Use Metal's native frame-buffer fetch API for subpass inputs. */
/* UE Change Begin: Storage buffer robustness - clamps access to SSBOs to the size of the buffer */
bool enforce_storge_buffer_bounds = false;
/* UE Change End: Storage buffer robustness - clamps access to SSBOs to the size of the buffer */
// Requires MSL 2.1, use the native support for texel buffers.
bool texture_buffer_native = false;
@ -594,7 +598,9 @@ protected:
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
bool is_packed) override;
/* UE Change Begin: Storage buffer robustness */
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta) override;
/* UE Change End: Storage buffer robustness */
void preprocess_op_codes();
void localize_global_variables();
void extract_global_variables_from_functions();
@ -850,6 +856,11 @@ protected:
/* UE Change Begin: Emulate texture2D atomic operations */
std::unordered_map<uint32_t, SPIRVariable*> image_pointers;
/* UE Change End: Emulate texture2D atomic operations */
/* UE Change Begin: Fix tessellation patch function processing */
std::unordered_map<uint32_t, uint32_t> invocation_ids;
std::unordered_set<uint32_t> variables_indexed_by_invocation;
bool passed_control_barrier = false;
/* UE Change End: Fix tessellation patch function processing */
bool suppress_missing_prototypes = false;
bool uses_atomics = false;
bool uses_resource_write = false;