Unflatten inputs when copying to outputs.

This should fix a whole host of issues related to structs in the `Input`
class in a tessellation control shader.

Also, use pointer arithmetic instead of dereferencing the `ops` array.
This is critical in case we wind up stepping beyond the bounds of the
array.
This commit is contained in:
Chip Davis 2019-02-13 12:36:12 -06:00
parent 83b7e66218
commit 13df78bebf
4 changed files with 194 additions and 10 deletions

View File

@ -0,0 +1,40 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Boo
{
float3 a;
float3 b;
};
struct main0_out
{
Boo vVertex;
};
struct main0_in
{
float3 Boo_a [[attribute(0)]];
float3 Boo_b [[attribute(1)]];
};
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;
threadgroup_barrier(mem_flags::mem_threadgroup);
Boo vInput_24;
vInput_24.a = gl_in[gl_InvocationID].Boo_a;
vInput_24.b = gl_in[gl_InvocationID].Boo_b;
gl_out[gl_InvocationID].vVertex = vInput_24;
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0);
}

View File

@ -0,0 +1,40 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Boo
{
float3 a;
float3 b;
};
struct main0_out
{
Boo vVertex;
};
struct main0_in
{
float3 Boo_a [[attribute(0)]];
float3 Boo_b [[attribute(1)]];
};
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;
threadgroup_barrier(mem_flags::mem_threadgroup);
Boo vInput_24;
vInput_24.a = gl_in[gl_InvocationID].Boo_a;
vInput_24.b = gl_in[gl_InvocationID].Boo_b;
gl_out[gl_InvocationID].vVertex = vInput_24;
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0);
}

View File

@ -0,0 +1,22 @@
#version 450
struct Boo
{
vec3 a;
vec3 b;
};
layout(vertices = 4) out;
layout(location = 0) out Boo vVertex[];
layout(location = 0) in Boo vInput[];
void main()
{
vVertex[gl_InvocationID] = vInput[gl_InvocationID];
gl_TessLevelOuter[0] = 1.0;
gl_TessLevelOuter[1] = 2.0;
gl_TessLevelOuter[2] = 3.0;
gl_TessLevelOuter[3] = 4.0;
gl_TessLevelInner[0] = 1.0;
gl_TessLevelInner[1] = 2.0;
}

View File

@ -3357,23 +3357,25 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t const_mbr_id = next_id++;
uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);
uint32_t ptr = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id;
if (var->storage == StorageClassInput ||
has_decoration(get_variable_element_type(*var).self, DecorationBlock))
{
uint32_t i = 4;
if (index == uint32_t(-1))
auto *type = &get_variable_element_type(*var);
if (index == uint32_t(-1) && length >= 5)
{
// Maybe this is a struct type in the input class, in which case
// we put it as a decoration on the corresponding member.
index =
get_extended_member_decoration(ops[2], ops[4], SPIRVCrossDecorationInterfaceMemberIndex);
index = get_extended_member_decoration(ops[2], get_constant(ops[4]).scalar(),
SPIRVCrossDecorationInterfaceMemberIndex);
assert(index != uint32_t(-1));
i++;
type = &get<SPIRType>(type->member_types[get_constant(ops[4]).scalar()]);
}
// In this case, we flattened structures and arrays, so now we have to
// combine the following indices. If we encounter a non-constant index,
// we're hosed.
auto *type = &get_variable_element_type(*var);
for (; i < length; ++i)
{
if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct)
@ -3383,30 +3385,110 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
index += c.scalar();
if (type->parent_type)
type = &get<SPIRType>(type->parent_type);
else if (type->basetype == SPIRType::Struct)
type = &get<SPIRType>(type->member_types[c.scalar()]);
}
set<SPIRConstant>(const_mbr_id, type_id, index, false);
indices.push_back(const_mbr_id);
// If the access chain terminates at a composite type, the composite
// itself might be copied. In that case, we must unflatten it.
if (is_matrix(*type) || is_array(*type) || type->basetype == SPIRType::Struct)
{
std::string temp_name = join(to_name(var->self), "_", ops[1]);
statement(variable_decl(*type, temp_name, var->self), ";");
// Set up the initializer for this temporary variable.
indices.push_back(const_mbr_id);
if (type->basetype == SPIRType::Struct)
{
for (uint32_t j = 0; j < type->member_types.size(); j++)
{
index =
get_extended_member_decoration(ops[2], j, SPIRVCrossDecorationInterfaceMemberIndex);
const auto &mbr_type = get<SPIRType>(type->member_types[j]);
if (is_matrix(mbr_type))
{
for (uint32_t k = 0; k < mbr_type.columns; k++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e =
access_chain(ptr, indices.data(), indices.size(), mbr_type, nullptr, true);
statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";");
}
}
else if (is_array(mbr_type))
{
for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e =
access_chain(ptr, indices.data(), indices.size(), mbr_type, nullptr, true);
statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";");
}
}
else
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), indices.size(), mbr_type, nullptr, true);
statement(temp_name, ".", to_member_name(*type, j), " = ", e, ";");
}
}
}
else if (is_matrix(*type))
{
for (uint32_t j = 0; j < type->columns; j++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), indices.size(), *type, nullptr, true);
statement(temp_name, "[", j, "] = ", e, ";");
}
}
else // Must be an array
{
assert(is_array(*type));
for (uint32_t j = 0; j < type->array[0]; j++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), indices.size(), *type, nullptr, true);
statement(temp_name, "[", j, "] = ", e, ";");
}
}
indices.insert(indices.end(), &ops[i], &ops[length]);
// This needs to be a variable instead of an expression so we don't
// try to dereference this as a variable pointer.
set<SPIRVariable>(ops[1], ops[0], var->storage);
ir.meta[ops[1]] = ir.meta[ops[2]];
set_name(ops[1], temp_name);
if (has_decoration(var->self, DecorationInvariant))
set_decoration(ops[1], DecorationInvariant);
for (uint32_t j = 2; j < length; j++)
inherit_expression_dependencies(ops[1], ops[j]);
break;
}
else
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
indices.push_back(const_mbr_id);
if (i < length)
indices.insert(indices.end(), ops + i, ops + length);
}
}
else
{
assert(index != uint32_t(-1));
set<SPIRConstant>(const_mbr_id, type_id, index, false);
indices.push_back(const_mbr_id);
indices.insert(indices.end(), &ops[4], &ops[length]);
indices.insert(indices.end(), ops + 4, ops + length);
}
// We use the pointer to the base of the input/output array here,
// so this is always a pointer chain.
uint32_t ptr = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id;
auto e = access_chain(ptr, indices.data(), indices.size(), get<SPIRType>(ops[0]), &meta, true);
auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
expr.loaded_from = var->self;
expr.need_transpose = meta.need_transpose;
expr.access_chain = true;
// Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
// Mark the result as being packed if necessary.
if (meta.storage_is_packed)
set_extended_decoration(ops[1], SPIRVCrossDecorationPacked);
if (meta.storage_packed_type != 0)