Never flatten outputs when capturing them.

There's no need to do so, since these are not stage-out structs being
returned, but regular structures being written to a buffer. This also
neatly avoids issues writing to composite (e.g. arrayed) per-patch
outputs from a tessellation control shader.
This commit is contained in:
Chip Davis 2019-02-11 17:18:54 -06:00
parent 8860a97d4a
commit 0bb6bbda22
4 changed files with 127 additions and 14 deletions

View File

@ -0,0 +1,42 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float3 vVertex;
};
struct main0_patchOut
{
float3 vPatch[2];
};
struct main0_in
{
float3 vInput [[attribute(0)]];
};
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;
threadgroup_barrier(mem_flags::mem_threadgroup);
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
threadgroup_barrier(mem_flags::mem_device);
if (gl_InvocationID == 0)
{
patchOut.vPatch[0] = float3(10.0);
patchOut.vPatch[1] = float3(20.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0);
}
}

View File

@ -0,0 +1,42 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float3 vVertex;
};
struct main0_patchOut
{
float3 vPatch[2];
};
struct main0_in
{
float3 vInput [[attribute(0)]];
};
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;
threadgroup_barrier(mem_flags::mem_threadgroup);
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
threadgroup_barrier(mem_flags::mem_device);
if (gl_InvocationID == 0)
{
patchOut.vPatch[0] = float3(10.0);
patchOut.vPatch[1] = float3(20.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0);
}
}

View File

@ -0,0 +1,27 @@
#version 450
layout(vertices = 4) out;
layout(location = 0) patch out vec3 vPatch[2];
layout(location = 2) out vec3 vVertex[];
layout(location = 0) in vec3 vInput[];
void main()
{
vVertex[gl_InvocationID] =
vInput[gl_InvocationID] +
vInput[gl_InvocationID ^ 1];
barrier();
if (gl_InvocationID == 0)
{
vPatch[0] = vec3(10.0);
vPatch[1] = vec3(20.0);
gl_TessLevelOuter[0] = 1.0;
gl_TessLevelOuter[1] = 2.0;
gl_TessLevelOuter[2] = 3.0;
gl_TessLevelOuter[3] = 4.0;
gl_TessLevelInner[0] = 1.0;
gl_TessLevelInner[1] = 2.0;
}
}

View File

@ -1044,12 +1044,14 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
var.basetype = type_id;
auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
type_id = get_pointee_type_id(var.basetype);
if (strip_array && is_array(get<SPIRType>(type_id)))
type_id = get<SPIRType>(type_id).parent_type;
auto &type = get<SPIRType>(type_id);
uint32_t target_components = 0;
uint32_t type_components = type.vecsize;
bool padded_output = false;
type_id = type.self;
// Check if we need to pad fragment output to match a certain number of components.
if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
@ -1532,7 +1534,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
if (var_type.basetype == SPIRType::Struct)
{
if (!is_builtin_type(var_type) && !strip_array)
if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && !strip_array)
{
// For I/O blocks or structs, we will need to pass the block itself around
// to functions if they are used globally in leaf functions.
@ -1544,14 +1546,13 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
vars_needing_early_declaration.push_back(var.self);
}
// Per-vertex outputs in a tess. control shader need special handling.
if (strip_array && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock))
if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock))
{
// We can't flatten the struct, because of the requirement that these be passed around as arrays.
// In Metal, the interface block itself is arrayed. This makes things very complicated, since
// stage-in structures in MSL don't support nested structures. Luckily, for stage-out in a
// tessellation control shader, we can get away with this because the structure is stored
// to a buffer, not returned.
// In Metal tessellation shaders, the interface block itself is arrayed. This makes things
// very complicated, since stage-in structures in MSL don't support nested structures.
// Luckily, for stage-out when capturing output, we can avoid this and just add
// composite members directly, because the stage-out structure is stored to a buffer,
// not returned.
add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
}
else
@ -1589,7 +1590,8 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
if (!is_builtin || has_active_builtin(builtin, storage))
{
// MSL does not allow matrices or arrays in input or output variables, so need to handle it specially.
if (!is_builtin && (storage == StorageClassInput || storage == StorageClassOutput) &&
if (!is_builtin &&
(storage == StorageClassInput || (storage == StorageClassOutput && !capture_output_to_buffer)) &&
(is_matrix(var_type) || is_array(var_type)))
{
add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
@ -5248,9 +5250,9 @@ string CompilerMSL::entry_point_args(bool append_comma)
{
if (!ep_args.empty())
ep_args += ", ";
ep_args += join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ",
patch_output_buffer_var_name, " [[buffer(",
convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]");
ep_args +=
join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", patch_output_buffer_var_name,
" [[buffer(", convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]");
}
if (!ep_args.empty())
ep_args += ", ";