Never flatten outputs when capturing them.
There's no need to do so, since these are not stage-out structs being returned, but regular structures being written to a buffer. This also neatly avoids issues writing to composite (e.g. arrayed) per-patch outputs from a tessellation control shader.
This commit is contained in:
parent
8860a97d4a
commit
0bb6bbda22
@ -0,0 +1,42 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float3 vVertex;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
float3 vPatch[2];
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float3 vInput [[attribute(0)]];
|
||||
};
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
|
||||
threadgroup_barrier(mem_flags::mem_device);
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
patchOut.vPatch[0] = float3(10.0);
|
||||
patchOut.vPatch[1] = float3(20.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0);
|
||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0);
|
||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,42 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float3 vVertex;
|
||||
};
|
||||
|
||||
struct main0_patchOut
|
||||
{
|
||||
float3 vPatch[2];
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float3 vInput [[attribute(0)]];
|
||||
};
|
||||
|
||||
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
||||
{
|
||||
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
|
||||
device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
|
||||
if (gl_InvocationID < spvIndirectParams[0])
|
||||
gl_in[gl_InvocationID] = in;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput;
|
||||
threadgroup_barrier(mem_flags::mem_device);
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
patchOut.vPatch[0] = float3(10.0);
|
||||
patchOut.vPatch[1] = float3(20.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0);
|
||||
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0);
|
||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0);
|
||||
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,27 @@
|
||||
#version 450
|
||||
|
||||
layout(vertices = 4) out;
|
||||
layout(location = 0) patch out vec3 vPatch[2];
|
||||
layout(location = 2) out vec3 vVertex[];
|
||||
layout(location = 0) in vec3 vInput[];
|
||||
|
||||
void main()
|
||||
{
|
||||
vVertex[gl_InvocationID] =
|
||||
vInput[gl_InvocationID] +
|
||||
vInput[gl_InvocationID ^ 1];
|
||||
|
||||
barrier();
|
||||
|
||||
if (gl_InvocationID == 0)
|
||||
{
|
||||
vPatch[0] = vec3(10.0);
|
||||
vPatch[1] = vec3(20.0);
|
||||
gl_TessLevelOuter[0] = 1.0;
|
||||
gl_TessLevelOuter[1] = 2.0;
|
||||
gl_TessLevelOuter[2] = 3.0;
|
||||
gl_TessLevelOuter[3] = 4.0;
|
||||
gl_TessLevelInner[0] = 1.0;
|
||||
gl_TessLevelInner[1] = 2.0;
|
||||
}
|
||||
}
|
@ -1044,12 +1044,14 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
|
||||
uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
|
||||
var.basetype = type_id;
|
||||
|
||||
auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
|
||||
type_id = get_pointee_type_id(var.basetype);
|
||||
if (strip_array && is_array(get<SPIRType>(type_id)))
|
||||
type_id = get<SPIRType>(type_id).parent_type;
|
||||
auto &type = get<SPIRType>(type_id);
|
||||
uint32_t target_components = 0;
|
||||
uint32_t type_components = type.vecsize;
|
||||
bool padded_output = false;
|
||||
|
||||
type_id = type.self;
|
||||
// Check if we need to pad fragment output to match a certain number of components.
|
||||
if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
|
||||
get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
|
||||
@ -1532,7 +1534,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
|
||||
|
||||
if (var_type.basetype == SPIRType::Struct)
|
||||
{
|
||||
if (!is_builtin_type(var_type) && !strip_array)
|
||||
if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && !strip_array)
|
||||
{
|
||||
// For I/O blocks or structs, we will need to pass the block itself around
|
||||
// to functions if they are used globally in leaf functions.
|
||||
@ -1544,14 +1546,13 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
|
||||
vars_needing_early_declaration.push_back(var.self);
|
||||
}
|
||||
|
||||
// Per-vertex outputs in a tess. control shader need special handling.
|
||||
if (strip_array && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock))
|
||||
if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock))
|
||||
{
|
||||
// We can't flatten the struct, because of the requirement that these be passed around as arrays.
|
||||
// In Metal, the interface block itself is arrayed. This makes things very complicated, since
|
||||
// stage-in structures in MSL don't support nested structures. Luckily, for stage-out in a
|
||||
// tessellation control shader, we can get away with this because the structure is stored
|
||||
// to a buffer, not returned.
|
||||
// In Metal tessellation shaders, the interface block itself is arrayed. This makes things
|
||||
// very complicated, since stage-in structures in MSL don't support nested structures.
|
||||
// Luckily, for stage-out when capturing output, we can avoid this and just add
|
||||
// composite members directly, because the stage-out structure is stored to a buffer,
|
||||
// not returned.
|
||||
add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
|
||||
}
|
||||
else
|
||||
@ -1589,7 +1590,8 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
|
||||
if (!is_builtin || has_active_builtin(builtin, storage))
|
||||
{
|
||||
// MSL does not allow matrices or arrays in input or output variables, so need to handle it specially.
|
||||
if (!is_builtin && (storage == StorageClassInput || storage == StorageClassOutput) &&
|
||||
if (!is_builtin &&
|
||||
(storage == StorageClassInput || (storage == StorageClassOutput && !capture_output_to_buffer)) &&
|
||||
(is_matrix(var_type) || is_array(var_type)))
|
||||
{
|
||||
add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
|
||||
@ -5248,9 +5250,9 @@ string CompilerMSL::entry_point_args(bool append_comma)
|
||||
{
|
||||
if (!ep_args.empty())
|
||||
ep_args += ", ";
|
||||
ep_args += join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ",
|
||||
patch_output_buffer_var_name, " [[buffer(",
|
||||
convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]");
|
||||
ep_args +=
|
||||
join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", patch_output_buffer_var_name,
|
||||
" [[buffer(", convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]");
|
||||
}
|
||||
if (!ep_args.empty())
|
||||
ep_args += ", ";
|
||||
|
Loading…
Reference in New Issue
Block a user