HLSL: Handle case where PerVertex block is not used.

This commit is contained in:
Hans-Kristian Arntzen 2022-11-02 13:07:32 +01:00
parent f5ef0a74fb
commit 3a066cd733
3 changed files with 239 additions and 15 deletions

View File

@ -0,0 +1,63 @@
// Task payload type: main() receives it with the `in payload` qualifier and forwards it
// to mesh_main(), which reads only _m0.
struct _12
{
float _m0; // Added to a groupshared value to form the per-vertex output B.
};
// Shader-global copy of the invocation index; main() fills it from stage_input
// before calling mesh_main().
static uint gl_LocalInvocationIndex;
// Entry-point input struct carrying the SV_GroupIndex system value.
struct SPIRV_Cross_Input
{
uint gl_LocalInvocationIndex : SV_GroupIndex;
};
// Element type of the per-vertex output array; main() declares `out vertices` with 24 of these.
struct gl_MeshPerVertexEXT
{
float4 B : TEXCOORD1; // User-defined per-vertex output.
float4 gl_Position : SV_Position; // Position builtin, carried as a struct member.
};
// Element type of the per-primitive output array; main() declares `out primitives` with 8 of these.
struct gl_MeshPerPrimitiveEXT
{
float4 C : TEXCOORD3; // User-defined per-primitive output.
uint gl_PrimitiveID : SV_PrimitiveID;
uint gl_Layer : SV_RenderTargetArrayIndex;
bool gl_CullPrimitiveEXT : SV_CullPrimitive;
};
// Group-shared scratch array: in mesh_main() each thread writes its own slot, then after the
// barrier reads slots written by other threads (index ^ 1 and index ^ 2).
groupshared float _9[64];
// Mesh shader body, called from main() once gl_LocalInvocationIndex has been set.
// Each thread stores its index (as float) into the groupshared array _9 and, after a
// group-wide barrier, fills the vertex slot selected by its index. Threads with index < 8
// additionally fill one triangle's indices and its per-primitive attributes.
//
// gl_MeshVerticesEXT             - per-vertex outputs (B, gl_Position), 24 entries.
// _11                            - task payload; only _m0 is read.
// gl_PrimitiveTriangleIndicesEXT - triangle index output, 8 entries.
// gl_MeshPrimitivesEXT           - per-primitive outputs (C plus primitive builtins), 8 entries.
//
// gl_Position, gl_PrimitiveID, gl_Layer and gl_CullPrimitiveEXT exist only as members of
// gl_MeshPerVertexEXT / gl_MeshPerPrimitiveEXT, so they are written through the output arrays
// rather than as free-standing arrays (which are not declared anywhere in this shader).
void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout uint3 gl_PrimitiveTriangleIndicesEXT[8], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8])
{
    // Publish this thread's value, then wait so other threads' slots can be read below.
    _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex);
    GroupMemoryBarrierWithGroupSync();
    SetMeshOutputCounts(24u, 8u);

    // Position builtin: all four components take this thread's own shared value.
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex];
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex];
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex];
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex];

    // User per-vertex output: payload value plus the neighbouring thread's shared value.
    float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u];
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63;
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63;
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.z = _63;
    gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.w = _63;

    // Only the first 8 threads own a primitive slot.
    if (gl_LocalInvocationIndex < 8u)
    {
        uint _71 = gl_LocalInvocationIndex * 3u;
        gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u);

        // Per-primitive builtins are members of the per-primitive output struct.
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u;
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex);
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex);

        // User per-primitive output: shared value of the thread whose index differs in bit 1.
        uint _81 = gl_LocalInvocationIndex ^ 2u;
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81];
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81];
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.z = _9[_81];
        gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.w = _9[_81];
    }
}
// Mesh shader entry point: triangle output topology, 2x3x4 threads per group.
// Copies the SV_GroupIndex input into the shader-global gl_LocalInvocationIndex and then
// runs mesh_main() on the vertex, payload, index and primitive parameters.
[outputtopology("triangle")]
[numthreads(2, 3, 4)]
void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], in payload _12 _11, out indices uint3 gl_PrimitiveTriangleIndicesEXT[8], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8])
{
// Must be set before the call: mesh_main() reads the global, not stage_input.
gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
mesh_main(gl_MeshVerticesEXT, _11, gl_PrimitiveTriangleIndicesEXT, gl_MeshPrimitivesEXT);
}

View File

@ -0,0 +1,150 @@
; SPIR-V
; Version: 1.4
; Generator: Unknown(30017); 21022
; Bound: 89
; Schema: 0
; Capabilities and extensions declared by this module.
OpCapability Shader
OpCapability Geometry
OpCapability ShaderViewportIndexLayerEXT
OpCapability MeshShadingEXT
OpExtension "SPV_EXT_mesh_shader"
OpExtension "SPV_EXT_shader_viewport_index_layer"
OpMemoryModel Logical GLSL450
; Entry-point interface: the outputs are listed as individual variables, followed by the
; workgroup array %32, the LocalInvocationIndex input and the task payload %38.
OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38
; Output limits (24 vertices, 8 triangle primitives) and a 2x3x4 local size.
OpExecutionMode %main OutputVertices 24
OpExecutionMode %main OutputPrimitivesNV 8
OpExecutionMode %main OutputTrianglesNV
OpExecutionMode %main LocalSize 2 3 4
; Debug names.
OpName %main "main"
OpName %SV_Position "SV_Position"
OpName %B "B"
OpName %SV_CullPrimitive "SV_CullPrimitive"
OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex"
OpName %SV_PrimitiveID "SV_PrimitiveID"
OpName %C "C"
OpName %indices "indices"
OpName %_ ""
; Decorations. No type or variable here carries a Block decoration: every builtin and
; user output is decorated directly on its own variable.
; Per-vertex outputs.
OpDecorate %SV_Position BuiltIn Position
OpDecorate %B Location 1
; Per-primitive outputs (each also carries PerPrimitiveNV).
OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT
OpDecorate %SV_CullPrimitive PerPrimitiveNV
OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer
OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV
OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId
OpDecorate %SV_PrimitiveID PerPrimitiveNV
OpDecorate %C Location 3
OpDecorate %C PerPrimitiveNV
; Primitive index output and the invocation-index input.
OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
; Basic types.
%void = OpTypeVoid
%2 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
; Per-vertex outputs: arrays of 24 float4 (matches OutputVertices 24).
%uint_24 = OpConstant %uint 24
%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24
%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24
%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output
%B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output
; Per-primitive outputs: arrays of 8 elements (matches OutputPrimitivesNV 8).
%bool = OpTypeBool
%uint_8 = OpConstant %uint 8
%_arr_bool_uint_8 = OpTypeArray %bool %uint_8
%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8
%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output
%_arr_uint_uint_8 = OpTypeArray %uint %uint_8
%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8
%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output
%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output
%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8
%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8
%C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output
; Triangle index output: 8 x uvec3.
%v3uint = OpTypeVector %uint 3
%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8
%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8
%indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output
; Workgroup-storage scratch array of 64 floats.
%uint_64 = OpConstant %uint 64
%_arr_float_uint_64 = OpTypeArray %float %uint_64
%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64
%32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup
; LocalInvocationIndex builtin input.
%_ptr_Input_uint = OpTypePointer Input %uint
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
; Task payload: a struct with a single float member.
%_ = OpTypeStruct %float
%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_
%38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT
; Remaining constants and element pointer types used inside %main.
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%_ptr_Output_float = OpTypePointer Output %float
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_3 = OpConstant %uint 3
%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float
%_ptr_Output_v3uint = OpTypePointer Output %v3uint
%_ptr_Output_bool = OpTypePointer Output %bool
%_ptr_Output_uint = OpTypePointer Output %uint
; Entry point. %35 is the local invocation index and is used as the array index for every
; output write below.
%main = OpFunction %void None %2
%4 = OpLabel
OpBranch %85
%85 = OpLabel
; Store float(index) into this invocation's slot of the workgroup array %32.
%35 = OpLoad %uint %gl_LocalInvocationIndex
%39 = OpConvertUToF %float %35
%41 = OpAccessChain %_ptr_Workgroup_float %32 %35
OpStore %41 %39
; Barrier: execution and memory scope 2 (Workgroup); semantics 264 = 0x108
; (AcquireRelease | WorkgroupMemory).
OpControlBarrier %uint_2 %uint_2 %uint_264
; Declare 24 vertices and 8 primitives as actually written.
OpSetMeshOutputsEXT %uint_24 %uint_8
; SV_Position[index].xyzw = own workgroup slot, written one component at a time.
%44 = OpLoad %float %41
%46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0
OpStore %46 %44
%48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1
OpStore %48 %44
%50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2
OpStore %50 %44
%51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3
OpStore %51 %44
; B[index].xyzw = payload member 0 + workgroup slot (index ^ 1).
%53 = OpBitwiseXor %uint %35 %uint_1
%54 = OpAccessChain %_ptr_Workgroup_float %32 %53
%55 = OpLoad %float %54
%57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0
%58 = OpLoad %float %57
%59 = OpFAdd %float %58 %55
%60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0
OpStore %60 %59
%61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1
OpStore %61 %59
%62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2
OpStore %62 %59
%63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3
OpStore %63 %59
; Per-primitive writes happen only when index < 8.
%64 = OpULessThan %bool %35 %uint_8
OpSelectionMerge %87 None
OpBranchConditional %64 %86 %87
%86 = OpLabel
; indices[index] = (3*index, 3*index + 1, 3*index + 2).
%65 = OpIMul %uint %35 %uint_3
%66 = OpIAdd %uint %65 %uint_1
%67 = OpIAdd %uint %65 %uint_2
%68 = OpCompositeConstruct %v3uint %65 %66 %67
%70 = OpAccessChain %_ptr_Output_v3uint %indices %35
OpStore %70 %68
; SV_CullPrimitive[index] = (index & 1) != 0.
%71 = OpBitwiseAnd %uint %35 %uint_1
%72 = OpINotEqual %bool %71 %uint_0
%74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35
OpStore %74 %72
; SV_PrimitiveID[index] and SV_RenderTargetArrayIndex[index] both receive the index itself.
%76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35
OpStore %76 %35
%77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35
OpStore %77 %35
; C[index].xyzw = workgroup slot (index ^ 2).
%78 = OpBitwiseXor %uint %35 %uint_2
%79 = OpAccessChain %_ptr_Workgroup_float %32 %78
%80 = OpLoad %float %79
%81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0
OpStore %81 %80
%82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1
OpStore %82 %80
%83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2
OpStore %83 %80
%84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3
OpStore %84 %80
OpBranch %87
%87 = OpLabel
OpReturn
OpFunctionEnd

View File

@ -2309,6 +2309,7 @@ void CompilerHLSL::analyze_meshlet_writes()
uint32_t id_per_vertex = 0;
uint32_t id_per_primitive = 0;
bool need_per_primitive = false;
bool need_per_vertex = false;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
@ -2331,13 +2332,16 @@ void CompilerHLSL::analyze_meshlet_writes()
if (flags.get(DecorationPerPrimitiveEXT))
need_per_primitive = true;
else
need_per_vertex = true;
}
});
// If we have per-primitive outputs, and no per-primitive builtins,
// empty version of gl_MeshPerPrimitiveEXT will be emitted
if (id_per_primitive == 0 && need_per_primitive)
{
// empty version of gl_MeshPerPrimitiveEXT will be emitted.
// If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block.
const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
auto &execution = get_entry_point();
uint32_t op_type = ir.increase_bound_by(4);
@ -2345,32 +2349,39 @@ void CompilerHLSL::analyze_meshlet_writes()
uint32_t op_ptr = op_type + 2;
uint32_t op_var = op_type + 3;
auto& type = set<SPIRType>(op_type);
auto &type = set<SPIRType>(op_type);
type.basetype = SPIRType::Struct;
set_name(op_type, "gl_MeshPerPrimitiveEXT");
set_name(op_type, block_name);
set_decoration(op_type, DecorationBlock);
set_decoration(op_type, DecorationPerPrimitiveEXT);
if (per_primitive)
set_decoration(op_type, DecorationPerPrimitiveEXT);
auto& arr = set<SPIRType>(op_arr, type);
auto &arr = set<SPIRType>(op_arr, type);
arr.parent_type = type.self;
arr.array.push_back(execution.output_primitives);
arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices);
arr.array_size_literal.push_back(true);
auto& ptr = set<SPIRType>(op_ptr, arr);
auto &ptr = set<SPIRType>(op_ptr, arr);
ptr.parent_type = arr.self;
ptr.pointer = true;
ptr.pointer_depth++;
ptr.storage = StorageClassOutput;
set_decoration(op_ptr, DecorationBlock);
set_name(op_ptr, "gl_MeshPerPrimitiveEXT");
set_name(op_ptr, block_name);
auto& var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
set_decoration(op_var, DecorationPerPrimitiveEXT);
set_name(op_var, "gl_MeshPrimitivesEXT");
auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
if (per_primitive)
set_decoration(op_var, DecorationPerPrimitiveEXT);
set_name(op_var, instance_name);
execution.interface_variables.push_back(var.self);
id_per_primitive = op_var;
}
return op_var;
};
if (id_per_vertex == 0 && need_per_vertex)
id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
if (id_per_primitive == 0 && need_per_primitive)
id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);
unordered_set<uint32_t> processed_func_ids;
analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);