From 79d63934a66b45df67bbdc763731530199ef089a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 17 Aug 2023 12:36:21 +0200 Subject: [PATCH] GLSL: Emit inline invariant position for mesh shaders. Work around missing feature from GLSL. Normally we can emit a global invariant gl_Position; and call it a day, but it does not work for mesh shaders it seems. Declaring invariance inside an explicit IO block works fine on the other hand ... --- .../asm/vert/invariant-block.sso.asm.vert | 4 +- .../shaders/asm/vert/invariant.sso.asm.vert | 4 +- ...ion-dx-style.vk.nocompat.spv14.asm.mesh.vk | 49 ++++++ ...nt-position-mesh.spv14.nocompat.vk.mesh.vk | 19 +++ .../asm/vert/invariant-block.sso.asm.vert | 4 +- .../shaders/asm/vert/invariant.sso.asm.vert | 4 +- ...sition-dx-style.vk.nocompat.spv14.asm.mesh | 151 ++++++++++++++++++ ...riant-position-mesh.spv14.nocompat.vk.mesh | 18 +++ spirv_glsl.cpp | 13 +- 9 files changed, 253 insertions(+), 13 deletions(-) create mode 100644 reference/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh.vk create mode 100644 reference/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh.vk create mode 100644 shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh create mode 100644 shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh diff --git a/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert b/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert index eb886941..a89e3362 100644 --- a/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert +++ b/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert @@ -2,14 +2,12 @@ out gl_PerVertex { - vec4 gl_Position; + invariant vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[1]; float gl_CullDistance[1]; }; -invariant gl_Position; - void main() { gl_Position = vec4(1.0); diff --git a/reference/opt/shaders/asm/vert/invariant.sso.asm.vert b/reference/opt/shaders/asm/vert/invariant.sso.asm.vert index 4f7e2f5f..7f9221d3 100644 --- a/reference/opt/shaders/asm/vert/invariant.sso.asm.vert +++ b/reference/opt/shaders/asm/vert/invariant.sso.asm.vert @@ -2,11 +2,9 @@ out gl_PerVertex { - vec4 gl_Position; + invariant vec4 gl_Position; }; -invariant gl_Position; - void main() { gl_Position = vec4(1.0); diff --git a/reference/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh.vk b/reference/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh.vk new file mode 100644 index 00000000..a2e0baae --- /dev/null +++ b/reference/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh.vk @@ -0,0 +1,49 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 8, triangles) out; + +out gl_MeshPerVertexEXT +{ + invariant vec4 gl_Position; +} gl_MeshVerticesEXT[]; + +struct _12 +{ + float _m0; +}; + +layout(location = 1) out vec4 B[24]; +layout(location = 3) perprimitiveEXT out vec4 C[8]; +shared float _32[64]; +taskPayloadSharedEXT _12 _38; + +void main() +{ + _32[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + barrier(); + SetMeshOutputsEXT(24u, 8u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _32[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _32[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _32[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _32[gl_LocalInvocationIndex]; + float _59 = _38._m0 + _32[gl_LocalInvocationIndex ^ 1u]; + B[gl_LocalInvocationIndex].x = _59; + B[gl_LocalInvocationIndex].y = _59; + B[gl_LocalInvocationIndex].z = _59; + B[gl_LocalInvocationIndex].w = _59; + if (gl_LocalInvocationIndex < 8u) + { + uint _65 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(_65, _65 + 1u, _65 + 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); + uint _78 = gl_LocalInvocationIndex ^ 2u; + C[gl_LocalInvocationIndex].x = _32[_78]; + C[gl_LocalInvocationIndex].y = _32[_78]; + C[gl_LocalInvocationIndex].z = _32[_78]; + C[gl_LocalInvocationIndex].w = _32[_78]; + } +} + diff --git a/reference/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh.vk b/reference/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh.vk new file mode 100644 index 00000000..d07a110f --- /dev/null +++ b/reference/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh.vk @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +layout(max_vertices = 3, max_primitives = 1, triangles) out; + +out gl_MeshPerVertexEXT +{ + invariant vec4 gl_Position; +} gl_MeshVerticesEXT[]; + +void main() +{ + SetMeshOutputsEXT(3u, 1u); + gl_MeshVerticesEXT[0].gl_Position = vec4(1.0); + gl_MeshVerticesEXT[1].gl_Position = vec4(1.0); + gl_MeshVerticesEXT[2].gl_Position = vec4(1.0); + gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u); +} + diff --git a/reference/shaders/asm/vert/invariant-block.sso.asm.vert b/reference/shaders/asm/vert/invariant-block.sso.asm.vert index eb886941..a89e3362 100644 --- a/reference/shaders/asm/vert/invariant-block.sso.asm.vert +++ b/reference/shaders/asm/vert/invariant-block.sso.asm.vert @@ -2,14 +2,12 @@ out gl_PerVertex { - vec4 gl_Position; + invariant vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[1]; float gl_CullDistance[1]; }; -invariant gl_Position; - void main() { gl_Position = vec4(1.0); diff --git a/reference/shaders/asm/vert/invariant.sso.asm.vert b/reference/shaders/asm/vert/invariant.sso.asm.vert index e95338b0..3cb264a3 100644 --- a/reference/shaders/asm/vert/invariant.sso.asm.vert +++ b/reference/shaders/asm/vert/invariant.sso.asm.vert @@ -2,11 +2,9 @@ out gl_PerVertex { - vec4 gl_Position; + invariant vec4 gl_Position; }; -invariant gl_Position; - vec4 _main() { return vec4(1.0); diff --git a/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh b/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh new file mode 100644 index 00000000..bfd39285 --- /dev/null +++ b/shaders-no-opt/asm/mesh/invariant-position-dx-style.vk.nocompat.spv14.asm.mesh @@ -0,0 +1,151 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %SV_Position Invariant + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain %_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh b/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh new file mode 100644 index 00000000..9ce59eb9 --- /dev/null +++ b/shaders-no-opt/mesh/invariant-position-mesh.spv14.nocompat.vk.mesh @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(max_vertices = 3, max_primitives = 1, triangles) out; +layout(local_size_x = 1) in; + +out gl_MeshPerVertexEXT +{ + invariant vec4 gl_Position; +} gl_MeshVerticesEXT[3]; + +void main() +{ + SetMeshOutputsEXT(3, 1); + gl_MeshVerticesEXT[0].gl_Position = vec4(1.0); + gl_MeshVerticesEXT[1].gl_Position = vec4(1.0); + gl_MeshVerticesEXT[2].gl_Position = vec4(1.0); + gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2); +} diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 690bc9d3..19c09a46 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3155,6 +3155,10 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) should_force = true; } + // Either glslang bug or oversight, but global invariant position does not work in mesh shaders. + if (get_execution_model() == ExecutionModelMeshEXT && position_invariant) + should_force = true; + return should_force; } @@ -3403,6 +3407,8 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo auto itr = builtin_xfb_offsets.find(BuiltInPosition); if (itr != end(builtin_xfb_offsets)) statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); + else if (position_invariant) + statement("invariant vec4 gl_Position;"); else statement("vec4 gl_Position;"); } @@ -3499,6 +3505,8 @@ void CompilerGLSL::emit_resources() break; } + bool global_invariant_position = position_invariant && (options.es || options.version >= 120); + // Emit custom gl_PerVertex for SSO compatibility. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) { @@ -3509,11 +3517,13 @@ void CompilerGLSL::emit_resources() case ExecutionModelTessellationEvaluation: emit_declared_builtin_block(StorageClassInput, execution.model); emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; break; case ExecutionModelVertex: case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; break; default: @@ -3523,6 +3533,7 @@ void CompilerGLSL::emit_resources() else if (should_force_emit_builtin_block(StorageClassOutput)) { emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; } else if (execution.geometry_passthrough) { @@ -3543,7 +3554,7 @@ void CompilerGLSL::emit_resources() statement(""); } - if (position_invariant && (options.es || options.version >= 120)) + if (global_invariant_position) { statement("invariant gl_Position;"); statement("");