Merge pull request #2012 from KhronosGroup/mesh-shaders

GLSL: Implement GL_EXT_mesh_shader
This commit is contained in:
Hans-Kristian Arntzen 2022-09-05 13:19:32 +02:00 committed by GitHub
commit 210a800130
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 1133 additions and 27 deletions

View File

@ -2,7 +2,7 @@
# Copyright 2016-2021 The Khronos Group Inc.
# SPDX-License-Identifier: Apache-2.0
GLSLANG_REV=69ae9e7460499b488cb2d32edae623a95264db14
GLSLANG_REV=a53aa3e94f8b4bf22a6eb6e8a207fe91ab02b989
SPIRV_TOOLS_REV=4c456f7da67c5437a6fb7d4d20d78e2a5ae2acf2
SPIRV_HEADERS_REV=87d5b782bec60822aa878941e6b13c0a9a954c9b
PROTOCOL=https

View File

@ -1082,6 +1082,10 @@ static ExecutionModel stage_to_execution_model(const std::string &stage)
return ExecutionModelMissKHR;
else if (stage == "rcall")
return ExecutionModelCallableKHR;
else if (stage == "mesh")
return spv::ExecutionModelMeshEXT;
else if (stage == "task")
return spv::ExecutionModelTaskEXT;
else
SPIRV_CROSS_THROW("Invalid stage.");
}

View File

@ -3,7 +3,7 @@
layout(location = 0) out vec2 value;
layout(location = 0) pervertexEXT in vec2 vUV[3];
layout(location = 1) pervertexEXT in vec2 vUV2[3];
layout(location = 3) pervertexEXT in vec2 vUV2[3];
void main()
{

View File

@ -0,0 +1,66 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_fragment_shading_rate : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(max_vertices = 24, max_primitives = 22, lines) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
struct TaskPayload
{
float a;
float b;
int c;
};
layout(location = 0) out vec4 vOut[24];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[24];
layout(location = 1) perprimitiveEXT out vec4 vPrim[22];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[22];
taskPayloadSharedEXT TaskPayload payload;
shared float shared_float[16];
void main()
{
SetMeshOutputsEXT(24u, 22u);
vec3 _29 = vec3(gl_GlobalInvocationID);
float _31 = _29.x;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22u)
{
vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex);
int _129 = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _129;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _129 + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _129 + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _129 + 3;
}
}

View File

@ -0,0 +1,66 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_fragment_shading_rate : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(max_vertices = 24, max_primitives = 22, points) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
struct TaskPayload
{
float a;
float b;
int c;
};
layout(location = 0) out vec4 vOut[24];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[24];
layout(location = 1) perprimitiveEXT out vec4 vPrim[22];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[22];
taskPayloadSharedEXT TaskPayload payload;
shared float shared_float[16];
void main()
{
SetMeshOutputsEXT(24u, 22u);
vec3 _29 = vec3(gl_GlobalInvocationID);
float _31 = _29.x;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22u)
{
vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;
int _124 = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _124;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _124 + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _124 + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _124 + 3;
}
}

View File

@ -0,0 +1,66 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_fragment_shading_rate : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(max_vertices = 24, max_primitives = 22, triangles) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
struct TaskPayload
{
float a;
float b;
int c;
};
layout(location = 0) out vec4 vOut[24];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[24];
layout(location = 1) perprimitiveEXT out vec4 vPrim[22];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[22];
taskPayloadSharedEXT TaskPayload payload;
shared float shared_float[16];
void main()
{
SetMeshOutputsEXT(24u, 22u);
vec3 _29 = vec3(gl_GlobalInvocationID);
float _31 = _29.x;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22u)
{
vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex);
int _128 = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _128;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _128 + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _128 + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _128 + 3;
}
}

View File

@ -0,0 +1,42 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in;
struct Payload
{
float v[3];
};
shared float vs[24];
taskPayloadSharedEXT Payload p;
void main()
{
vs[gl_LocalInvocationIndex] = 10.0;
barrier();
if (gl_LocalInvocationIndex < 12u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u];
}
barrier();
if (gl_LocalInvocationIndex < 6u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u];
}
barrier();
if (gl_LocalInvocationIndex < 3u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u];
}
barrier();
p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex];
if (vs[5] > 20.0)
{
EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8])));
}
else
{
EmitMeshTasksEXT(uint(int(vs[6])), 10u, 50u);
}
}

View File

@ -0,0 +1,35 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in;
struct Payload
{
float v[3];
};
shared float vs[24];
taskPayloadSharedEXT Payload p;
void main()
{
vs[gl_LocalInvocationIndex] = 10.0;
barrier();
if (gl_LocalInvocationIndex < 12u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u];
}
barrier();
if (gl_LocalInvocationIndex < 6u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u];
}
barrier();
if (gl_LocalInvocationIndex < 3u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u];
}
barrier();
p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex];
EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8])));
}

View File

@ -0,0 +1,35 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in;
struct Payload
{
float v[3];
};
shared float vs[24];
taskPayloadSharedEXT Payload p;
void main()
{
vs[gl_LocalInvocationIndex] = 10.0;
barrier();
if (gl_LocalInvocationIndex < 12u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u];
}
barrier();
if (gl_LocalInvocationIndex < 6u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u];
}
barrier();
if (gl_LocalInvocationIndex < 3u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u];
}
barrier();
p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex];
EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8])));
}

View File

@ -3,7 +3,7 @@
layout(location = 0) out vec2 value;
layout(location = 0) pervertexEXT in vec2 vUV[3];
layout(location = 1) pervertexEXT in vec2 vUV2[3];
layout(location = 3) pervertexEXT in vec2 vUV2[3];
void main()
{

View File

@ -0,0 +1,63 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_fragment_shading_rate : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(max_vertices = 24, max_primitives = 22, lines) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
struct TaskPayload
{
float a;
float b;
int c;
};
layout(location = 0) out vec4 vOut[24];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[24];
layout(location = 1) perprimitiveEXT out vec4 vPrim[22];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[22];
taskPayloadSharedEXT TaskPayload payload;
shared float shared_float[16];
void main()
{
SetMeshOutputsEXT(24u, 22u);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22u)
{
vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
}
}

View File

@ -0,0 +1,63 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_fragment_shading_rate : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(max_vertices = 24, max_primitives = 22, points) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
struct TaskPayload
{
float a;
float b;
int c;
};
layout(location = 0) out vec4 vOut[24];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[24];
layout(location = 1) perprimitiveEXT out vec4 vPrim[22];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[22];
taskPayloadSharedEXT TaskPayload payload;
shared float shared_float[16];
void main()
{
SetMeshOutputsEXT(24u, 22u);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22u)
{
vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
}
}

View File

@ -0,0 +1,63 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_fragment_shading_rate : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(max_vertices = 24, max_primitives = 22, triangles) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
struct TaskPayload
{
float a;
float b;
int c;
};
layout(location = 0) out vec4 vOut[24];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[24];
layout(location = 1) perprimitiveEXT out vec4 vPrim[22];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[22];
taskPayloadSharedEXT TaskPayload payload;
shared float shared_float[16];
void main()
{
SetMeshOutputsEXT(24u, 22u);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22u)
{
vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
}
}

View File

@ -0,0 +1,42 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in;
struct Payload
{
float v[3];
};
shared float vs[24];
taskPayloadSharedEXT Payload p;
void main()
{
vs[gl_LocalInvocationIndex] = 10.0;
barrier();
if (gl_LocalInvocationIndex < 12u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u];
}
barrier();
if (gl_LocalInvocationIndex < 6u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u];
}
barrier();
if (gl_LocalInvocationIndex < 3u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u];
}
barrier();
p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex];
if (vs[5] > 20.0)
{
EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8])));
}
else
{
EmitMeshTasksEXT(uint(int(vs[6])), 10u, 50u);
}
}

View File

@ -0,0 +1,35 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in;
struct Payload
{
float v[3];
};
shared float vs[24];
taskPayloadSharedEXT Payload p;
void main()
{
vs[gl_LocalInvocationIndex] = 10.0;
barrier();
if (gl_LocalInvocationIndex < 12u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u];
}
barrier();
if (gl_LocalInvocationIndex < 6u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u];
}
barrier();
if (gl_LocalInvocationIndex < 3u)
{
vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u];
}
barrier();
p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex];
EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8])));
}

View File

@ -0,0 +1,132 @@
; SPIR-V
; Version: 1.4
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 93
; Schema: 0
OpCapability MeshShadingEXT
OpExtension "SPV_EXT_mesh_shader"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint TaskEXT %main "main" %vs %gl_LocalInvocationIndex %p
OpExecutionMode %main LocalSize 4 3 2
OpSource GLSL 450
OpSourceExtension "GL_EXT_mesh_shader"
OpName %main "main"
OpName %vs "vs"
OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
OpName %Payload "Payload"
OpMemberName %Payload 0 "v"
OpName %p "p"
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%uint = OpTypeInt 32 0
%uint_24 = OpConstant %uint 24
%_arr_float_uint_24 = OpTypeArray %float %uint_24
%_ptr_Workgroup__arr_float_uint_24 = OpTypePointer Workgroup %_arr_float_uint_24
%vs = OpVariable %_ptr_Workgroup__arr_float_uint_24 Workgroup
%_ptr_Input_uint = OpTypePointer Input %uint
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
%float_10 = OpConstant %float 10
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%uint_12 = OpConstant %uint 12
%bool = OpTypeBool
%uint_6 = OpConstant %uint 6
%uint_3 = OpConstant %uint 3
%_arr_float_uint_3 = OpTypeArray %float %uint_3
%Payload = OpTypeStruct %_arr_float_uint_3
%_ptr_TaskPayloadWorkgroupEXT_Payload = OpTypePointer TaskPayloadWorkgroupEXT %Payload
%p = OpVariable %_ptr_TaskPayloadWorkgroupEXT_Payload TaskPayloadWorkgroupEXT
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float
%int_4 = OpConstant %int 4
%int_6 = OpConstant %int 6
%int_8 = OpConstant %int 8
%v3uint = OpTypeVector %uint 3
%uint_4 = OpConstant %uint 4
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_3 %uint_2
%main = OpFunction %void None %3
%5 = OpLabel
%14 = OpLoad %uint %gl_LocalInvocationIndex
%17 = OpAccessChain %_ptr_Workgroup_float %vs %14
OpStore %17 %float_10
OpControlBarrier %uint_2 %uint_2 %uint_264
%20 = OpLoad %uint %gl_LocalInvocationIndex
%23 = OpULessThan %bool %20 %uint_12
OpSelectionMerge %25 None
OpBranchConditional %23 %24 %25
%24 = OpLabel
%26 = OpLoad %uint %gl_LocalInvocationIndex
%27 = OpLoad %uint %gl_LocalInvocationIndex
%28 = OpIAdd %uint %27 %uint_12
%29 = OpAccessChain %_ptr_Workgroup_float %vs %28
%30 = OpLoad %float %29
%31 = OpAccessChain %_ptr_Workgroup_float %vs %26
%32 = OpLoad %float %31
%33 = OpFAdd %float %32 %30
%34 = OpAccessChain %_ptr_Workgroup_float %vs %26
OpStore %34 %33
OpBranch %25
%25 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%35 = OpLoad %uint %gl_LocalInvocationIndex
%37 = OpULessThan %bool %35 %uint_6
OpSelectionMerge %39 None
OpBranchConditional %37 %38 %39
%38 = OpLabel
%40 = OpLoad %uint %gl_LocalInvocationIndex
%41 = OpLoad %uint %gl_LocalInvocationIndex
%42 = OpIAdd %uint %41 %uint_6
%43 = OpAccessChain %_ptr_Workgroup_float %vs %42
%44 = OpLoad %float %43
%45 = OpAccessChain %_ptr_Workgroup_float %vs %40
%46 = OpLoad %float %45
%47 = OpFAdd %float %46 %44
%48 = OpAccessChain %_ptr_Workgroup_float %vs %40
OpStore %48 %47
OpBranch %39
%39 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%49 = OpLoad %uint %gl_LocalInvocationIndex
%51 = OpULessThan %bool %49 %uint_3
OpSelectionMerge %53 None
OpBranchConditional %51 %52 %53
%52 = OpLabel
%54 = OpLoad %uint %gl_LocalInvocationIndex
%55 = OpLoad %uint %gl_LocalInvocationIndex
%56 = OpIAdd %uint %55 %uint_3
%57 = OpAccessChain %_ptr_Workgroup_float %vs %56
%58 = OpLoad %float %57
%59 = OpAccessChain %_ptr_Workgroup_float %vs %54
%60 = OpLoad %float %59
%61 = OpFAdd %float %60 %58
%62 = OpAccessChain %_ptr_Workgroup_float %vs %54
OpStore %62 %61
OpBranch %53
%53 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%69 = OpLoad %uint %gl_LocalInvocationIndex
%70 = OpLoad %uint %gl_LocalInvocationIndex
%71 = OpAccessChain %_ptr_Workgroup_float %vs %70
%72 = OpLoad %float %71
%74 = OpAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %p %int_0 %69
OpStore %74 %72
%76 = OpAccessChain %_ptr_Workgroup_float %vs %int_4
%77 = OpLoad %float %76
%78 = OpConvertFToS %int %77
%79 = OpBitcast %uint %78
%81 = OpAccessChain %_ptr_Workgroup_float %vs %int_6
%82 = OpLoad %float %81
%83 = OpConvertFToS %int %82
%84 = OpBitcast %uint %83
%86 = OpAccessChain %_ptr_Workgroup_float %vs %int_8
%87 = OpLoad %float %86
%88 = OpConvertFToS %int %87
%89 = OpBitcast %uint %88
OpEmitMeshTasksEXT %79 %84 %89 %p
OpFunctionEnd

View File

@ -3,7 +3,7 @@
layout(location = 0) out vec2 value;
layout(location = 0) pervertexEXT in vec2 vUV[3];
layout(location = 1) pervertexEXT in vec2 vUV2[3];
layout(location = 3) pervertexEXT in vec2 vUV2[3];
void main () {
value = gl_BaryCoordEXT.x * vUV[0] + gl_BaryCoordEXT.y * vUV[1] + gl_BaryCoordEXT.z * vUV[2];

View File

@ -0,0 +1,63 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(lines, max_vertices = 24, max_primitives = 22) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
layout(location = 0) out vec4 vOut[];
layout(location = 1) perprimitiveEXT out vec4 vPrim[];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[];
shared float shared_float[16];
struct TaskPayload
{
float a;
float b;
int c;
};
taskPayloadSharedEXT TaskPayload payload;
void main()
{
SetMeshOutputsEXT(24, 22);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22)
{
vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
}
}

View File

@ -0,0 +1,63 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(points, max_vertices = 24, max_primitives = 22) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
layout(location = 0) out vec4 vOut[];
layout(location = 1) perprimitiveEXT out vec4 vPrim[];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[];
shared float shared_float[16];
struct TaskPayload
{
float a;
float b;
int c;
};
taskPayloadSharedEXT TaskPayload payload;
void main()
{
SetMeshOutputsEXT(24, 22);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22)
{
vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
}
}

View File

@ -0,0 +1,63 @@
#version 450
#extension GL_EXT_mesh_shader : require
layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
layout(triangles, max_vertices = 24, max_primitives = 22) out;
out gl_MeshPerVertexEXT
{
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[1];
float gl_CullDistance[2];
} gl_MeshVerticesEXT[];
layout(location = 0) out vec4 vOut[];
layout(location = 1) perprimitiveEXT out vec4 vPrim[];
layout(location = 2) out BlockOut
{
vec4 a;
vec4 b;
} outputs[];
layout(location = 4) perprimitiveEXT out BlockOutPrim
{
vec4 a;
vec4 b;
} prim_outputs[];
shared float shared_float[16];
struct TaskPayload
{
float a;
float b;
int c;
};
taskPayloadSharedEXT TaskPayload payload;
void main()
{
SetMeshOutputsEXT(24, 22);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0);
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0);
outputs[gl_LocalInvocationIndex].a = vec4(5.0);
outputs[gl_LocalInvocationIndex].b = vec4(6.0);
barrier();
if (gl_LocalInvocationIndex < 22)
{
vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0);
prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1);
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
}
}

23
spirv.h
View File

@ -98,6 +98,8 @@ typedef enum SpvExecutionModel_ {
SpvExecutionModelMissNV = 5317,
SpvExecutionModelCallableKHR = 5318,
SpvExecutionModelCallableNV = 5318,
SpvExecutionModelTaskEXT = 5364,
SpvExecutionModelMeshEXT = 5365,
SpvExecutionModelMax = 0x7fffffff,
} SpvExecutionModel;
@ -165,11 +167,21 @@ typedef enum SpvExecutionMode_ {
SpvExecutionModeSignedZeroInfNanPreserve = 4461,
SpvExecutionModeRoundingModeRTE = 4462,
SpvExecutionModeRoundingModeRTZ = 4463,
SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
SpvExecutionModeStencilRefReplacingEXT = 5027,
SpvExecutionModeStencilRefUnchangedFrontAMD = 5079,
SpvExecutionModeStencilRefGreaterFrontAMD = 5080,
SpvExecutionModeStencilRefLessFrontAMD = 5081,
SpvExecutionModeStencilRefUnchangedBackAMD = 5082,
SpvExecutionModeStencilRefGreaterBackAMD = 5083,
SpvExecutionModeStencilRefLessBackAMD = 5084,
SpvExecutionModeOutputLinesEXT = 5269,
SpvExecutionModeOutputLinesNV = 5269,
SpvExecutionModeOutputPrimitivesEXT = 5270,
SpvExecutionModeOutputPrimitivesNV = 5270,
SpvExecutionModeDerivativeGroupQuadsNV = 5289,
SpvExecutionModeDerivativeGroupLinearNV = 5290,
SpvExecutionModeOutputTrianglesEXT = 5298,
SpvExecutionModeOutputTrianglesNV = 5298,
SpvExecutionModePixelInterlockOrderedEXT = 5366,
SpvExecutionModePixelInterlockUnorderedEXT = 5367,
@ -219,6 +231,7 @@ typedef enum SpvStorageClass_ {
SpvStorageClassShaderRecordBufferNV = 5343,
SpvStorageClassPhysicalStorageBuffer = 5349,
SpvStorageClassPhysicalStorageBufferEXT = 5349,
SpvStorageClassTaskPayloadWorkgroupEXT = 5402,
SpvStorageClassCodeSectionINTEL = 5605,
SpvStorageClassDeviceOnlyINTEL = 5936,
SpvStorageClassHostOnlyINTEL = 5937,
@ -501,6 +514,7 @@ typedef enum SpvDecoration_ {
SpvDecorationPassthroughNV = 5250,
SpvDecorationViewportRelativeNV = 5252,
SpvDecorationSecondaryViewportRelativeNV = 5256,
SpvDecorationPerPrimitiveEXT = 5271,
SpvDecorationPerPrimitiveNV = 5271,
SpvDecorationPerViewNV = 5272,
SpvDecorationPerTaskNV = 5273,
@ -650,6 +664,10 @@ typedef enum SpvBuiltIn_ {
SpvBuiltInFragmentSizeNV = 5292,
SpvBuiltInFragInvocationCountEXT = 5293,
SpvBuiltInInvocationsPerPixelNV = 5293,
SpvBuiltInPrimitivePointIndicesEXT = 5294,
SpvBuiltInPrimitiveLineIndicesEXT = 5295,
SpvBuiltInPrimitiveTriangleIndicesEXT = 5296,
SpvBuiltInCullPrimitiveEXT = 5299,
SpvBuiltInLaunchIdKHR = 5319,
SpvBuiltInLaunchIdNV = 5319,
SpvBuiltInLaunchSizeKHR = 5320,
@ -990,6 +1008,7 @@ typedef enum SpvCapability_ {
SpvCapabilityFragmentFullyCoveredEXT = 5265,
SpvCapabilityMeshShadingNV = 5266,
SpvCapabilityImageFootprintNV = 5282,
SpvCapabilityMeshShadingEXT = 5283,
SpvCapabilityFragmentBarycentricKHR = 5284,
SpvCapabilityFragmentBarycentricNV = 5284,
SpvCapabilityComputeDerivativeGroupQuadsNV = 5288,
@ -1589,6 +1608,8 @@ typedef enum SpvOp_ {
SpvOpFragmentFetchAMD = 5012,
SpvOpReadClockKHR = 5056,
SpvOpImageSampleFootprintNV = 5283,
SpvOpEmitMeshTasksEXT = 5294,
SpvOpSetMeshOutputsEXT = 5295,
SpvOpGroupNonUniformPartitionNV = 5296,
SpvOpWritePackedPrimitiveIndices4x8NV = 5299,
SpvOpReportIntersectionKHR = 5334,
@ -2262,6 +2283,8 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break;
case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
case SpvOpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break;
case SpvOpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break;
case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;

View File

@ -94,6 +94,8 @@ enum ExecutionModel {
ExecutionModelMissNV = 5317,
ExecutionModelCallableKHR = 5318,
ExecutionModelCallableNV = 5318,
ExecutionModelTaskEXT = 5364,
ExecutionModelMeshEXT = 5365,
ExecutionModelMax = 0x7fffffff,
};
@ -161,11 +163,21 @@ enum ExecutionMode {
ExecutionModeSignedZeroInfNanPreserve = 4461,
ExecutionModeRoundingModeRTE = 4462,
ExecutionModeRoundingModeRTZ = 4463,
ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
ExecutionModeStencilRefReplacingEXT = 5027,
ExecutionModeStencilRefUnchangedFrontAMD = 5079,
ExecutionModeStencilRefGreaterFrontAMD = 5080,
ExecutionModeStencilRefLessFrontAMD = 5081,
ExecutionModeStencilRefUnchangedBackAMD = 5082,
ExecutionModeStencilRefGreaterBackAMD = 5083,
ExecutionModeStencilRefLessBackAMD = 5084,
ExecutionModeOutputLinesEXT = 5269,
ExecutionModeOutputLinesNV = 5269,
ExecutionModeOutputPrimitivesEXT = 5270,
ExecutionModeOutputPrimitivesNV = 5270,
ExecutionModeDerivativeGroupQuadsNV = 5289,
ExecutionModeDerivativeGroupLinearNV = 5290,
ExecutionModeOutputTrianglesEXT = 5298,
ExecutionModeOutputTrianglesNV = 5298,
ExecutionModePixelInterlockOrderedEXT = 5366,
ExecutionModePixelInterlockUnorderedEXT = 5367,
@ -215,6 +227,7 @@ enum StorageClass {
StorageClassShaderRecordBufferNV = 5343,
StorageClassPhysicalStorageBuffer = 5349,
StorageClassPhysicalStorageBufferEXT = 5349,
StorageClassTaskPayloadWorkgroupEXT = 5402,
StorageClassCodeSectionINTEL = 5605,
StorageClassDeviceOnlyINTEL = 5936,
StorageClassHostOnlyINTEL = 5937,
@ -497,6 +510,7 @@ enum Decoration {
DecorationPassthroughNV = 5250,
DecorationViewportRelativeNV = 5252,
DecorationSecondaryViewportRelativeNV = 5256,
DecorationPerPrimitiveEXT = 5271,
DecorationPerPrimitiveNV = 5271,
DecorationPerViewNV = 5272,
DecorationPerTaskNV = 5273,
@ -646,6 +660,10 @@ enum BuiltIn {
BuiltInFragmentSizeNV = 5292,
BuiltInFragInvocationCountEXT = 5293,
BuiltInInvocationsPerPixelNV = 5293,
BuiltInPrimitivePointIndicesEXT = 5294,
BuiltInPrimitiveLineIndicesEXT = 5295,
BuiltInPrimitiveTriangleIndicesEXT = 5296,
BuiltInCullPrimitiveEXT = 5299,
BuiltInLaunchIdKHR = 5319,
BuiltInLaunchIdNV = 5319,
BuiltInLaunchSizeKHR = 5320,
@ -986,6 +1004,7 @@ enum Capability {
CapabilityFragmentFullyCoveredEXT = 5265,
CapabilityMeshShadingNV = 5266,
CapabilityImageFootprintNV = 5282,
CapabilityMeshShadingEXT = 5283,
CapabilityFragmentBarycentricKHR = 5284,
CapabilityFragmentBarycentricNV = 5284,
CapabilityComputeDerivativeGroupQuadsNV = 5288,
@ -1585,6 +1604,8 @@ enum Op {
OpFragmentFetchAMD = 5012,
OpReadClockKHR = 5056,
OpImageSampleFootprintNV = 5283,
OpEmitMeshTasksEXT = 5294,
OpSetMeshOutputsEXT = 5295,
OpGroupNonUniformPartitionNV = 5296,
OpWritePackedPrimitiveIndices4x8NV = 5299,
OpReportIntersectionKHR = 5334,
@ -2258,6 +2279,8 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
case OpReadClockKHR: *hasResult = true; *hasResultType = true; break;
case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break;
case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break;
case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;

View File

@ -682,6 +682,7 @@ struct SPIREntryPoint
} workgroup_size;
uint32_t invocations = 0;
uint32_t output_vertices = 0;
uint32_t output_primitives = 0;
spv::ExecutionModel model = spv::ExecutionModelMax;
bool geometry_passthrough = false;
};
@ -776,7 +777,8 @@ struct SPIRBlock : IVariant
Unreachable, // Noop
Kill, // Discard
IgnoreIntersection, // Ray Tracing
TerminateRay // Ray Tracing
TerminateRay, // Ray Tracing
EmitMeshTasks // Mesh shaders
};
enum Merge
@ -838,6 +840,13 @@ struct SPIRBlock : IVariant
BlockID false_block = 0;
BlockID default_block = 0;
// If terminator is EmitMeshTasksEXT.
struct
{
ID groups[3];
ID payload;
} mesh = {};
SmallVector<Instruction> ops;
struct Phi

View File

@ -98,7 +98,8 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
// This is a global side effect of the function.
if (block.terminator == SPIRBlock::Kill ||
block.terminator == SPIRBlock::TerminateRay ||
block.terminator == SPIRBlock::IgnoreIntersection)
block.terminator == SPIRBlock::IgnoreIntersection ||
block.terminator == SPIRBlock::EmitMeshTasks)
return false;
for (auto &i : block.ops)
@ -154,6 +155,11 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
case OpEmitVertex:
return false;
// Mesh shader functions modify global state.
// (EmitMeshTasks is a terminator).
case OpSetMeshOutputsEXT:
return false;
// Barriers disallow any reordering, so we should treat blocks with barrier as writing.
case OpControlBarrier:
case OpMemoryBarrier:
@ -1069,8 +1075,11 @@ void Compiler::parse_fixup()
{
auto &var = id.get<SPIRVariable>();
if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
var.storage == StorageClassTaskPayloadWorkgroupEXT ||
var.storage == StorageClassOutput)
{
global_variables.push_back(var.self);
}
if (variable_storage_is_aliased(var))
aliased_variables.push_back(var.self);
}
@ -2177,6 +2186,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
execution.output_vertices = arg0;
break;
case ExecutionModeOutputPrimitivesEXT:
execution.output_primitives = arg0;
break;
default:
break;
}
@ -2297,6 +2310,9 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t
case ExecutionModeOutputVertices:
return execution.output_vertices;
case ExecutionModeOutputPrimitivesEXT:
return execution.output_primitives;
default:
return 0;
}

View File

@ -497,6 +497,15 @@ void CompilerGLSL::find_static_extensions()
require_extension_internal("GL_NV_ray_tracing");
break;
case ExecutionModelMeshEXT:
case ExecutionModelTaskEXT:
if (options.es || options.version < 450)
SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
require_extension_internal("GL_EXT_mesh_shader");
break;
default:
break;
}
@ -1060,6 +1069,8 @@ void CompilerGLSL::emit_header()
break;
case ExecutionModelGLCompute:
case ExecutionModelTaskEXT:
case ExecutionModelMeshEXT:
{
if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
{
@ -1078,6 +1089,18 @@ void CompilerGLSL::emit_header()
inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
if (execution.model == ExecutionModelMeshEXT)
{
outputs.push_back(join("max_vertices = ", execution.output_vertices));
outputs.push_back(join("max_primitives = ", execution.output_primitives));
if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
outputs.push_back("triangles");
else if (execution.flags.get(ExecutionModeOutputLinesEXT))
outputs.push_back("lines");
else if (execution.flags.get(ExecutionModeOutputPoints))
outputs.push_back("points");
}
break;
}
@ -1235,6 +1258,8 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
res += "sample ";
if (flags.get(DecorationInvariant))
res += "invariant ";
if (flags.get(DecorationPerPrimitiveEXT))
res += "perprimitiveEXT ";
if (flags.get(DecorationExplicitInterpAMD))
{
@ -2624,7 +2649,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
}
// Workaround to make sure we can emit "patch in/out" correctly.
fixup_io_block_patch_qualifiers(var);
fixup_io_block_patch_primitive_qualifiers(var);
// Block names should never alias.
auto block_name = to_name(type.self, false);
@ -2647,8 +2672,15 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
// Instance names cannot alias block names.
resource_names.insert(block_name);
bool is_patch = has_decoration(var.self, DecorationPatch);
statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
const char *block_qualifier;
if (has_decoration(var.self, DecorationPatch))
block_qualifier = "patch ";
else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
block_qualifier = "perprimitiveEXT ";
else
block_qualifier = "";
statement(layout_for_variable(var), block_qualifier, qual, block_name);
begin_scope();
type.member_name_cache.clear();
@ -3084,7 +3116,8 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
});
// If we're declaring clip/cull planes with control points we need to force block declaration.
if (get_execution_model() == ExecutionModelTessellationControl &&
if ((get_execution_model() == ExecutionModelTessellationControl ||
get_execution_model() == ExecutionModelMeshEXT) &&
(clip_distance_count || cull_distance_count))
{
should_force = true;
@ -3093,7 +3126,7 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
return should_force;
}
void CompilerGLSL::fixup_implicit_builtin_block_names()
void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
{
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
@ -3101,11 +3134,22 @@ void CompilerGLSL::fixup_implicit_builtin_block_names()
if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
is_builtin_variable(var))
{
// Make sure the array has a supported name in the code.
if (var.storage == StorageClassOutput)
set_name(var.self, "gl_out");
else if (var.storage == StorageClassInput)
set_name(var.self, "gl_in");
if (model != ExecutionModelMeshEXT)
{
// Make sure the array has a supported name in the code.
if (var.storage == StorageClassOutput)
set_name(var.self, "gl_out");
else if (var.storage == StorageClassInput)
set_name(var.self, "gl_in");
}
else
{
auto flags = get_buffer_block_flags(var.self);
if (flags.get(DecorationPerPrimitiveEXT))
set_name(var.self, "gl_MeshPrimitivesEXT");
else
set_name(var.self, "gl_MeshVerticesEXT");
}
}
});
}
@ -3129,6 +3173,11 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
};
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
bool block = has_decoration(type.self, DecorationBlock);
@ -3139,7 +3188,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
uint32_t index = 0;
for (auto &m : ir.meta[type.self].members)
{
if (m.builtin)
if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
{
builtins.set(m.builtin_type);
if (m.builtin_type == BuiltInCullDistance)
@ -3192,7 +3241,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
{
// While we're at it, collect all declared global builtins (HLSL mostly ...).
auto &m = ir.meta[var.self].decoration;
if (m.builtin)
if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
{
global_builtins.set(m.builtin_type);
if (m.builtin_type == BuiltInCullDistance)
@ -3281,7 +3330,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
attr.push_back(join("stream = ", geom_stream));
}
if (!attr.empty())
if (model == ExecutionModelMeshEXT)
statement("out gl_MeshPerVertexEXT");
else if (!attr.empty())
statement("layout(", merge(attr), ") out gl_PerVertex");
else
statement("out gl_PerVertex");
@ -3399,7 +3450,8 @@ void CompilerGLSL::emit_resources()
case ExecutionModelGeometry:
case ExecutionModelTessellationControl:
case ExecutionModelTessellationEvaluation:
fixup_implicit_builtin_block_names();
case ExecutionModelMeshEXT:
fixup_implicit_builtin_block_names(execution.model);
break;
default:
@ -3419,6 +3471,7 @@ void CompilerGLSL::emit_resources()
break;
case ExecutionModelVertex:
case ExecutionModelMeshEXT:
emit_declared_builtin_block(StorageClassOutput, execution.model);
break;
@ -8890,6 +8943,15 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
return "gl_FragFullyCoveredNV";
case BuiltInPrimitiveTriangleIndicesEXT:
return "gl_PrimitiveTriangleIndicesEXT";
case BuiltInPrimitiveLineIndicesEXT:
return "gl_PrimitiveLineIndicesEXT";
case BuiltInPrimitivePointIndicesEXT:
return "gl_PrimitivePointIndicesEXT";
case BuiltInCullPrimitiveEXT:
return "gl_CullPrimitiveEXT";
default:
return join("gl_BuiltIn_", convert_to_string(builtin));
}
@ -9078,14 +9140,19 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
// but HLSL seems to just emit straight arrays here.
// We must pretend this access goes through gl_in/gl_out arrays
// to be able to access certain builtins as arrays.
// Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
auto builtin = ir.meta[base].decoration.builtin_type;
bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
switch (builtin)
{
// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
// case BuiltInClipDistance:
case BuiltInPosition:
case BuiltInPointSize:
if (var->storage == StorageClassInput)
if (mesh_shader)
expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
else if (var->storage == StorageClassInput)
expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
else if (var->storage == StorageClassOutput)
expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
@ -9093,6 +9160,17 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
append_index(index, is_literal);
break;
case BuiltInPrimitiveId:
case BuiltInLayer:
case BuiltInViewportIndex:
case BuiltInCullPrimitiveEXT:
case BuiltInPrimitiveShadingRateKHR:
if (mesh_shader)
expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
else
append_index(index, is_literal);
break;
default:
append_index(index, is_literal);
break;
@ -13510,6 +13588,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
break;
case OpSetMeshOutputsEXT:
statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
break;
default:
statement("// unimplemented op ", instruction.op);
break;
@ -13818,28 +13900,41 @@ string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
}
void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
{
// Works around weird behavior in glslangValidator where
// a patch out block is translated to just block members getting the decoration.
// To make glslang not complain when we compile again, we have to transform this back to a case where
// the variable itself has Patch decoration, and not members.
// Same for perprimitiveEXT.
auto &type = get<SPIRType>(var.basetype);
if (has_decoration(type.self, DecorationBlock))
{
uint32_t member_count = uint32_t(type.member_types.size());
Decoration promoted_decoration = {};
bool do_promote_decoration = false;
for (uint32_t i = 0; i < member_count; i++)
{
if (has_member_decoration(type.self, i, DecorationPatch))
{
set_decoration(var.self, DecorationPatch);
promoted_decoration = DecorationPatch;
do_promote_decoration = true;
break;
}
else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
{
promoted_decoration = DecorationPerPrimitiveEXT;
do_promote_decoration = true;
break;
}
}
if (has_decoration(var.self, DecorationPatch))
if (do_promote_decoration)
{
set_decoration(var.self, promoted_decoration);
for (uint32_t i = 0; i < member_count; i++)
unset_member_decoration(type.self, i, DecorationPatch);
unset_member_decoration(type.self, i, promoted_decoration);
}
}
}
@ -13852,6 +13947,8 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
res += "shared ";
else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT)
res += "taskPayloadSharedEXT ";
res += to_interpolation_qualifiers(flags);
if (var)
@ -16009,6 +16106,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
statement("terminateRayEXT;");
break;
case SPIRBlock::EmitMeshTasks:
statement("EmitMeshTasksEXT(",
to_unpacked_expression(block.mesh.groups[0]), ", ",
to_unpacked_expression(block.mesh.groups[1]), ", ",
to_unpacked_expression(block.mesh.groups[2]), ");");
break;
default:
SPIRV_CROSS_THROW("Unimplemented block terminator.");
}
@ -16337,6 +16441,9 @@ void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr
case BuiltInIncomingRayFlagsNV:
case BuiltInLaunchIdNV:
case BuiltInLaunchSizeNV:
case BuiltInPrimitiveTriangleIndicesEXT:
case BuiltInPrimitiveLineIndicesEXT:
case BuiltInPrimitivePointIndicesEXT:
expected_type = SPIRType::UInt;
break;

View File

@ -628,7 +628,7 @@ protected:
void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration);
void emit_buffer_block_legacy(const SPIRVariable &var);
void emit_buffer_block_flattened(const SPIRVariable &type);
void fixup_implicit_builtin_block_names();
void fixup_implicit_builtin_block_names(spv::ExecutionModel model);
void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model);
bool should_force_emit_builtin_block(spv::StorageClass storage);
void emit_push_constant_block_vulkan(const SPIRVariable &var);
@ -772,7 +772,7 @@ protected:
std::string type_to_glsl_constructor(const SPIRType &type);
std::string argument_decl(const SPIRFunction::Parameter &arg);
virtual std::string to_qualifiers_glsl(uint32_t id);
void fixup_io_block_patch_qualifiers(const SPIRVariable &var);
void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var);
void emit_output_variable_initializer(const SPIRVariable &var);
std::string to_precision_qualifiers_glsl(uint32_t id);
virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);

View File

@ -183,6 +183,15 @@ void Parser::parse(const Instruction &instruction)
auto op = static_cast<Op>(instruction.op);
uint32_t length = instruction.length;
// HACK for glslang that might emit OpEmitMeshTasksEXT followed by return / branch.
// Instead of failing hard, just ignore it.
if (ignore_trailing_block_opcodes)
{
ignore_trailing_block_opcodes = false;
if (op == OpReturn || op == OpBranch || op == OpUnreachable)
return;
}
switch (op)
{
case OpSourceContinued:
@ -349,6 +358,10 @@ void Parser::parse(const Instruction &instruction)
execution.output_vertices = ops[2];
break;
case ExecutionModeOutputPrimitivesEXT:
execution.output_primitives = ops[2];
break;
default:
break;
}
@ -1103,6 +1116,18 @@ void Parser::parse(const Instruction &instruction)
current_block = nullptr;
break;
case OpEmitMeshTasksEXT:
if (!current_block)
SPIRV_CROSS_THROW("Trying to end a non-existing block.");
current_block->terminator = SPIRBlock::EmitMeshTasks;
for (uint32_t i = 0; i < 3; i++)
current_block->mesh.groups[i] = ops[i];
current_block->mesh.payload = length >= 4 ? ops[3] : 0;
current_block = nullptr;
// Currently glslang is bugged and does not treat EmitMeshTasksEXT as a terminator.
ignore_trailing_block_opcodes = true;
break;
case OpReturn:
{
if (!current_block)

View File

@ -46,6 +46,8 @@ private:
ParsedIR ir;
SPIRFunction *current_function = nullptr;
SPIRBlock *current_block = nullptr;
// For workarounds.
bool ignore_trailing_block_opcodes = false;
void parse(const Instruction &instr);
const uint32_t *stream(const Instruction &instr) const;