Style nits for GL subgroup implementation.

This commit is contained in:
Hans-Kristian Arntzen 2020-10-08 12:41:01 +02:00
parent a6f6547cf1
commit 5619329665
3 changed files with 283 additions and 235 deletions

View File

@ -1,4 +1,5 @@
#version 450 #version 450
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#extension GL_KHR_shader_subgroup_ballot : require #extension GL_KHR_shader_subgroup_ballot : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -9,6 +10,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_basic : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -23,6 +25,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_basic : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -33,6 +36,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_basic : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -40,6 +44,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_basic : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -47,6 +52,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#extension GL_KHR_shader_subgroup_ballot : require #extension GL_KHR_shader_subgroup_ballot : require
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) #elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
@ -57,11 +63,13 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#extension GL_KHR_shader_subgroup_ballot : require #extension GL_KHR_shader_subgroup_ballot : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
#extension GL_NV_shader_thread_group : require #extension GL_NV_shader_thread_group : require
#endif #endif
#if defined(GL_KHR_shader_subgroup_vote) #if defined(GL_KHR_shader_subgroup_vote)
#extension GL_KHR_shader_subgroup_vote : require #extension GL_KHR_shader_subgroup_vote : require
#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) #elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))
@ -75,6 +83,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_basic : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -89,9 +98,11 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_basic : require
#endif #endif
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#extension GL_KHR_shader_subgroup_ballot : require #extension GL_KHR_shader_subgroup_ballot : require
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
@ -102,6 +113,7 @@
#else #else
#error No extensions available to emulate requested subgroup feature. #error No extensions available to emulate requested subgroup feature.
#endif #endif
#if defined(GL_NV_shader_thread_group) #if defined(GL_NV_shader_thread_group)
#extension GL_NV_shader_thread_group : require #extension GL_NV_shader_thread_group : require
#endif #endif
@ -126,6 +138,7 @@ layout(binding = 0, std430) buffer SSBO
#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u) #define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)
#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u) #define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
#define gl_SubgroupSize gl_WarpSizeNV #define gl_SubgroupSize gl_WarpSizeNV
@ -134,20 +147,24 @@ layout(binding = 0, std430) buffer SSBO
#elif defined(GL_AMD_gcn_shader) #elif defined(GL_AMD_gcn_shader)
#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD) #define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
#define gl_SubgroupInvocationID gl_ThreadInWarpNV #define gl_SubgroupInvocationID gl_ThreadInWarpNV
#elif defined(GL_ARB_shader_ballot) #elif defined(GL_ARB_shader_ballot)
#define gl_SubgroupInvocationID gl_SubGroupInvocationARB #define gl_SubgroupInvocationID gl_SubGroupInvocationARB
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
#define gl_SubgroupID gl_WarpIDNV #define gl_SubgroupID gl_WarpIDNV
#endif #endif
#if defined(GL_KHR_shader_subgroup_basic) #if defined(GL_KHR_shader_subgroup_basic)
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
#define gl_NumSubgroups gl_WarpsPerSMNV #define gl_NumSubgroups gl_WarpsPerSMNV
#endif #endif
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#elif defined(GL_ARB_shader_ballot) #elif defined(GL_ARB_shader_ballot)
int subgroupBroadcastFirst(int value) { return readFirstInvocationARB(value); } int subgroupBroadcastFirst(int value) { return readFirstInvocationARB(value); }
@ -216,25 +233,29 @@ dvec2 subgroupBroadcast(dvec2 value, uint id) { return shuffleNV(value, id, gl_W
dvec3 subgroupBroadcast(dvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } dvec3 subgroupBroadcast(dvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
dvec4 subgroupBroadcast(dvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } dvec4 subgroupBroadcast(dvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
#endif #endif
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); } uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }
uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); } uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }
#else #else
uint subgroupBallotFindLSB(uvec4 value) { uint subgroupBallotFindLSB(uvec4 value)
int firstLive = findLSB(value.x); {
return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32)); int firstLive = findLSB(value.x);
return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));
} }
uint subgroupBallotFindMSB(uvec4 value) { uint subgroupBallotFindMSB(uvec4 value)
int firstLive = findMSB(value.y); {
return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x)); int firstLive = findMSB(value.y);
return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));
} }
#endif #endif
#if defined(GL_KHR_shader_subgroup_vote) #if defined(GL_KHR_shader_subgroup_vote)
#elif defined(GL_AMD_gcn_shader) #elif defined(GL_AMD_gcn_shader)
bool subgroupAll(bool value) { return ballotAMD(value)==ballotAMD(true); } bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }
bool subgroupAny(bool value) { return ballotAMD(value)!=0ull; } bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }
bool subgroupAllEqual(bool value) { uint64_t b=ballotAMD(value); return b==0uLL || b==ballotAMD(true); } bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || b == ballotAMD(true); }
#elif defined(GL_NV_gpu_shader_5) #elif defined(GL_NV_gpu_shader_5)
bool subgroupAll(bool value) { return allThreadsNV(value); } bool subgroupAll(bool value) { return allThreadsNV(value); }
bool subgroupAny(bool value) { return anyThreadNV(value); } bool subgroupAny(bool value) { return anyThreadNV(value); }
@ -244,8 +265,9 @@ bool subgroupAll(bool v) { return allInvocationsARB(v); }
bool subgroupAny(bool v) { return anyInvocationARB(v); } bool subgroupAny(bool v) { return anyInvocationARB(v); }
bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); } bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }
#endif #endif
#ifndef GL_KHR_shader_subgroup_vote #ifndef GL_KHR_shader_subgroup_vote
#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return subgroupAllEqual(subgroupBroadcastFirst(value)==value); } #define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return subgroupAllEqual(subgroupBroadcastFirst(value) == value); }
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(int) _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(int)
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec2) _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec2)
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec3) _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec3)
@ -264,76 +286,85 @@ _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec3)
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec4) _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec4)
#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND #undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND
#endif #endif
#if defined(GL_KHR_shader_subgroup_ballot) #if defined(GL_KHR_shader_subgroup_ballot)
#elif defined(GL_NV_shader_thread_group) #elif defined(GL_NV_shader_thread_group)
uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); } uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }
#elif defined(GL_ARB_shader_ballot) #elif defined(GL_ARB_shader_ballot)
uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); } uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }
#endif #endif
#ifndef GL_KHR_shader_subgroup_basic #ifndef GL_KHR_shader_subgroup_basic
bool subgroupElect() { bool subgroupElect()
uvec4 activeMask = subgroupBallot(true); {
uint firstLive = subgroupBallotFindLSB(activeMask); uvec4 activeMask = subgroupBallot(true);
return gl_SubgroupInvocationID == firstLive; uint firstLive = subgroupBallotFindLSB(activeMask);
return gl_SubgroupInvocationID == firstLive;
} }
#endif #endif
#ifndef GL_KHR_shader_subgroup_basic #ifndef GL_KHR_shader_subgroup_basic
void subgroupBarrier() { /*NOOP*/ } void subgroupBarrier() { /*NOOP*/ }
#endif #endif
#ifndef GL_KHR_shader_subgroup_basic #ifndef GL_KHR_shader_subgroup_basic
void subgroupMemoryBarrier() { groupMemoryBarrier(); } void subgroupMemoryBarrier() { groupMemoryBarrier(); }
void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); } void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }
void subgroupMemoryBarrierShared() { groupMemoryBarrier(); } void subgroupMemoryBarrierShared() { groupMemoryBarrier(); }
void subgroupMemoryBarrierImage() { groupMemoryBarrier(); } void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }
#endif #endif
#ifndef GL_KHR_shader_subgroup_ballot #ifndef GL_KHR_shader_subgroup_ballot
bool subgroupInverseBallot(uvec4 value) bool subgroupInverseBallot(uvec4 value)
{ {
return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u))); return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));
} }
uint subgroupBallotInclusiveBitCount(uvec4 value) uint subgroupBallotInclusiveBitCount(uvec4 value)
{ {
uvec2 v = value.xy & gl_SubgroupLeMask.xy; uvec2 v = value.xy & gl_SubgroupLeMask.xy;
ivec2 c = bitCount(v); ivec2 c = bitCount(v);
#ifdef GL_NV_shader_thread_group #ifdef GL_NV_shader_thread_group
return uint(c.x); return uint(c.x);
#else #else
return uint(c.x + c.y); return uint(c.x + c.y);
#endif #endif
} }
uint subgroupBallotExclusiveBitCount(uvec4 value) uint subgroupBallotExclusiveBitCount(uvec4 value)
{ {
uvec2 v = value.xy & gl_SubgroupLtMask.xy; uvec2 v = value.xy & gl_SubgroupLtMask.xy;
ivec2 c = bitCount(v); ivec2 c = bitCount(v);
#ifdef GL_NV_shader_thread_group #ifdef GL_NV_shader_thread_group
return uint(c.x); return uint(c.x);
#else #else
return uint(c.x + c.y); return uint(c.x + c.y);
#endif #endif
} }
#endif #endif
#ifndef GL_KHR_shader_subgroup_ballot #ifndef GL_KHR_shader_subgroup_ballot
uint subgroupBallotBitCount(uvec4 value) uint subgroupBallotBitCount(uvec4 value)
{ {
ivec2 c = bitCount(value.xy); ivec2 c = bitCount(value.xy);
#ifdef GL_NV_shader_thread_group #ifdef GL_NV_shader_thread_group
return uint(c.x); return uint(c.x);
#else #else
return uint(c.x + c.y); return uint(c.x + c.y);
#endif #endif
} }
#endif #endif
#ifndef GL_KHR_shader_subgroup_ballot #ifndef GL_KHR_shader_subgroup_ballot
bool subgroupBallotBitExtract(uvec4 value, uint index) bool subgroupBallotBitExtract(uvec4 value, uint index)
{ {
#ifdef GL_NV_shader_thread_group #ifdef GL_NV_shader_thread_group
uint shifted = value.x >> index; uint shifted = value.x >> index;
#else #else
uint shifted = value[index >> 5u] >> (index & 0x1fu); uint shifted = value[index >> 5u] >> (index & 0x1fu);
#endif #endif
return (shifted & 1u) != 0u; return (shifted & 1u) != 0u;
} }
#endif #endif
void main() void main()
{ {
_9.FragColor = float(gl_NumSubgroups); _9.FragColor = float(gl_NumSubgroups);

View File

@ -548,7 +548,7 @@ string CompilerGLSL::compile()
emit_header(); emit_header();
emit_resources(); emit_resources();
emit_extension_workarounds(ir.entry_points[ir.default_entry_point].model); emit_extension_workarounds(get_execution_model());
emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset()); emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
@ -625,9 +625,8 @@ void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature
{ {
if (options.vulkan_semantics) if (options.vulkan_semantics)
{ {
const ShaderSubgroupSupportHelper::Candidate khrExt = auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
shader_subgroup_supporter.get_KHR_extension_for_feature(feature); require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
require_extension_internal(shader_subgroup_supporter.get_extension_name(khrExt));
} }
else else
{ {
@ -742,33 +741,33 @@ void CompilerGLSL::emit_header()
if (!options.vulkan_semantics) if (!options.vulkan_semantics)
{ {
using Supp = ShaderSubgroupSupportHelper; using Supp = ShaderSubgroupSupportHelper;
auto result = shader_subgroup_supporter.resolve();
Supp::Result result = shader_subgroup_supporter.resolve(); for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
for (uint32_t ft = 0u; ft < Supp::FeatureCount; ++ft)
{ {
Supp::Feature feature = static_cast<Supp::Feature>(ft); auto feature = static_cast<Supp::Feature>(feature_index);
if (!shader_subgroup_supporter.is_feature_requested(feature)) if (!shader_subgroup_supporter.is_feature_requested(feature))
continue; continue;
auto exts = shader_subgroup_supporter.get_candidates_for_feature(feature, result); auto exts = Supp::get_candidates_for_feature(feature, result);
if (exts.empty()) if (exts.empty())
continue; continue;
for (auto it = exts.begin(); it != exts.end(); ++it) statement("");
{
const Supp::Candidate ext = *it;
std::string name = Supp::get_extension_name(ext); for (auto &ext : exts)
std::string extraPredicate = Supp::get_extra_required_extension_predicate(ext); {
auto extraNames = Supp::get_extra_required_extension_names(ext); const char *name = Supp::get_extension_name(ext);
statement(it != exts.begin() ? "#elif" : "#if", " defined(", name, ")", const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
extraPredicate.empty() ? "" : (" && " + extraPredicate)); auto extra_names = Supp::get_extra_required_extension_names(ext);
for (const auto &e : extraNames) statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
(*extra_predicate != '\0' ? " && " : ""), extra_predicate);
for (const auto &e : extra_names)
statement("#extension ", e, " : enable"); statement("#extension ", e, " : enable");
statement("#extension ", name, " : require"); statement("#extension ", name, " : require");
} }
if (!Supp::can_feature_be_implemented_wo_extensions(feature)) if (!Supp::can_feature_be_implemented_without_extensions(feature))
{ {
statement("#else"); statement("#else");
statement("#error No extensions available to emulate requested subgroup feature."); statement("#error No extensions available to emulate requested subgroup feature.");
@ -3399,25 +3398,25 @@ void CompilerGLSL::emit_resources()
void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
{ {
static const char *TYPES[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", static const char *workaround_types[] = {
"float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
"float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4"
};
if (!options.vulkan_semantics) if (!options.vulkan_semantics)
{ {
using Supp = ShaderSubgroupSupportHelper; using Supp = ShaderSubgroupSupportHelper;
auto result = shader_subgroup_supporter.resolve();
Supp::Result result = shader_subgroup_supporter.resolve();
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupMask, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3439,17 +3438,18 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupSize, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3466,17 +3466,18 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupInvocationID, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3490,17 +3491,18 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupID, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3511,17 +3513,18 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups)) if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::NumSubgroups, result); auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3532,51 +3535,61 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBrodcast_First)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBrodcast_First))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupBrodcast_First, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBrodcast_First, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_shuffle: case Supp::NV_shader_thread_shuffle:
for (const char *t : TYPES) for (const char *t : workaround_types)
statement(string(t) + " subgroupBroadcastFirst(" + t + {
statement(t, " subgroupBroadcastFirst(", t,
" value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
for (const char *t : TYPES) }
statement(string(t) + " subgroupBroadcast(" + t + for (const char *t : workaround_types)
{
statement(t, " subgroupBroadcast(", t,
" value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
}
break; break;
case Supp::ARB_shader_ballot: case Supp::ARB_shader_ballot:
for (const char *t : TYPES) for (const char *t : workaround_types)
statement(string(t) + " subgroupBroadcastFirst(" + t + {
statement(t, " subgroupBroadcastFirst(", t,
" value) { return readFirstInvocationARB(value); }"); " value) { return readFirstInvocationARB(value); }");
for (const char *t : TYPES) }
statement(string(t) + " subgroupBroadcast(" + t + for (const char *t : workaround_types)
{
statement(t, " subgroupBroadcast(", t,
" value, uint id) { return readInvocationARB(value, id); }"); " value, uint id) { return readInvocationARB(value, id); }");
}
break; break;
default: default:
break; break;
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3588,27 +3601,29 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#else"); statement("#else");
statement("uint subgroupBallotFindLSB(uvec4 value) {"); statement("uint subgroupBallotFindLSB(uvec4 value)");
begin_scope();
statement("int firstLive = findLSB(value.x);"); statement("int firstLive = findLSB(value.x);");
statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));"); statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
statement("}"); end_scope();
statement("uint subgroupBallotFindMSB(uvec4 value) {"); statement("uint subgroupBallotFindMSB(uvec4 value)");
begin_scope();
statement("int firstLive = findMSB(value.y);"); statement("int firstLive = findMSB(value.y);");
statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
statement("}"); end_scope();
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
{ {
auto exts = auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_gpu_shader_5: case Supp::NV_gpu_shader_5:
@ -3622,40 +3637,41 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
break; break;
case Supp::AMD_gcn_shader: case Supp::AMD_gcn_shader:
statement("bool subgroupAll(bool value) { return ballotAMD(value)==ballotAMD(true); }"); statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
statement("bool subgroupAny(bool value) { return ballotAMD(value)!=0ull; }"); statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
statement("bool subgroupAllEqual(bool value) { uint64_t b=ballotAMD(value); return b==0uLL || " statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
"b==ballotAMD(true); }"); "b == ballotAMD(true); }");
break; break;
default: default:
break; break;
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
{ {
statement("#ifndef GL_KHR_shader_subgroup_vote"); statement("#ifndef GL_KHR_shader_subgroup_vote");
statement( statement(
"#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
"subgroupAllEqual(subgroupBroadcastFirst(value)==value); }"); "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
for (const char *t : TYPES) for (const char *t : workaround_types)
{ statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
statement(std::string("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(") + t + ")");
}
statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
{ {
auto exts = shader_subgroup_supporter.get_candidates_for_feature(Supp::SubgroupBallot, result); auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
for (uint32_t i = 0u; i < exts.size(); ++i) for (auto &e : exts)
{ {
const Supp::Candidate e = exts[i]; const char *name = Supp::get_extension_name(e);
const std::string name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
statement(i == 0u ? "#if" : "#elif", " defined(", name, ")");
switch (e) switch (e)
{ {
case Supp::NV_shader_thread_group: case Supp::NV_shader_thread_group:
@ -3669,17 +3685,22 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
} }
} }
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
{ {
statement("#ifndef GL_KHR_shader_subgroup_basic"); statement("#ifndef GL_KHR_shader_subgroup_basic");
statement("bool subgroupElect() {"); statement("bool subgroupElect()");
begin_scope();
statement("uvec4 activeMask = subgroupBallot(true);"); statement("uvec4 activeMask = subgroupBallot(true);");
statement("uint firstLive = subgroupBallotFindLSB(activeMask);"); statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
statement("return gl_SubgroupInvocationID == firstLive;"); statement("return gl_SubgroupInvocationID == firstLive;");
statement("}"); end_scope();
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
{ {
// Extensions we're using in place of GL_KHR_shader_subgroup_basic state // Extensions we're using in place of GL_KHR_shader_subgroup_basic state
@ -3687,7 +3708,9 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
statement("#ifndef GL_KHR_shader_subgroup_basic"); statement("#ifndef GL_KHR_shader_subgroup_basic");
statement("void subgroupBarrier() { /*NOOP*/ }"); statement("void subgroupBarrier() { /*NOOP*/ }");
statement("#endif"); statement("#endif");
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
{ {
if (model == spv::ExecutionModelGLCompute) if (model == spv::ExecutionModelGLCompute)
@ -3707,65 +3730,72 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
statement("#endif"); statement("#endif");
} }
statement("");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
{ {
statement("#ifndef GL_KHR_shader_subgroup_ballot"); statement("#ifndef GL_KHR_shader_subgroup_ballot");
statement("bool subgroupInverseBallot(uvec4 value)"); statement("bool subgroupInverseBallot(uvec4 value)");
statement("{"); begin_scope();
statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
statement("}"); end_scope();
statement("uint subgroupBallotInclusiveBitCount(uvec4 value)"); statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
statement("{"); begin_scope();
statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
statement("ivec2 c = bitCount(v);"); statement("ivec2 c = bitCount(v);");
statement("#ifdef GL_NV_shader_thread_group"); statement_no_indent("#ifdef GL_NV_shader_thread_group");
statement("return uint(c.x);"); statement("return uint(c.x);");
statement("#else"); statement_no_indent("#else");
statement("return uint(c.x + c.y);"); statement("return uint(c.x + c.y);");
statement("#endif"); statement_no_indent("#endif");
statement("}"); end_scope();
statement("uint subgroupBallotExclusiveBitCount(uvec4 value)"); statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
statement("{"); begin_scope();
statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
statement("ivec2 c = bitCount(v);"); statement("ivec2 c = bitCount(v);");
statement("#ifdef GL_NV_shader_thread_group"); statement_no_indent("#ifdef GL_NV_shader_thread_group");
statement("return uint(c.x);"); statement("return uint(c.x);");
statement("#else"); statement_no_indent("#else");
statement("return uint(c.x + c.y);"); statement("return uint(c.x + c.y);");
statement_no_indent("#endif");
end_scope();
statement("#endif"); statement("#endif");
statement("}"); statement("");
statement("#endif");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
{ {
statement("#ifndef GL_KHR_shader_subgroup_ballot"); statement("#ifndef GL_KHR_shader_subgroup_ballot");
statement("uint subgroupBallotBitCount(uvec4 value)"); statement("uint subgroupBallotBitCount(uvec4 value)");
statement("{"); begin_scope();
statement("ivec2 c = bitCount(value.xy);"); statement("ivec2 c = bitCount(value.xy);");
statement("#ifdef GL_NV_shader_thread_group"); statement_no_indent("#ifdef GL_NV_shader_thread_group");
statement("return uint(c.x);"); statement("return uint(c.x);");
statement("#else"); statement_no_indent("#else");
statement("return uint(c.x + c.y);"); statement("return uint(c.x + c.y);");
statement_no_indent("#endif");
end_scope();
statement("#endif"); statement("#endif");
statement("}"); statement("");
statement("#endif");
} }
if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract)) if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
{ {
statement("#ifndef GL_KHR_shader_subgroup_ballot"); statement("#ifndef GL_KHR_shader_subgroup_ballot");
statement("bool subgroupBallotBitExtract(uvec4 value, uint index)"); statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
statement("{"); begin_scope();
statement("#ifdef GL_NV_shader_thread_group"); statement_no_indent("#ifdef GL_NV_shader_thread_group");
statement("uint shifted = value.x >> index;"); statement("uint shifted = value.x >> index;");
statement("#else"); statement_no_indent("#else");
statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);"); statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
statement("#endif"); statement_no_indent("#endif");
statement("return (shifted & 1u) != 0u;"); statement("return (shifted & 1u) != 0u;");
statement("}"); end_scope();
statement("#endif"); statement("#endif");
statement("");
} }
} }
} }
@ -5930,7 +5960,7 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s
} }
} }
bool CompilerGLSL::is_supported_subgroup_op(spv::Op op) bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
{ {
switch (op) switch (op)
{ {
@ -7209,7 +7239,7 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
const uint32_t *ops = stream(i); const uint32_t *ops = stream(i);
auto op = static_cast<Op>(i.op); auto op = static_cast<Op>(i.op);
if (!options.vulkan_semantics && !is_supported_subgroup_op(op)) if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
// If we need to do implicit bitcasts, make sure we do it with the correct type. // If we need to do implicit bitcasts, make sure we do it with the correct type.
@ -14856,22 +14886,23 @@ bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id); return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
} }
std::string CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{ {
const char *const retval[CandidateCount]{ "GL_KHR_shader_subgroup_ballot", static const char * const retval[CandidateCount] = {
"GL_KHR_shader_subgroup_basic", "GL_KHR_shader_subgroup_ballot",
"GL_KHR_shader_subgroup_vote", "GL_KHR_shader_subgroup_basic",
"GL_NV_gpu_shader_5", "GL_KHR_shader_subgroup_vote",
"GL_NV_shader_thread_group", "GL_NV_gpu_shader_5",
"GL_NV_shader_thread_shuffle", "GL_NV_shader_thread_group",
"GL_ARB_shader_ballot", "GL_NV_shader_thread_shuffle",
"GL_ARB_shader_group_vote", "GL_ARB_shader_ballot",
"GL_AMD_gcn_shader" }; "GL_ARB_shader_group_vote",
"GL_AMD_gcn_shader"
};
return retval[c]; return retval[c];
} }
SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper:: SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
get_extra_required_extension_names(Candidate c)
{ {
switch (c) switch (c)
{ {
@ -14884,8 +14915,7 @@ SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::
} }
} }
std::string CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate( const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
Candidate c)
{ {
switch (c) switch (c)
{ {
@ -14898,8 +14928,8 @@ std::string CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extens
} }
} }
auto CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependencies(Feature feature) CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector
-> SmallVector<Feature> CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependencies(Feature feature)
{ {
switch (feature) switch (feature)
{ {
@ -14916,28 +14946,29 @@ auto CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependencies(Feature
} }
} }
auto CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependency_mask( CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask
Feature feature) -> FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependency_mask(Feature feature)
{ {
return build_mask(get_feature_dependencies(feature)); return build_mask(get_feature_dependencies(feature));
} }
bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_wo_extensions( bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
Feature feature)
{ {
const static bool retval[FeatureCount]{ false, false, false, false, false, false, static const bool retval[FeatureCount] = {
true, // SubgroupBalloFindLSB_MSB false, false, false, false, false, false,
false, false, false, false, true, // SubgroupBalloFindLSB_MSB
true, // SubgroupMemBarrier - replaced with workgroup memory barriers false, false, false, false,
false, false, true, false }; true, // SubgroupMemBarrier - replaced with workgroup memory barriers
false, false, true, false
};
return retval[feature]; return retval[feature];
} }
auto SPIRV_CROSS_NAMESPACE::CompilerGLSL::ShaderSubgroupSupportHelper::get_KHR_extension_for_feature( CompilerGLSL::ShaderSubgroupSupportHelper::Candidate
Feature feature) -> Candidate CompilerGLSL::ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(Feature feature)
{ {
static const Candidate extensions[FeatureCount]{ static const Candidate extensions[FeatureCount] = {
KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic,
@ -14947,61 +14978,64 @@ auto SPIRV_CROSS_NAMESPACE::CompilerGLSL::ShaderSubgroupSupportHelper::get_KHR_e
return extensions[feature]; return extensions[feature];
} }
void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature ft) void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{ {
featureMask |= ((FeatureMask{ 1 } << ft) | get_feature_dependency_mask(ft)); feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
} }
bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature ft) const bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{ {
return static_cast<bool>(featureMask & (1u << ft)); return (feature_mask & (1u << feature)) != 0;
} }
auto CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const -> Result CompilerGLSL::ShaderSubgroupSupportHelper::Result
CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{ {
Result res; Result res;
for (uint32_t i = 0u; i < FeatureCount; ++i) for (uint32_t i = 0u; i < FeatureCount; ++i)
{ {
if (featureMask & (1u << i)) if (feature_mask & (1u << i))
{ {
const Feature feature = static_cast<Feature>(i); auto feature = static_cast<Feature>(i);
std::unordered_set<uint32_t> uniqueCandidates; std::unordered_set<uint32_t> unique_candidates;
auto candidates = get_candidates_for_feature(feature); auto candidates = get_candidates_for_feature(feature);
uniqueCandidates.insert(candidates.begin(), candidates.end()); unique_candidates.insert(candidates.begin(), candidates.end());
auto deps = get_feature_dependencies(feature); auto deps = get_feature_dependencies(feature);
for (Feature d : deps) for (Feature d : deps)
if ((candidates = get_candidates_for_feature(d)).size() > 0ull) {
uniqueCandidates.insert(candidates.begin(), candidates.end()); candidates = get_candidates_for_feature(d);
if (!candidates.empty())
unique_candidates.insert(candidates.begin(), candidates.end());
}
for (uint32_t c : uniqueCandidates) for (uint32_t c : unique_candidates)
++res[static_cast<Candidate>(c)]; ++res.weights[static_cast<Candidate>(c)];
} }
} }
return res; return res;
} }
auto CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature ft, const Result &r) CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector
-> SmallVector<Candidate, CandidateCount> CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature ft, const Result &r)
{ {
auto c = get_candidates_for_feature(ft); auto c = get_candidates_for_feature(ft);
auto cmp = [&r](Candidate a, Candidate b) { auto cmp = [&r](Candidate a, Candidate b) {
if (r[a] == r[b]) if (r.weights[a] == r.weights[b])
return a < b; // prefer candidates with lower enum value return a < b; // Prefer candidates with lower enum value
return r[a] > r[b]; return r.weights[a] > r.weights[b];
}; };
std::sort(c.begin(), c.end(), cmp); std::sort(c.begin(), c.end(), cmp);
return c; return c;
} }
auto SPIRV_CROSS_NAMESPACE::CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature ft) CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector
-> SmallVector<Candidate, CandidateCount> CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature feature)
{ {
switch (ft) switch (feature)
{ {
case SubgroupMask: case SubgroupMask:
return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
@ -15040,24 +15074,23 @@ auto SPIRV_CROSS_NAMESPACE::CompilerGLSL::ShaderSubgroupSupportHelper::get_candi
} }
} }
auto CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask
SmallVector<Feature> features) -> FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(const SmallVector<Feature> &features)
{ {
FeatureMask mask = 0; FeatureMask mask = 0;
for (Feature f : features) for (Feature f : features)
mask |= (FeatureMask{ 1 } << f); mask |= FeatureMask(1) << f;
return mask; return mask;
} }
CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{ {
std::fill(weights.begin(), weights.end(), 0u); for (auto &weight : weights)
weight = 0;
// make sure KHR_shader_subgroup extensions are always prefered // Make sure KHR_shader_subgroup extensions are always prefered.
const uint32_t bigNum = static_cast<uint32_t>(FeatureCount); const uint32_t big_num = FeatureCount;
weights[KHR_shader_subgroup_ballot] = bigNum; weights[KHR_shader_subgroup_ballot] = big_num;
weights[KHR_shader_subgroup_basic] = bigNum; weights[KHR_shader_subgroup_basic] = big_num;
weights[KHR_shader_subgroup_vote] = bigNum; weights[KHR_shader_subgroup_vote] = big_num;
} }

View File

@ -19,7 +19,6 @@
#include "GLSL.std.450.h" #include "GLSL.std.450.h"
#include "spirv_cross.hpp" #include "spirv_cross.hpp"
#include <array>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <utility> #include <utility>
@ -263,11 +262,10 @@ protected:
CandidateCount CandidateCount
}; };
static std::string get_extension_name(Candidate c);
static const char *get_extension_name(Candidate c);
static SmallVector<std::string> get_extra_required_extension_names(Candidate c); static SmallVector<std::string> get_extra_required_extension_names(Candidate c);
static const char *get_extra_required_extension_predicate(Candidate c);
static std::string get_extra_required_extension_predicate(Candidate c);
enum Feature enum Feature
{ {
@ -294,48 +292,34 @@ protected:
using FeatureMask = uint32_t; using FeatureMask = uint32_t;
static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits."); static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits.");
static SmallVector<Feature> get_feature_dependencies(Feature feature); using CandidateVector = SmallVector<Candidate, CandidateCount>;
using FeatureVector = SmallVector<Feature>;
static FeatureVector get_feature_dependencies(Feature feature);
static FeatureMask get_feature_dependency_mask(Feature feature); static FeatureMask get_feature_dependency_mask(Feature feature);
static bool can_feature_be_implemented_without_extensions(Feature feature);
static bool can_feature_be_implemented_wo_extensions(Feature feature);
static Candidate get_KHR_extension_for_feature(Feature feature); static Candidate get_KHR_extension_for_feature(Feature feature);
struct Result struct Result
{ {
Result(); Result();
uint32_t weights[CandidateCount];
inline uint32_t operator[](Candidate c) const
{
return weights[c];
}
inline uint32_t &operator[](Candidate c)
{
return weights[c];
}
std::array<uint32_t, CandidateCount> weights;
}; };
void request_feature(Feature ft); void request_feature(Feature feature);
bool is_feature_requested(Feature feature) const;
bool is_feature_requested(Feature ft) const;
Result resolve() const; Result resolve() const;
static SmallVector<Candidate, CandidateCount> get_candidates_for_feature(Feature ft, const Result &r); static CandidateVector get_candidates_for_feature(Feature ft, const Result &r);
private: private:
static SmallVector<Candidate, CandidateCount> get_candidates_for_feature(Feature ft); static CandidateVector get_candidates_for_feature(Feature ft);
static FeatureMask build_mask(const SmallVector<Feature> &features);
static FeatureMask build_mask(SmallVector<Feature> features); FeatureMask feature_mask = 0;
FeatureMask featureMask = 0;
}; };
// TODO remove this function when all subgroup ops are supported (or make it always return true) // TODO remove this function when all subgroup ops are supported (or make it always return true)
static bool is_supported_subgroup_op(spv::Op op); static bool is_supported_subgroup_op_in_opengl(spv::Op op);
void reset(); void reset();
void emit_function(SPIRFunction &func, const Bitset &return_flags); void emit_function(SPIRFunction &func, const Bitset &return_flags);