Resolved issues 1350, 1351, 1352
This commit is contained in:
parent
8891bd3512
commit
db52e277b9
371
reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp
Normal file
371
reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp
Normal file
@ -0,0 +1,371 @@
|
||||
#version 450
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
|
||||
#extension GL_ARB_shader_int64 : enable
|
||||
#extension GL_ARB_shader_ballot : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
|
||||
#extension GL_ARB_shader_int64 : enable
|
||||
#extension GL_ARB_shader_ballot : require
|
||||
#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))
|
||||
#extension GL_AMD_gpu_shader_int64 : enable
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
#extension GL_AMD_gcn_shader : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
|
||||
#extension GL_ARB_shader_int64 : enable
|
||||
#extension GL_ARB_shader_ballot : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
|
||||
#extension GL_ARB_shader_int64 : enable
|
||||
#extension GL_ARB_shader_ballot : require
|
||||
#elif defined(GL_NV_shader_thread_shuffle)
|
||||
#extension GL_NV_shader_thread_shuffle : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_vote)
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))
|
||||
#extension GL_AMD_gpu_shader_int64 : enable
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
#extension GL_AMD_gcn_shader : require
|
||||
#elif defined(GL_NV_gpu_shader_5)
|
||||
#extension GL_NV_gpu_shader_5 : require
|
||||
#elif defined(GL_ARB_shader_group_vote)
|
||||
#extension GL_ARB_shader_group_vote : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
|
||||
#extension GL_ARB_shader_int64 : enable
|
||||
#extension GL_ARB_shader_ballot : require
|
||||
#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))
|
||||
#extension GL_AMD_gpu_shader_int64 : enable
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
#extension GL_AMD_gcn_shader : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64)
|
||||
#extension GL_ARB_shader_int64 : enable
|
||||
#extension GL_ARB_shader_ballot : require
|
||||
#else
|
||||
#error No extensions available to emulate requested subgroup feature.
|
||||
#endif
|
||||
#if defined(GL_NV_shader_thread_group)
|
||||
#extension GL_NV_shader_thread_group : require
|
||||
#endif
|
||||
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(binding = 0, std430) buffer SSBO
|
||||
{
|
||||
float FragColor;
|
||||
} _9;
|
||||
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)
|
||||
#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)
|
||||
#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)
|
||||
#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)
|
||||
#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)
|
||||
#elif defined(GL_ARB_shader_ballot)
|
||||
#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)
|
||||
#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)
|
||||
#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)
|
||||
#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)
|
||||
#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#define gl_SubgroupSize gl_WarpSizeNV
|
||||
#elif defined(GL_ARB_shader_ballot)
|
||||
#define gl_SubgroupSize gl_SubGroupSizeARB
|
||||
#elif defined(GL_AMD_gcn_shader)
|
||||
#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#define gl_SubgroupInvocationID gl_ThreadInWarpNV
|
||||
#elif defined(GL_ARB_shader_ballot)
|
||||
#define gl_SubgroupInvocationID gl_SubGroupInvocationARB
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#define gl_SubgroupID gl_WarpIDNV
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_basic)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
#define gl_NumSubgroups gl_WarpsPerSMNV
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#elif defined(GL_ARB_shader_ballot)
|
||||
int subgroupBroadcastFirst(int value) { return readFirstInvocationARB(value); }
|
||||
ivec2 subgroupBroadcastFirst(ivec2 value) { return readFirstInvocationARB(value); }
|
||||
ivec3 subgroupBroadcastFirst(ivec3 value) { return readFirstInvocationARB(value); }
|
||||
ivec4 subgroupBroadcastFirst(ivec4 value) { return readFirstInvocationARB(value); }
|
||||
uint subgroupBroadcastFirst(uint value) { return readFirstInvocationARB(value); }
|
||||
uvec2 subgroupBroadcastFirst(uvec2 value) { return readFirstInvocationARB(value); }
|
||||
uvec3 subgroupBroadcastFirst(uvec3 value) { return readFirstInvocationARB(value); }
|
||||
uvec4 subgroupBroadcastFirst(uvec4 value) { return readFirstInvocationARB(value); }
|
||||
float subgroupBroadcastFirst(float value) { return readFirstInvocationARB(value); }
|
||||
vec2 subgroupBroadcastFirst(vec2 value) { return readFirstInvocationARB(value); }
|
||||
vec3 subgroupBroadcastFirst(vec3 value) { return readFirstInvocationARB(value); }
|
||||
vec4 subgroupBroadcastFirst(vec4 value) { return readFirstInvocationARB(value); }
|
||||
double subgroupBroadcastFirst(double value) { return readFirstInvocationARB(value); }
|
||||
dvec2 subgroupBroadcastFirst(dvec2 value) { return readFirstInvocationARB(value); }
|
||||
dvec3 subgroupBroadcastFirst(dvec3 value) { return readFirstInvocationARB(value); }
|
||||
dvec4 subgroupBroadcastFirst(dvec4 value) { return readFirstInvocationARB(value); }
|
||||
int subgroupBroadcast(int value, uint id) { return readInvocationARB(value, id); }
|
||||
ivec2 subgroupBroadcast(ivec2 value, uint id) { return readInvocationARB(value, id); }
|
||||
ivec3 subgroupBroadcast(ivec3 value, uint id) { return readInvocationARB(value, id); }
|
||||
ivec4 subgroupBroadcast(ivec4 value, uint id) { return readInvocationARB(value, id); }
|
||||
uint subgroupBroadcast(uint value, uint id) { return readInvocationARB(value, id); }
|
||||
uvec2 subgroupBroadcast(uvec2 value, uint id) { return readInvocationARB(value, id); }
|
||||
uvec3 subgroupBroadcast(uvec3 value, uint id) { return readInvocationARB(value, id); }
|
||||
uvec4 subgroupBroadcast(uvec4 value, uint id) { return readInvocationARB(value, id); }
|
||||
float subgroupBroadcast(float value, uint id) { return readInvocationARB(value, id); }
|
||||
vec2 subgroupBroadcast(vec2 value, uint id) { return readInvocationARB(value, id); }
|
||||
vec3 subgroupBroadcast(vec3 value, uint id) { return readInvocationARB(value, id); }
|
||||
vec4 subgroupBroadcast(vec4 value, uint id) { return readInvocationARB(value, id); }
|
||||
double subgroupBroadcast(double value, uint id) { return readInvocationARB(value, id); }
|
||||
dvec2 subgroupBroadcast(dvec2 value, uint id) { return readInvocationARB(value, id); }
|
||||
dvec3 subgroupBroadcast(dvec3 value, uint id) { return readInvocationARB(value, id); }
|
||||
dvec4 subgroupBroadcast(dvec4 value, uint id) { return readInvocationARB(value, id); }
|
||||
#elif defined(GL_NV_shader_thread_shuffle)
|
||||
int subgroupBroadcastFirst(int value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
ivec2 subgroupBroadcastFirst(ivec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
ivec3 subgroupBroadcastFirst(ivec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
ivec4 subgroupBroadcastFirst(ivec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
uint subgroupBroadcastFirst(uint value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
uvec2 subgroupBroadcastFirst(uvec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
uvec3 subgroupBroadcastFirst(uvec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
uvec4 subgroupBroadcastFirst(uvec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
float subgroupBroadcastFirst(float value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
vec2 subgroupBroadcastFirst(vec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
vec3 subgroupBroadcastFirst(vec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
vec4 subgroupBroadcastFirst(vec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
double subgroupBroadcastFirst(double value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
dvec2 subgroupBroadcastFirst(dvec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
dvec3 subgroupBroadcastFirst(dvec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
dvec4 subgroupBroadcastFirst(dvec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }
|
||||
int subgroupBroadcast(int value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
ivec2 subgroupBroadcast(ivec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
ivec3 subgroupBroadcast(ivec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
ivec4 subgroupBroadcast(ivec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
uint subgroupBroadcast(uint value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
uvec2 subgroupBroadcast(uvec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
uvec3 subgroupBroadcast(uvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
uvec4 subgroupBroadcast(uvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
float subgroupBroadcast(float value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
vec2 subgroupBroadcast(vec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
vec3 subgroupBroadcast(vec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
vec4 subgroupBroadcast(vec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
double subgroupBroadcast(double value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
dvec2 subgroupBroadcast(dvec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
dvec3 subgroupBroadcast(dvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
dvec4 subgroupBroadcast(dvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }
|
||||
uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }
|
||||
#else
|
||||
uint subgroupBallotFindLSB(uvec4 value) {
|
||||
int firstLive = findLSB(value.x);
|
||||
return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));
|
||||
}
|
||||
uint subgroupBallotFindMSB(uvec4 value) {
|
||||
int firstLive = findMSB(value.y);
|
||||
return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));
|
||||
}
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_vote)
|
||||
#elif defined(GL_AMD_gcn_shader)
|
||||
bool subgroupAll(bool value) { return ballotAMD(value)==ballotAMD(true); }
|
||||
bool subgroupAny(bool value) { return ballotAMD(value)!=0ull; }
|
||||
bool subgroupAllEqual(bool value) { uint64_t b=ballotAMD(value); return b==0uLL || b==ballotAMD(true); }
|
||||
#elif defined(GL_NV_gpu_shader_5)
|
||||
bool subgroupAll(bool value) { return allThreadsNV(value); }
|
||||
bool subgroupAny(bool value) { return anyThreadNV(value); }
|
||||
bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }
|
||||
#elif defined(GL_ARB_shader_group_vote)
|
||||
bool subgroupAll(bool v) { return allInvocationsARB(v); }
|
||||
bool subgroupAny(bool v) { return anyInvocationARB(v); }
|
||||
bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_vote
|
||||
#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return subgroupAllEqual(subgroupBroadcastFirst(value)==value); }
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(int)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec2)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec3)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec4)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uint)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec2)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec3)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec4)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(float)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec2)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec3)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec4)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(double)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec2)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec3)
|
||||
_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec4)
|
||||
#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND
|
||||
#endif
|
||||
#if defined(GL_KHR_shader_subgroup_ballot)
|
||||
#elif defined(GL_NV_shader_thread_group)
|
||||
uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }
|
||||
uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }
|
||||
#elif defined(GL_ARB_shader_ballot)
|
||||
uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_basic
|
||||
bool subgroupElect() {
|
||||
uvec4 activeMask = subgroupBallot(true);
|
||||
uint firstLive = subgroupBallotFindLSB(activeMask);
|
||||
return gl_SubgroupInvocationID == firstLive;
|
||||
}
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_basic
|
||||
void subgroupBarrier() { /*NOOP*/ }
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_basic
|
||||
void subgroupMemoryBarrier() { groupMemoryBarrier(); }
|
||||
void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }
|
||||
void subgroupMemoryBarrierShared() { groupMemoryBarrier(); }
|
||||
void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_ballot
|
||||
bool subgroupInverseBallot(uvec4 value, uint index)
|
||||
{
|
||||
return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));
|
||||
}
|
||||
uint subgroupBallotInclusiveBitCount(uvec4 value)
|
||||
{
|
||||
uvec2 v = value.xy & gl_SubgroupLeMask.xy;
|
||||
ivec2 c = bitCount(v);
|
||||
#ifdef GL_NV_shader_thread_group
|
||||
return uint(c.x);
|
||||
#else
|
||||
return uint(c.x + c.y);
|
||||
#endif
|
||||
}
|
||||
uint subgroupBallotExclusiveBitCount(uvec4 value)
|
||||
{
|
||||
uvec2 v = value.xy & gl_SubgroupLtMask.xy;
|
||||
ivec2 c = bitCount(v);
|
||||
#ifdef GL_NV_shader_thread_group
|
||||
return uint(c.x);
|
||||
#else
|
||||
return uint(c.x + c.y);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_ballot
|
||||
uint subgroupBallotBitCount(uvec4 value)
|
||||
{
|
||||
ivec2 c = bitCount(value.xy);
|
||||
#ifdef GL_NV_shader_thread_group
|
||||
return uint(c.x);
|
||||
#else
|
||||
return uint(c.x + c.y);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#ifndef GL_KHR_shader_subgroup_ballot
|
||||
bool subgroupBallotBitExtract(uvec4 value, uint index)
|
||||
{
|
||||
#ifdef GL_NV_shader_thread_group
|
||||
uint shifted = value.x >> index;
|
||||
#else
|
||||
uint shifted = value[index >> 5u] >> (index & 0x1fu);
|
||||
#endif
|
||||
return (shifted & 1u) != 0u;
|
||||
}
|
||||
#endif
|
||||
void main()
|
||||
{
|
||||
_9.FragColor = float(gl_NumSubgroups);
|
||||
_9.FragColor = float(gl_SubgroupID);
|
||||
_9.FragColor = float(gl_SubgroupSize);
|
||||
_9.FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupMemoryBarrier();
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierShared();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
_9.FragColor = vec4(gl_SubgroupEqMask).x;
|
||||
_9.FragColor = vec4(gl_SubgroupGeMask).x;
|
||||
_9.FragColor = vec4(gl_SubgroupGtMask).x;
|
||||
_9.FragColor = vec4(gl_SubgroupLeMask).x;
|
||||
_9.FragColor = vec4(gl_SubgroupLtMask).x;
|
||||
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
|
||||
vec3 first = subgroupBroadcastFirst(vec3(20.0));
|
||||
uvec4 ballot_value = subgroupBallot(true);
|
||||
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
|
||||
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
|
||||
uint bit_count = subgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
|
||||
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
|
||||
uint lsb = subgroupBallotFindLSB(ballot_value);
|
||||
uint msb = subgroupBallotFindMSB(ballot_value);
|
||||
bool has_all = subgroupAll(true);
|
||||
bool has_any = subgroupAny(true);
|
||||
bool has_equal_bool = subgroupAllEqual(true);
|
||||
bool has_equal_T = subgroupAllEqual(uvec3(5u));
|
||||
}
|
||||
|
49
shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp
Normal file
49
shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp
Normal file
@ -0,0 +1,49 @@
|
||||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
float FragColor;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
// basic
|
||||
FragColor = float(gl_NumSubgroups);
|
||||
FragColor = float(gl_SubgroupID);
|
||||
FragColor = float(gl_SubgroupSize);
|
||||
FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierShared();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
|
||||
// ballot
|
||||
FragColor = float(gl_SubgroupEqMask);
|
||||
FragColor = float(gl_SubgroupGeMask);
|
||||
FragColor = float(gl_SubgroupGtMask);
|
||||
FragColor = float(gl_SubgroupLeMask);
|
||||
FragColor = float(gl_SubgroupLtMask);
|
||||
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
|
||||
vec3 first = subgroupBroadcastFirst(vec3(20.0));
|
||||
uvec4 ballot_value = subgroupBallot(true);
|
||||
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
|
||||
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
|
||||
uint bit_count = subgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
|
||||
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
|
||||
uint lsb = subgroupBallotFindLSB(ballot_value);
|
||||
uint msb = subgroupBallotFindMSB(ballot_value);
|
||||
|
||||
// vote
|
||||
bool has_all = subgroupAll(true);
|
||||
bool has_any = subgroupAny(true);
|
||||
bool has_equal_bool = subgroupAllEqual(true);
|
||||
bool has_equal_T = subgroupAllEqual(uvec3(5u));
|
||||
}
|
@ -63,8 +63,7 @@ public:
|
||||
private:
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
||||
// MSVC 2013 workarounds, sigh ...
|
||||
union
|
||||
{
|
||||
union {
|
||||
char aligned_char[sizeof(T) * N];
|
||||
double dummy_aligner;
|
||||
} u;
|
||||
@ -212,6 +211,10 @@ public:
|
||||
this->buffer_size = count;
|
||||
}
|
||||
|
||||
SmallVector(std::initializer_list<T> init) SPIRV_CROSS_NOEXCEPT : SmallVector(init.begin(), init.end())
|
||||
{
|
||||
}
|
||||
|
||||
SmallVector(SmallVector &&other) SPIRV_CROSS_NOEXCEPT : SmallVector()
|
||||
{
|
||||
*this = std::move(other);
|
||||
|
843
spirv_glsl.cpp
843
spirv_glsl.cpp
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@
|
||||
|
||||
#include "GLSL.std.450.h"
|
||||
#include "spirv_cross.hpp"
|
||||
#include <array>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
@ -241,7 +242,99 @@ public:
|
||||
// - Images which are statically used at least once with Dref opcodes.
|
||||
bool variable_is_depth_or_compare(VariableID id) const;
|
||||
|
||||
|
||||
protected:
|
||||
struct ShaderSubgroupSupportHelper
|
||||
{
|
||||
// lower enum value = greater priority
|
||||
enum Candidate
|
||||
{
|
||||
KHR_shader_subgroup_ballot,
|
||||
KHR_shader_subgroup_basic,
|
||||
KHR_shader_subgroup_vote,
|
||||
NV_gpu_shader_5,
|
||||
NV_shader_thread_group,
|
||||
NV_shader_thread_shuffle,
|
||||
ARB_shader_ballot,
|
||||
ARB_shader_group_vote,
|
||||
AMD_gcn_shader,
|
||||
|
||||
CandidateCount
|
||||
};
|
||||
static std::string get_extension_name(Candidate c);
|
||||
|
||||
static SmallVector<std::string> get_extra_required_extension_names(Candidate c);
|
||||
|
||||
static std::string get_extra_required_extension_predicate(Candidate c);
|
||||
|
||||
enum Feature
|
||||
{
|
||||
SubgroupMask,
|
||||
SubgroupSize,
|
||||
SubgroupInvocationID,
|
||||
SubgroupID,
|
||||
NumSubgroups,
|
||||
SubgroupBrodcast_First,
|
||||
SubgroupBallotFindLSB_MSB,
|
||||
SubgroupAll_Any_AllEqualBool,
|
||||
SubgroupAllEqualT,
|
||||
SubgroupElect,
|
||||
SubgroupBarrier,
|
||||
SubgroupMemBarrier,
|
||||
SubgroupBallot,
|
||||
SubgroupInverseBallot_InclBitCount_ExclBitCout,
|
||||
SubgroupBallotBitExtract,
|
||||
SubgroupBallotBitCount,
|
||||
|
||||
FeatureCount
|
||||
};
|
||||
|
||||
using FeatureMask = uint32_t;
|
||||
static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits.");
|
||||
|
||||
static SmallVector<Feature> get_feature_dependencies(Feature feature);
|
||||
|
||||
static FeatureMask get_feature_dependency_mask(Feature feature);
|
||||
|
||||
static bool can_feature_be_implemented_wo_extensions(Feature feature);
|
||||
|
||||
static Candidate get_KHR_extension_for_feature(Feature feature);
|
||||
|
||||
struct Result
|
||||
{
|
||||
Result();
|
||||
|
||||
inline uint32_t operator[](Candidate c) const
|
||||
{
|
||||
return weights[c];
|
||||
}
|
||||
inline uint32_t &operator[](Candidate c)
|
||||
{
|
||||
return weights[c];
|
||||
}
|
||||
|
||||
std::array<uint32_t, CandidateCount> weights;
|
||||
};
|
||||
|
||||
void request_feature(Feature ft);
|
||||
|
||||
bool is_feature_requested(Feature ft) const;
|
||||
|
||||
Result resolve() const;
|
||||
|
||||
static SmallVector<Candidate, CandidateCount> get_candidates_for_feature(Feature ft, const Result &r);
|
||||
|
||||
private:
|
||||
static SmallVector<Candidate, CandidateCount> get_candidates_for_feature(Feature ft);
|
||||
|
||||
static FeatureMask build_mask(SmallVector<Feature> features);
|
||||
|
||||
FeatureMask featureMask = 0;
|
||||
};
|
||||
|
||||
// TODO remove this function when all subgroup ops are supported (or make it always return true)
|
||||
static bool is_supported_subgroup_op(spv::Op op);
|
||||
|
||||
void reset();
|
||||
void emit_function(SPIRFunction &func, const Bitset &return_flags);
|
||||
|
||||
@ -272,6 +365,8 @@ protected:
|
||||
void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
|
||||
const SpecializationConstant &y, const SpecializationConstant &z);
|
||||
|
||||
void request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature);
|
||||
|
||||
virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
|
||||
virtual void emit_texture_op(const Instruction &i, bool sparse);
|
||||
virtual std::string to_texture_op(const Instruction &i, bool sparse, bool *forward,
|
||||
@ -483,6 +578,7 @@ protected:
|
||||
|
||||
void emit_struct(SPIRType &type);
|
||||
void emit_resources();
|
||||
void emit_extension_workarounds(spv::ExecutionModel model);
|
||||
void emit_buffer_block_native(const SPIRVariable &var);
|
||||
void emit_buffer_reference_block(SPIRType &type, bool forward_declaration);
|
||||
void emit_buffer_block_legacy(const SPIRVariable &var);
|
||||
@ -680,6 +776,8 @@ protected:
|
||||
std::unordered_set<uint32_t> flattened_buffer_blocks;
|
||||
std::unordered_map<uint32_t, bool> flattened_structs;
|
||||
|
||||
ShaderSubgroupSupportHelper shader_subgroup_supporter;
|
||||
|
||||
std::string load_flattened_struct(const std::string &basename, const SPIRType &type);
|
||||
std::string to_flattened_struct_member(const std::string &basename, const SPIRType &type, uint32_t index);
|
||||
void store_flattened_struct(uint32_t lhs_id, uint32_t value);
|
||||
|
Loading…
Reference in New Issue
Block a user