rhi: Expose compute threadgroup limits in ResourceLimits
As OpenGL ES and Vulkan ruin the day with the spec-mandated minimum value for max threads per threadgroup being only 128, clients need a way to decide at run time if their compute shader (local_size_*) is suitable for use.

Change-Id: I72b4fc97032406340623add82ea4d9544ebe9fdc
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
This commit is contained in:
parent
6f2c7469f8
commit
8fe16fef28
@ -675,6 +675,32 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general")
|
||||
frames (including the one that contains the readback) after which an
|
||||
asynchronous texture or buffer readback is guaranteed to complete upon
|
||||
\l{QRhi::beginFrame()}{starting a new frame}.
|
||||
|
||||
\value MaxThreadGroupsPerDimension The maximum number of compute
|
||||
work/thread groups that can be dispatched in each dimension. Effectively the maximum value
|
||||
for the arguments of QRhiCommandBuffer::dispatch(). Typically 65535.
|
||||
|
||||
\value MaxThreadsPerThreadGroup The maximum number of invocations in a
|
||||
single local work group, or in other terminology, the maximum number of
|
||||
threads in a thread group. Effectively the maximum value for the product of
|
||||
\c local_size_x, \c local_size_y, and \c local_size_z in the compute
|
||||
shader. Typical values are 128, 256, 512, 1024, or 1536. Note that
|
||||
both OpenGL ES and Vulkan specify only 128 as the minimum required limit
|
||||
for implementations. While uncommon for Vulkan, some OpenGL ES 3.1
|
||||
implementations for mobile/embedded devices only support the spec-mandated
|
||||
minimum value.
|
||||
|
||||
\value MaxThreadGroupX The maximum size of a work/thread group in the X
|
||||
dimension. Effectively the maximum value of \c local_size_x in the compute
|
||||
shader. Typically 256 or 1024.
|
||||
|
||||
\value MaxThreadGroupY The maximum size of a work/thread group in the Y
|
||||
dimension. Effectively the maximum value of \c local_size_y in the compute
|
||||
shader. Typically 256 or 1024.
|
||||
|
||||
\value MaxThreadGroupZ The maximum size of a work/thread group in the Z
|
||||
dimension. Effectively the maximum value of \c local_size_z in the compute
|
||||
shader. Typically 64 or 256.
|
||||
*/
|
||||
|
||||
/*!
|
||||
|
@ -1490,7 +1490,12 @@ public:
|
||||
TextureSizeMax,
|
||||
MaxColorAttachments,
|
||||
FramesInFlight,
|
||||
MaxAsyncReadbackFrames
|
||||
MaxAsyncReadbackFrames,
|
||||
MaxThreadGroupsPerDimension,
|
||||
MaxThreadsPerThreadGroup,
|
||||
MaxThreadGroupX,
|
||||
MaxThreadGroupY,
|
||||
MaxThreadGroupZ
|
||||
};
|
||||
|
||||
~QRhi();
|
||||
|
@ -554,6 +554,16 @@ int QRhiD3D11::resourceLimit(QRhi::ResourceLimit limit) const
|
||||
return 1;
|
||||
case QRhi::MaxAsyncReadbackFrames:
|
||||
return 1;
|
||||
case QRhi::MaxThreadGroupsPerDimension:
|
||||
return D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;
|
||||
case QRhi::MaxThreadsPerThreadGroup:
|
||||
return D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
|
||||
case QRhi::MaxThreadGroupX:
|
||||
return D3D11_CS_THREAD_GROUP_MAX_X;
|
||||
case QRhi::MaxThreadGroupY:
|
||||
return D3D11_CS_THREAD_GROUP_MAX_Y;
|
||||
case QRhi::MaxThreadGroupZ:
|
||||
return D3D11_CS_THREAD_GROUP_MAX_Z;
|
||||
default:
|
||||
Q_UNREACHABLE();
|
||||
return 0;
|
||||
|
@ -301,6 +301,18 @@ QT_BEGIN_NAMESPACE
|
||||
#define GL_TEXTURE_2D_MULTISAMPLE 0x9100
|
||||
#endif
|
||||
|
||||
#ifndef GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
|
||||
#endif
|
||||
|
||||
#ifndef GL_MAX_COMPUTE_WORK_GROUP_COUNT
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
|
||||
#endif
|
||||
|
||||
#ifndef GL_MAX_COMPUTE_WORK_GROUP_SIZE
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
|
||||
#endif
|
||||
|
||||
/*!
|
||||
Constructs a new QRhiGles2InitParams.
|
||||
|
||||
@ -514,6 +526,18 @@ bool QRhiGles2::create(QRhi::Flags flags)
|
||||
else
|
||||
caps.compute = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 3); // 4.3
|
||||
|
||||
if (caps.compute) {
|
||||
f->glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &caps.maxThreadsPerThreadGroup);
|
||||
GLint tgPerDim[3];
|
||||
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &tgPerDim[0]);
|
||||
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &tgPerDim[1]);
|
||||
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &tgPerDim[2]);
|
||||
caps.maxThreadGroupsPerDimension = qMin(tgPerDim[0], qMin(tgPerDim[1], tgPerDim[2]));
|
||||
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &caps.maxThreadGroupsX);
|
||||
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &caps.maxThreadGroupsY);
|
||||
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &caps.maxThreadGroupsZ);
|
||||
}
|
||||
|
||||
if (caps.gles)
|
||||
caps.textureCompareMode = caps.ctxMajor >= 3; // ES 3.0
|
||||
else
|
||||
@ -931,6 +955,16 @@ int QRhiGles2::resourceLimit(QRhi::ResourceLimit limit) const
|
||||
return 1;
|
||||
case QRhi::MaxAsyncReadbackFrames:
|
||||
return 1;
|
||||
case QRhi::MaxThreadGroupsPerDimension:
|
||||
return caps.maxThreadGroupsPerDimension;
|
||||
case QRhi::MaxThreadsPerThreadGroup:
|
||||
return caps.maxThreadsPerThreadGroup;
|
||||
case QRhi::MaxThreadGroupX:
|
||||
return caps.maxThreadGroupsX;
|
||||
case QRhi::MaxThreadGroupY:
|
||||
return caps.maxThreadGroupsY;
|
||||
case QRhi::MaxThreadGroupZ:
|
||||
return caps.maxThreadGroupsZ;
|
||||
default:
|
||||
Q_UNREACHABLE();
|
||||
return 0;
|
||||
|
@ -848,6 +848,12 @@ public:
|
||||
ctxMinor(0),
|
||||
maxTextureSize(2048),
|
||||
maxDrawBuffers(4),
|
||||
maxSamples(16),
|
||||
maxThreadGroupsPerDimension(0),
|
||||
maxThreadsPerThreadGroup(0),
|
||||
maxThreadGroupsX(0),
|
||||
maxThreadGroupsY(0),
|
||||
maxThreadGroupsZ(0),
|
||||
msaaRenderBuffer(false),
|
||||
multisampledTexture(false),
|
||||
npotTextureFull(true),
|
||||
@ -882,6 +888,11 @@ public:
|
||||
int maxTextureSize;
|
||||
int maxDrawBuffers;
|
||||
int maxSamples;
|
||||
int maxThreadGroupsPerDimension;
|
||||
int maxThreadsPerThreadGroup;
|
||||
int maxThreadGroupsX;
|
||||
int maxThreadGroupsY;
|
||||
int maxThreadGroupsZ;
|
||||
// Multisample fb and blit are supported (GLES 3.0 or OpenGL 3.x). Not
|
||||
// the same as multisample textures!
|
||||
uint msaaRenderBuffer : 1;
|
||||
|
@ -587,6 +587,20 @@ int QRhiMetal::resourceLimit(QRhi::ResourceLimit limit) const
|
||||
return QMTL_FRAMES_IN_FLIGHT;
|
||||
case QRhi::MaxAsyncReadbackFrames:
|
||||
return QMTL_FRAMES_IN_FLIGHT;
|
||||
case QRhi::MaxThreadGroupsPerDimension:
|
||||
return 65535;
|
||||
case QRhi::MaxThreadsPerThreadGroup:
|
||||
Q_FALLTHROUGH();
|
||||
case QRhi::MaxThreadGroupX:
|
||||
Q_FALLTHROUGH();
|
||||
case QRhi::MaxThreadGroupY:
|
||||
Q_FALLTHROUGH();
|
||||
case QRhi::MaxThreadGroupZ:
|
||||
#if defined(Q_OS_MACOS)
|
||||
return 1024;
|
||||
#else
|
||||
return 512;
|
||||
#endif
|
||||
default:
|
||||
Q_UNREACHABLE();
|
||||
return 0;
|
||||
|
@ -149,6 +149,16 @@ int QRhiNull::resourceLimit(QRhi::ResourceLimit limit) const
|
||||
return 1;
|
||||
case QRhi::MaxAsyncReadbackFrames:
|
||||
return 1;
|
||||
case QRhi::MaxThreadGroupsPerDimension:
|
||||
return 0;
|
||||
case QRhi::MaxThreadsPerThreadGroup:
|
||||
return 0;
|
||||
case QRhi::MaxThreadGroupX:
|
||||
return 0;
|
||||
case QRhi::MaxThreadGroupY:
|
||||
return 0;
|
||||
case QRhi::MaxThreadGroupZ:
|
||||
return 0;
|
||||
default:
|
||||
Q_UNREACHABLE();
|
||||
return 0;
|
||||
|
@ -4124,6 +4124,18 @@ int QRhiVulkan::resourceLimit(QRhi::ResourceLimit limit) const
|
||||
return QVK_FRAMES_IN_FLIGHT;
|
||||
case QRhi::MaxAsyncReadbackFrames:
|
||||
return QVK_FRAMES_IN_FLIGHT;
|
||||
case QRhi::MaxThreadGroupsPerDimension:
|
||||
return int(qMin(physDevProperties.limits.maxComputeWorkGroupCount[0],
|
||||
qMin(physDevProperties.limits.maxComputeWorkGroupCount[1],
|
||||
physDevProperties.limits.maxComputeWorkGroupCount[2])));
|
||||
case QRhi::MaxThreadsPerThreadGroup:
|
||||
return int(physDevProperties.limits.maxComputeWorkGroupInvocations);
|
||||
case QRhi::MaxThreadGroupX:
|
||||
return int(physDevProperties.limits.maxComputeWorkGroupSize[0]);
|
||||
case QRhi::MaxThreadGroupY:
|
||||
return int(physDevProperties.limits.maxComputeWorkGroupSize[1]);
|
||||
case QRhi::MaxThreadGroupZ:
|
||||
return int(physDevProperties.limits.maxComputeWorkGroupSize[2]);
|
||||
default:
|
||||
Q_UNREACHABLE();
|
||||
return 0;
|
||||
|
@ -184,6 +184,11 @@ void Window::customInit()
|
||||
qDebug("Min 2D texture width/height: %d", m_r->resourceLimit(QRhi::TextureSizeMin));
|
||||
qDebug("Max 2D texture width/height: %d", m_r->resourceLimit(QRhi::TextureSizeMax));
|
||||
qDebug("Max color attachment count: %d", m_r->resourceLimit(QRhi::MaxColorAttachments));
|
||||
qDebug("MaxThreadGroupsPerDimension: %d", m_r->resourceLimit(QRhi::MaxThreadGroupsPerDimension));
|
||||
qDebug("MaxThreadsPerThreadGroup: %d", m_r->resourceLimit(QRhi::MaxThreadsPerThreadGroup));
|
||||
qDebug("MaxThreadGroupX: %d", m_r->resourceLimit(QRhi::MaxThreadGroupX));
|
||||
qDebug("MaxThreadGroupY: %d", m_r->resourceLimit(QRhi::MaxThreadGroupY));
|
||||
qDebug("MaxThreadGroupZ: %d", m_r->resourceLimit(QRhi::MaxThreadGroupZ));
|
||||
}
|
||||
|
||||
void Window::customRelease()
|
||||
|
Loading…
Reference in New Issue
Block a user