rhi: Expose compute threadgroup limits in ResourceLimits

As OpenGL ES and Vulkan ruin the day with the spec-mandated minimum
value for max threads per threadgroup being only 128, clients need
a way to decide at run time whether their compute shader
(local_size_*) is suitable for use.

Change-Id: I72b4fc97032406340623add82ea4d9544ebe9fdc
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
This commit is contained in:
Laszlo Agocs 2020-09-17 15:38:40 +02:00
parent 6f2c7469f8
commit 8fe16fef28
9 changed files with 128 additions and 1 deletions

View File

@ -675,6 +675,32 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general")
frames (including the one that contains the readback) after which an
asynchronous texture or buffer readback is guaranteed to complete upon
\l{QRhi::beginFrame()}{starting a new frame}.
\value MaxThreadGroupsPerDimension The maximum number of compute
work/thread groups that can be dispatched in any one dimension. Effectively
the maximum value for each of the arguments of
QRhiCommandBuffer::dispatch(). Typically 65535.
\value MaxThreadsPerThreadGroup The maximum number of invocations in a
single local work group, or in other terminology, the maximum number of
threads in a thread group. Effectively the maximum value for the product of
\c local_size_x, \c local_size_y, and \c local_size_z in the compute
shader. Typical values are 128, 256, 512, 1024, or 1536. Be aware that
both OpenGL ES and Vulkan specify only 128 as the minimum required limit
for implementations. While uncommon for Vulkan, some OpenGL ES 3.1
implementations for mobile/embedded devices only support the spec-mandated
minimum value.
\value MaxThreadGroupX The maximum size of a work/thread group in the X
dimension. Effectively the maximum value of \c local_size_x in the compute
shader. Typically 256 or 1024.
\value MaxThreadGroupY The maximum size of a work/thread group in the Y
dimension. Effectively the maximum value of \c local_size_y in the compute
shader. Typically 256 or 1024.
\value MaxThreadGroupZ The maximum size of a work/thread group in the Z
dimension. Effectively the maximum value of \c local_size_z in the compute
shader. Typically 64 or 256.
*/
/*!

View File

@ -1490,7 +1490,12 @@ public:
TextureSizeMax,
MaxColorAttachments,
FramesInFlight,
MaxAsyncReadbackFrames
MaxAsyncReadbackFrames,
MaxThreadGroupsPerDimension,
MaxThreadsPerThreadGroup,
MaxThreadGroupX,
MaxThreadGroupY,
MaxThreadGroupZ
};
~QRhi();

View File

@ -554,6 +554,16 @@ int QRhiD3D11::resourceLimit(QRhi::ResourceLimit limit) const
return 1;
case QRhi::MaxAsyncReadbackFrames:
return 1;
case QRhi::MaxThreadGroupsPerDimension:
return D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;
case QRhi::MaxThreadsPerThreadGroup:
return D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
case QRhi::MaxThreadGroupX:
return D3D11_CS_THREAD_GROUP_MAX_X;
case QRhi::MaxThreadGroupY:
return D3D11_CS_THREAD_GROUP_MAX_Y;
case QRhi::MaxThreadGroupZ:
return D3D11_CS_THREAD_GROUP_MAX_Z;
default:
Q_UNREACHABLE();
return 0;

View File

@ -301,6 +301,18 @@ QT_BEGIN_NAMESPACE
#define GL_TEXTURE_2D_MULTISAMPLE 0x9100
#endif
#ifndef GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
#endif
#ifndef GL_MAX_COMPUTE_WORK_GROUP_COUNT
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
#endif
#ifndef GL_MAX_COMPUTE_WORK_GROUP_SIZE
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
#endif
/*!
Constructs a new QRhiGles2InitParams.
@ -514,6 +526,18 @@ bool QRhiGles2::create(QRhi::Flags flags)
else
caps.compute = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 3); // 4.3
if (caps.compute) {
f->glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &caps.maxThreadsPerThreadGroup);
GLint tgPerDim[3];
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &tgPerDim[0]);
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &tgPerDim[1]);
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &tgPerDim[2]);
caps.maxThreadGroupsPerDimension = qMin(tgPerDim[0], qMin(tgPerDim[1], tgPerDim[2]));
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &caps.maxThreadGroupsX);
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &caps.maxThreadGroupsY);
f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &caps.maxThreadGroupsZ);
}
if (caps.gles)
caps.textureCompareMode = caps.ctxMajor >= 3; // ES 3.0
else
@ -931,6 +955,16 @@ int QRhiGles2::resourceLimit(QRhi::ResourceLimit limit) const
return 1;
case QRhi::MaxAsyncReadbackFrames:
return 1;
case QRhi::MaxThreadGroupsPerDimension:
return caps.maxThreadGroupsPerDimension;
case QRhi::MaxThreadsPerThreadGroup:
return caps.maxThreadsPerThreadGroup;
case QRhi::MaxThreadGroupX:
return caps.maxThreadGroupsX;
case QRhi::MaxThreadGroupY:
return caps.maxThreadGroupsY;
case QRhi::MaxThreadGroupZ:
return caps.maxThreadGroupsZ;
default:
Q_UNREACHABLE();
return 0;

View File

@ -848,6 +848,12 @@ public:
ctxMinor(0),
maxTextureSize(2048),
maxDrawBuffers(4),
maxSamples(16),
maxThreadGroupsPerDimension(0),
maxThreadsPerThreadGroup(0),
maxThreadGroupsX(0),
maxThreadGroupsY(0),
maxThreadGroupsZ(0),
msaaRenderBuffer(false),
multisampledTexture(false),
npotTextureFull(true),
@ -882,6 +888,11 @@ public:
int maxTextureSize;
int maxDrawBuffers;
int maxSamples;
int maxThreadGroupsPerDimension;
int maxThreadsPerThreadGroup;
int maxThreadGroupsX;
int maxThreadGroupsY;
int maxThreadGroupsZ;
// Multisample fb and blit are supported (GLES 3.0 or OpenGL 3.x). Not
// the same as multisample textures!
uint msaaRenderBuffer : 1;

View File

@ -587,6 +587,20 @@ int QRhiMetal::resourceLimit(QRhi::ResourceLimit limit) const
return QMTL_FRAMES_IN_FLIGHT;
case QRhi::MaxAsyncReadbackFrames:
return QMTL_FRAMES_IN_FLIGHT;
case QRhi::MaxThreadGroupsPerDimension:
return 65535;
case QRhi::MaxThreadsPerThreadGroup:
Q_FALLTHROUGH();
case QRhi::MaxThreadGroupX:
Q_FALLTHROUGH();
case QRhi::MaxThreadGroupY:
Q_FALLTHROUGH();
case QRhi::MaxThreadGroupZ:
#if defined(Q_OS_MACOS)
return 1024;
#else
return 512;
#endif
default:
Q_UNREACHABLE();
return 0;

View File

@ -149,6 +149,16 @@ int QRhiNull::resourceLimit(QRhi::ResourceLimit limit) const
return 1;
case QRhi::MaxAsyncReadbackFrames:
return 1;
case QRhi::MaxThreadGroupsPerDimension:
return 0;
case QRhi::MaxThreadsPerThreadGroup:
return 0;
case QRhi::MaxThreadGroupX:
return 0;
case QRhi::MaxThreadGroupY:
return 0;
case QRhi::MaxThreadGroupZ:
return 0;
default:
Q_UNREACHABLE();
return 0;

View File

@ -4124,6 +4124,18 @@ int QRhiVulkan::resourceLimit(QRhi::ResourceLimit limit) const
return QVK_FRAMES_IN_FLIGHT;
case QRhi::MaxAsyncReadbackFrames:
return QVK_FRAMES_IN_FLIGHT;
case QRhi::MaxThreadGroupsPerDimension:
return int(qMin(physDevProperties.limits.maxComputeWorkGroupCount[0],
qMin(physDevProperties.limits.maxComputeWorkGroupCount[1],
physDevProperties.limits.maxComputeWorkGroupCount[2])));
case QRhi::MaxThreadsPerThreadGroup:
return int(physDevProperties.limits.maxComputeWorkGroupInvocations);
case QRhi::MaxThreadGroupX:
return int(physDevProperties.limits.maxComputeWorkGroupSize[0]);
case QRhi::MaxThreadGroupY:
return int(physDevProperties.limits.maxComputeWorkGroupSize[1]);
case QRhi::MaxThreadGroupZ:
return int(physDevProperties.limits.maxComputeWorkGroupSize[2]);
default:
Q_UNREACHABLE();
return 0;

View File

@ -184,6 +184,11 @@ void Window::customInit()
qDebug("Min 2D texture width/height: %d", m_r->resourceLimit(QRhi::TextureSizeMin));
qDebug("Max 2D texture width/height: %d", m_r->resourceLimit(QRhi::TextureSizeMax));
qDebug("Max color attachment count: %d", m_r->resourceLimit(QRhi::MaxColorAttachments));
qDebug("MaxThreadGroupsPerDimension: %d", m_r->resourceLimit(QRhi::MaxThreadGroupsPerDimension));
qDebug("MaxThreadsPerThreadGroup: %d", m_r->resourceLimit(QRhi::MaxThreadsPerThreadGroup));
qDebug("MaxThreadGroupX: %d", m_r->resourceLimit(QRhi::MaxThreadGroupX));
qDebug("MaxThreadGroupY: %d", m_r->resourceLimit(QRhi::MaxThreadGroupY));
qDebug("MaxThreadGroupZ: %d", m_r->resourceLimit(QRhi::MaxThreadGroupZ));
}
void Window::customRelease()