From 8fe16fef283d281454d575829c2411ec923a5c8b Mon Sep 17 00:00:00 2001 From: Laszlo Agocs Date: Thu, 17 Sep 2020 15:38:40 +0200 Subject: [PATCH] rhi: Expose compute threadgroup limits in ResourceLimits As OpenGL ES and Vulkan ruin the day with the spec mandated minimum value for max threads per threadgroup being only 128, clients need a way to decide if their compute shader (local_size_*) is suitable for use at run time. Change-Id: I72b4fc97032406340623add82ea4d9544ebe9fdc Reviewed-by: Andy Nichols --- src/gui/rhi/qrhi.cpp | 26 +++++++++++++++ src/gui/rhi/qrhi_p.h | 7 +++- src/gui/rhi/qrhid3d11.cpp | 10 ++++++ src/gui/rhi/qrhigles2.cpp | 34 ++++++++++++++++++++ src/gui/rhi/qrhigles2_p_p.h | 11 +++++++ src/gui/rhi/qrhimetal.mm | 14 ++++++++ src/gui/rhi/qrhinull.cpp | 10 ++++++ src/gui/rhi/qrhivulkan.cpp | 12 +++++++ tests/manual/rhi/triquadcube/triquadcube.cpp | 5 +++ 9 files changed, 128 insertions(+), 1 deletion(-) diff --git a/src/gui/rhi/qrhi.cpp b/src/gui/rhi/qrhi.cpp index 06c3903b0c..794e6a6891 100644 --- a/src/gui/rhi/qrhi.cpp +++ b/src/gui/rhi/qrhi.cpp @@ -675,6 +675,32 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general") frames (including the one that contains the readback) after which an asynchronous texture or buffer readback is guaranteed to complete upon \l{QRhi::beginFrame()}{starting a new frame}. + + \value MaxThreadGroupsPerDimension The maximum number of compute + work/thread groups that can be dispatched. Effectively the maximum value + for the arguments of QRhiCommandBuffer::dispatch(). Typically 65535. + + \value MaxThreadsPerThreadGroup The maximum number of invocations in a + single local work group, or in other terminology, the maximum number of + threads in a thread group. Effectively the maximum value for the product of + \c local_size_x, \c local_size_y, and \c local_size_z in the compute + shader. Typical values are 128, 256, 512, 1024, or 1536. Watch out that + both OpenGL ES and Vulkan specify only 128 as the minimum required limit + for implementations. While uncommon for Vulkan, some OpenGL ES 3.1 + implementations for mobile/embedded devices only support the spec-mandated + minimum value. + + \value MaxThreadGroupX The maximum size of a work/thread group in the X + dimension. Effectively the maximum value of \c local_size_x in the compute + shader. Typically 256 or 1024. + + \value MaxThreadGroupY The maximum size of a work/thread group in the Y + dimension. Effectively the maximum value of \c local_size_y in the compute + shader. Typically 256 or 1024. + + \value MaxThreadGroupZ The maximum size of a work/thread group in the Z + dimension. Effectively the maximum value of \c local_size_z in the compute + shader. Typically 64 or 256. */ /*! diff --git a/src/gui/rhi/qrhi_p.h b/src/gui/rhi/qrhi_p.h index 7d719fd218..1e3540fa1f 100644 --- a/src/gui/rhi/qrhi_p.h +++ b/src/gui/rhi/qrhi_p.h @@ -1490,7 +1490,12 @@ public: TextureSizeMax, MaxColorAttachments, FramesInFlight, - MaxAsyncReadbackFrames + MaxAsyncReadbackFrames, + MaxThreadGroupsPerDimension, + MaxThreadsPerThreadGroup, + MaxThreadGroupX, + MaxThreadGroupY, + MaxThreadGroupZ }; ~QRhi(); diff --git a/src/gui/rhi/qrhid3d11.cpp b/src/gui/rhi/qrhid3d11.cpp index d5c32cde2c..be79b26b9c 100644 --- a/src/gui/rhi/qrhid3d11.cpp +++ b/src/gui/rhi/qrhid3d11.cpp @@ -554,6 +554,16 @@ int QRhiD3D11::resourceLimit(QRhi::ResourceLimit limit) const return 1; case QRhi::MaxAsyncReadbackFrames: return 1; + case QRhi::MaxThreadGroupsPerDimension: + return D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; + case QRhi::MaxThreadsPerThreadGroup: + return D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + case QRhi::MaxThreadGroupX: + return D3D11_CS_THREAD_GROUP_MAX_X; + case QRhi::MaxThreadGroupY: + return D3D11_CS_THREAD_GROUP_MAX_Y; + case QRhi::MaxThreadGroupZ: + return D3D11_CS_THREAD_GROUP_MAX_Z; default: Q_UNREACHABLE(); return 0; diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index 72a8cc539b..4440182264 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -301,6 +301,18 @@ QT_BEGIN_NAMESPACE #define GL_TEXTURE_2D_MULTISAMPLE 0x9100 #endif +#ifndef GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS +#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB +#endif + +#ifndef GL_MAX_COMPUTE_WORK_GROUP_COUNT +#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE +#endif + +#ifndef GL_MAX_COMPUTE_WORK_GROUP_SIZE +#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF +#endif + /*! Constructs a new QRhiGles2InitParams. @@ -514,6 +526,18 @@ bool QRhiGles2::create(QRhi::Flags flags) else caps.compute = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 3); // 4.3 + if (caps.compute) { + f->glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &caps.maxThreadsPerThreadGroup); + GLint tgPerDim[3]; + f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &tgPerDim[0]); + f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &tgPerDim[1]); + f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &tgPerDim[2]); + caps.maxThreadGroupsPerDimension = qMin(tgPerDim[0], qMin(tgPerDim[1], tgPerDim[2])); + f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &caps.maxThreadGroupsX); + f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &caps.maxThreadGroupsY); + f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &caps.maxThreadGroupsZ); + } + if (caps.gles) caps.textureCompareMode = caps.ctxMajor >= 3; // ES 3.0 else @@ -931,6 +955,16 @@ int QRhiGles2::resourceLimit(QRhi::ResourceLimit limit) const return 1; case QRhi::MaxAsyncReadbackFrames: return 1; + case QRhi::MaxThreadGroupsPerDimension: + return caps.maxThreadGroupsPerDimension; + case QRhi::MaxThreadsPerThreadGroup: + return caps.maxThreadsPerThreadGroup; + case QRhi::MaxThreadGroupX: + return caps.maxThreadGroupsX; + case QRhi::MaxThreadGroupY: + return caps.maxThreadGroupsY; + case QRhi::MaxThreadGroupZ: + return caps.maxThreadGroupsZ; default: Q_UNREACHABLE(); return 0; diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h index 9392254a78..1410e6ac38 100644 --- a/src/gui/rhi/qrhigles2_p_p.h +++ b/src/gui/rhi/qrhigles2_p_p.h @@ -848,6 +848,12 @@ public: ctxMinor(0), maxTextureSize(2048), maxDrawBuffers(4), + maxSamples(16), + maxThreadGroupsPerDimension(0), + maxThreadsPerThreadGroup(0), + maxThreadGroupsX(0), + maxThreadGroupsY(0), + maxThreadGroupsZ(0), msaaRenderBuffer(false), multisampledTexture(false), npotTextureFull(true), @@ -882,6 +888,11 @@ public: int maxTextureSize; int maxDrawBuffers; int maxSamples; + int maxThreadGroupsPerDimension; + int maxThreadsPerThreadGroup; + int maxThreadGroupsX; + int maxThreadGroupsY; + int maxThreadGroupsZ; // Multisample fb and blit are supported (GLES 3.0 or OpenGL 3.x). Not // the same as multisample textures! uint msaaRenderBuffer : 1; diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm index fbb2003fb2..2736377c78 100644 --- a/src/gui/rhi/qrhimetal.mm +++ b/src/gui/rhi/qrhimetal.mm @@ -587,6 +587,20 @@ int QRhiMetal::resourceLimit(QRhi::ResourceLimit limit) const return QMTL_FRAMES_IN_FLIGHT; case QRhi::MaxAsyncReadbackFrames: return QMTL_FRAMES_IN_FLIGHT; + case QRhi::MaxThreadGroupsPerDimension: + return 65535; + case QRhi::MaxThreadsPerThreadGroup: + Q_FALLTHROUGH(); + case QRhi::MaxThreadGroupX: + Q_FALLTHROUGH(); + case QRhi::MaxThreadGroupY: + Q_FALLTHROUGH(); + case QRhi::MaxThreadGroupZ: +#if defined(Q_OS_MACOS) + return 1024; +#else + return 512; +#endif default: Q_UNREACHABLE(); return 0; diff --git a/src/gui/rhi/qrhinull.cpp b/src/gui/rhi/qrhinull.cpp index a81f885d5a..48fca4e212 100644 --- a/src/gui/rhi/qrhinull.cpp +++ b/src/gui/rhi/qrhinull.cpp @@ -149,6 +149,16 @@ int QRhiNull::resourceLimit(QRhi::ResourceLimit limit) const return 1; case QRhi::MaxAsyncReadbackFrames: return 1; + case QRhi::MaxThreadGroupsPerDimension: + return 0; + case QRhi::MaxThreadsPerThreadGroup: + return 0; + case QRhi::MaxThreadGroupX: + return 0; + case QRhi::MaxThreadGroupY: + return 0; + case QRhi::MaxThreadGroupZ: + return 0; default: Q_UNREACHABLE(); return 0; diff --git a/src/gui/rhi/qrhivulkan.cpp b/src/gui/rhi/qrhivulkan.cpp index b172a8b16b..e4ad562805 100644 --- a/src/gui/rhi/qrhivulkan.cpp +++ b/src/gui/rhi/qrhivulkan.cpp @@ -4124,6 +4124,18 @@ int QRhiVulkan::resourceLimit(QRhi::ResourceLimit limit) const return QVK_FRAMES_IN_FLIGHT; case QRhi::MaxAsyncReadbackFrames: return QVK_FRAMES_IN_FLIGHT; + case QRhi::MaxThreadGroupsPerDimension: + return int(qMin(physDevProperties.limits.maxComputeWorkGroupCount[0], + qMin(physDevProperties.limits.maxComputeWorkGroupCount[1], + physDevProperties.limits.maxComputeWorkGroupCount[2]))); + case QRhi::MaxThreadsPerThreadGroup: + return int(physDevProperties.limits.maxComputeWorkGroupInvocations); + case QRhi::MaxThreadGroupX: + return int(physDevProperties.limits.maxComputeWorkGroupSize[0]); + case QRhi::MaxThreadGroupY: + return int(physDevProperties.limits.maxComputeWorkGroupSize[1]); + case QRhi::MaxThreadGroupZ: + return int(physDevProperties.limits.maxComputeWorkGroupSize[2]); default: Q_UNREACHABLE(); return 0; diff --git a/tests/manual/rhi/triquadcube/triquadcube.cpp b/tests/manual/rhi/triquadcube/triquadcube.cpp index 252ec63e21..ecb1160207 100644 --- a/tests/manual/rhi/triquadcube/triquadcube.cpp +++ b/tests/manual/rhi/triquadcube/triquadcube.cpp @@ -184,6 +184,11 @@ void Window::customInit() qDebug("Min 2D texture width/height: %d", m_r->resourceLimit(QRhi::TextureSizeMin)); qDebug("Max 2D texture width/height: %d", m_r->resourceLimit(QRhi::TextureSizeMax)); qDebug("Max color attachment count: %d", m_r->resourceLimit(QRhi::MaxColorAttachments)); + qDebug("MaxThreadGroupsPerDimension: %d", m_r->resourceLimit(QRhi::MaxThreadGroupsPerDimension)); + qDebug("MaxThreadsPerThreadGroup: %d", m_r->resourceLimit(QRhi::MaxThreadsPerThreadGroup)); + qDebug("MaxThreadGroupX: %d", m_r->resourceLimit(QRhi::MaxThreadGroupX)); + qDebug("MaxThreadGroupY: %d", m_r->resourceLimit(QRhi::MaxThreadGroupY)); + qDebug("MaxThreadGroupZ: %d", m_r->resourceLimit(QRhi::MaxThreadGroupZ)); } void Window::customRelease()