rhi: Keep track of pipeline creation times

Make our QRhiMemAllocStats struct a bit more generic, drop the memory
allocation part in the naming, and use the same getter and struct for
reporting some important timings. (we are free to rename for now, there
are no users in other modules yet)

The time spent in graphics (or compute) pipeline creation has a special
relevance in particular with the modern APIs (as it is the single
biggest potentially time consuming blocking operation), but also highly
interesting with others like D3D11 simply because that's where we do the
expensive source-to-intermediate compilation if HLSL source is provided.
In order to see the effects of the various caching mechanisms (of which
there can be confusingly many, on multiple levels), the ability to see
how much time we spent on pipeline creation e.g. until we render the
first view of an application can be pretty essential.

Task-number: QTBUG-103802
Change-Id: I85dd056a39db7e6b25fb1f9d02e4c94298d22b41
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
This commit is contained in:
Laszlo Agocs 2022-06-10 12:08:54 +02:00
parent a09e518f65
commit 6ec339c484
14 changed files with 144 additions and 38 deletions

View File

@ -6962,7 +6962,7 @@ void QRhi::setPipelineCacheData(const QByteArray &data)
}
/*!
\struct QRhiMemAllocStats
\struct QRhiStats
\internal
\inmodule QtGui
@ -6970,10 +6970,12 @@ void QRhi::setPipelineCacheData(const QByteArray &data)
*/
#ifndef QT_NO_DEBUG_STREAM
QDebug operator<<(QDebug dbg, const QRhiMemAllocStats &info)
QDebug operator<<(QDebug dbg, const QRhiStats &info)
{
QDebugStateSaver saver(dbg);
dbg.nospace() << "QRhiMemAllocStats(blockCount=" << info.blockCount
dbg.nospace() << "QRhiStats("
<< "totalPipelineCreationTime=" << info.totalPipelineCreationTime
<< " blockCount=" << info.blockCount
<< " allocCount=" << info.allocCount
<< " usedBytes=" << info.usedBytes
<< " unusedBytes=" << info.unusedBytes
@ -6983,21 +6985,46 @@ QDebug operator<<(QDebug dbg, const QRhiMemAllocStats &info)
#endif
/*!
Gathers and returns some statistics about the memory allocation of graphics
resources. Only supported with some backends. With graphics APIs where
there is no lower level control over resource memory allocations, this will
never be supported and all fields in the results are 0.
Gathers and returns statistics about the timings and allocations of
graphics resources.
With Vulkan, the values are valid always, and are queried from the
underlying memory allocator library. This gives an insight into the memory
requirements of the active buffers and textures.
Data about memory allocations is only available with some backends, where
such operations are under Qt's control. With graphics APIs where there is
no lower level control over resource memory allocations, this will never be
supported and all relevant fields in the results are 0.
\note Gathering the data may not be free, and therefore the function should
not be called at a high frequency.
With Vulkan in particular, the values are valid always, and are queried
from the underlying memory allocator library. This gives an insight into
the memory requirements of the active buffers and textures.
\warning Gathering some of the data may be an expensive operation, and
therefore the function must not be called at a high frequency.
Additional data, such as the total time in milliseconds spent in graphics
and compute pipeline creation (which usually involves shader compilation or
cache lookups, and potentially expensive processing) is available with most
backends.
\note The elapsed times for operations such as pipeline creation may be
affected by various factors. The results should not be compared between
different backends since the concept of "pipelines" and what exactly
happens under the hood during, for instance, a call to
QRhiGraphicsPipeline::create(), differ greatly between graphics APIs and
their implementations.
\note Additionally, many drivers will likely employ various caching
strategies for shaders, programs, pipelines (independently of Qt's own
similar facilities, such as setPipelineCacheData() or the OpenGL-specific
program binary disk cache). Because such internal behavior is transparent
to the API client, Qt and QRhi have no knowledge or control over the exact
caching strategy, persistency, invalidation of the cached data, etc. When
reading timings, such as the time spent on pipeline creation, the potential
presence and unspecified behavior of driver-level caching mechanisms should
be kept in mind.
*/
QRhiMemAllocStats QRhi::graphicsMemoryAllocationStatistics() const
QRhiStats QRhi::statistics() const
{
return d->graphicsMemoryAllocationStatistics();
return d->statistics();
}
/*!

View File

@ -1583,18 +1583,19 @@ Q_DECLARE_TYPEINFO(QRhiDriverInfo, Q_RELOCATABLE_TYPE);
Q_GUI_EXPORT QDebug operator<<(QDebug, const QRhiDriverInfo &);
#endif
struct Q_GUI_EXPORT QRhiMemAllocStats
struct Q_GUI_EXPORT QRhiStats
{
qint64 totalPipelineCreationTime = 0;
quint32 blockCount = 0;
quint32 allocCount = 0;
quint64 usedBytes = 0;
quint64 unusedBytes = 0;
};
Q_DECLARE_TYPEINFO(QRhiMemAllocStats, Q_RELOCATABLE_TYPE);
Q_DECLARE_TYPEINFO(QRhiStats, Q_RELOCATABLE_TYPE);
#ifndef QT_NO_DEBUG_STREAM
Q_GUI_EXPORT QDebug operator<<(QDebug, const QRhiMemAllocStats &);
Q_GUI_EXPORT QDebug operator<<(QDebug, const QRhiStats &);
#endif
struct Q_GUI_EXPORT QRhiInitParams
@ -1795,7 +1796,7 @@ public:
QByteArray pipelineCacheData();
void setPipelineCacheData(const QByteArray &data);
QRhiMemAllocStats graphicsMemoryAllocationStatistics() const;
QRhiStats statistics() const;
protected:
QRhi();

View File

@ -18,6 +18,7 @@
#include "qrhi_p.h"
#include <QBitArray>
#include <QAtomicInt>
#include <QElapsedTimer>
#include <QLoggingCategory>
#include <QtCore/qset.h>
#include <QtCore/qvarlengtharray.h>
@ -131,7 +132,7 @@ public:
virtual int resourceLimit(QRhi::ResourceLimit limit) const = 0;
virtual const QRhiNativeHandles *nativeHandles() = 0;
virtual QRhiDriverInfo driverInfo() const = 0;
virtual QRhiMemAllocStats graphicsMemoryAllocationStatistics() = 0;
virtual QRhiStats statistics() = 0;
virtual bool makeThreadLocalNativeContextCurrent() = 0;
virtual void releaseCachedResources() = 0;
virtual bool isDeviceLost() const = 0;
@ -201,6 +202,21 @@ public:
return (quint32(implType) << 24) | ver;
}
// Marks the beginning of a pipeline creation by starting
// pipelineCreationTimer. Intended to be paired with pipelineCreationEnd(),
// which reads the timer and adds the elapsed time to the running total.
void pipelineCreationStart()
{
pipelineCreationTimer.start();
}
void pipelineCreationEnd()
{
accumulatedPipelineCreationTime += pipelineCreationTimer.elapsed();
}
// Returns the time accumulated so far by pipelineCreationEnd() calls, i.e.
// the sum of the elapsed values measured between each
// pipelineCreationStart()/pipelineCreationEnd() pair.
qint64 totalPipelineCreationTime() const
{
return accumulatedPipelineCreationTime;
}
QRhi *q;
static const int MAX_SHADER_CACHE_ENTRIES = 128;
@ -219,6 +235,8 @@ private:
QSet<QRhiResource *> pendingDeleteResources;
QVarLengthArray<QRhi::CleanupCallback, 4> cleanupCallbacks;
QVarLengthArray<QRhi::GpuFrameTimeCallback, 4> gpuFrameTimeCallbacks;
QElapsedTimer pipelineCreationTimer;
qint64 accumulatedPipelineCreationTime = 0;
friend class QRhi;
friend class QRhiResourceUpdateBatchPrivate;

View File

@ -615,9 +615,11 @@ QRhiDriverInfo QRhiD3D11::driverInfo() const
return driverInfoStruct;
}
QRhiMemAllocStats QRhiD3D11::graphicsMemoryAllocationStatistics()
QRhiStats QRhiD3D11::statistics()
{
return {};
QRhiStats result;
result.totalPipelineCreationTime = totalPipelineCreationTime();
return result;
}
bool QRhiD3D11::makeThreadLocalNativeContextCurrent()
@ -4252,6 +4254,7 @@ bool QD3D11GraphicsPipeline::create()
destroy();
QRHI_RES_RHI(QRhiD3D11);
rhiD->pipelineCreationStart();
if (!rhiD->sanityCheckGraphicsPipeline(this))
return false;
@ -4438,6 +4441,7 @@ bool QD3D11GraphicsPipeline::create()
} // else leave inputLayout set to nullptr; that's valid and it avoids a debug layer warning about an input layout with 0 elements
}
rhiD->pipelineCreationEnd();
generation += 1;
rhiD->registerResource(this);
return true;
@ -4473,6 +4477,7 @@ bool QD3D11ComputePipeline::create()
destroy();
QRHI_RES_RHI(QRhiD3D11);
rhiD->pipelineCreationStart();
auto cacheIt = rhiD->m_shaderCache.constFind(m_shaderStage);
if (cacheIt != rhiD->m_shaderCache.constEnd()) {
@ -4508,6 +4513,7 @@ bool QD3D11ComputePipeline::create()
cs.shader->AddRef();
rhiD->pipelineCreationEnd();
generation += 1;
rhiD->registerResource(this);
return true;

View File

@ -656,7 +656,7 @@ public:
int resourceLimit(QRhi::ResourceLimit limit) const override;
const QRhiNativeHandles *nativeHandles() override;
QRhiDriverInfo driverInfo() const override;
QRhiMemAllocStats graphicsMemoryAllocationStatistics() override;
QRhiStats statistics() override;
bool makeThreadLocalNativeContextCurrent() override;
void releaseCachedResources() override;
bool isDeviceLost() const override;

View File

@ -1302,9 +1302,11 @@ QRhiDriverInfo QRhiGles2::driverInfo() const
return driverInfoStruct;
}
QRhiMemAllocStats QRhiGles2::graphicsMemoryAllocationStatistics()
QRhiStats QRhiGles2::statistics()
{
return {};
QRhiStats result;
result.totalPipelineCreationTime = totalPipelineCreationTime();
return result;
}
bool QRhiGles2::makeThreadLocalNativeContextCurrent()
@ -5454,6 +5456,7 @@ bool QGles2GraphicsPipeline::create()
if (!rhiD->ensureContext())
return false;
rhiD->pipelineCreationStart();
if (!rhiD->sanityCheckGraphicsPipeline(this))
return false;
@ -5574,6 +5577,7 @@ bool QGles2GraphicsPipeline::create()
currentSrb = nullptr;
currentSrbGeneration = 0;
rhiD->pipelineCreationEnd();
generation += 1;
rhiD->registerResource(this);
return true;
@ -5620,6 +5624,8 @@ bool QGles2ComputePipeline::create()
if (!rhiD->ensureContext())
return false;
rhiD->pipelineCreationStart();
const QShaderDescription csDesc = m_shaderStage.shader().description();
QShader::SeparateToCombinedImageSamplerMappingList csSamplerMappingList;
QShaderVersion shaderVersion;
@ -5675,6 +5681,7 @@ bool QGles2ComputePipeline::create()
currentSrb = nullptr;
currentSrbGeneration = 0;
rhiD->pipelineCreationEnd();
generation += 1;
rhiD->registerResource(this);
return true;

View File

@ -807,7 +807,7 @@ public:
int resourceLimit(QRhi::ResourceLimit limit) const override;
const QRhiNativeHandles *nativeHandles() override;
QRhiDriverInfo driverInfo() const override;
QRhiMemAllocStats graphicsMemoryAllocationStatistics() override;
QRhiStats statistics() override;
bool makeThreadLocalNativeContextCurrent() override;
void releaseCachedResources() override;
bool isDeviceLost() const override;

View File

@ -658,9 +658,11 @@ QRhiDriverInfo QRhiMetal::driverInfo() const
return driverInfoStruct;
}
QRhiMemAllocStats QRhiMetal::graphicsMemoryAllocationStatistics()
QRhiStats QRhiMetal::statistics()
{
return {};
QRhiStats result;
result.totalPipelineCreationTime = totalPipelineCreationTime();
return result;
}
bool QRhiMetal::makeThreadLocalNativeContextCurrent()
@ -3586,6 +3588,7 @@ bool QMetalGraphicsPipeline::create()
destroy();
QRHI_RES_RHI(QRhiMetal);
rhiD->pipelineCreationStart();
if (!rhiD->sanityCheckGraphicsPipeline(this))
return false;
@ -3772,6 +3775,7 @@ bool QMetalGraphicsPipeline::create()
d->depthBias = float(m_depthBias);
d->slopeScaledDepthBias = m_slopeScaledDepthBias;
rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1;
generation += 1;
rhiD->registerResource(this);
@ -3811,6 +3815,7 @@ bool QMetalComputePipeline::create()
destroy();
QRHI_RES_RHI(QRhiMetal);
rhiD->pipelineCreationStart();
auto cacheIt = rhiD->d->shaderCache.constFind(m_shaderStage);
if (cacheIt != rhiD->d->shaderCache.constEnd()) {
@ -3858,6 +3863,7 @@ bool QMetalComputePipeline::create()
return false;
}
rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1;
generation += 1;
rhiD->registerResource(this);

View File

@ -417,7 +417,7 @@ public:
int resourceLimit(QRhi::ResourceLimit limit) const override;
const QRhiNativeHandles *nativeHandles() override;
QRhiDriverInfo driverInfo() const override;
QRhiMemAllocStats graphicsMemoryAllocationStatistics() override;
QRhiStats statistics() override;
bool makeThreadLocalNativeContextCurrent() override;
void releaseCachedResources() override;
bool isDeviceLost() const override;

View File

@ -151,7 +151,7 @@ QRhiDriverInfo QRhiNull::driverInfo() const
return info;
}
QRhiMemAllocStats QRhiNull::graphicsMemoryAllocationStatistics()
QRhiStats QRhiNull::statistics()
{
return {};
}

View File

@ -273,7 +273,7 @@ public:
int resourceLimit(QRhi::ResourceLimit limit) const override;
const QRhiNativeHandles *nativeHandles() override;
QRhiDriverInfo driverInfo() const override;
QRhiMemAllocStats graphicsMemoryAllocationStatistics() override;
QRhiStats statistics() override;
bool makeThreadLocalNativeContextCurrent() override;
void releaseCachedResources() override;
bool isDeviceLost() const override;

View File

@ -4353,16 +4353,19 @@ QRhiDriverInfo QRhiVulkan::driverInfo() const
return driverInfoStruct;
}
QRhiMemAllocStats QRhiVulkan::graphicsMemoryAllocationStatistics()
QRhiStats QRhiVulkan::statistics()
{
VmaStats stats;
vmaCalculateStats(toVmaAllocator(allocator), &stats);
return {
stats.total.blockCount,
stats.total.allocationCount,
stats.total.usedBytes,
stats.total.unusedBytes
};
QRhiStats result;
result.totalPipelineCreationTime = totalPipelineCreationTime();
result.blockCount = stats.total.blockCount;
result.allocCount = stats.total.allocationCount;
result.usedBytes = stats.total.usedBytes;
result.unusedBytes = stats.total.unusedBytes;
return result;
}
bool QRhiVulkan::makeThreadLocalNativeContextCurrent()
@ -6844,6 +6847,7 @@ bool QVkGraphicsPipeline::create()
destroy();
QRHI_RES_RHI(QRhiVulkan);
rhiD->pipelineCreationStart();
if (!rhiD->sanityCheckGraphicsPipeline(this))
return false;
@ -7081,6 +7085,7 @@ bool QVkGraphicsPipeline::create()
return false;
}
rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1;
generation += 1;
rhiD->registerResource(this);
@ -7125,6 +7130,7 @@ bool QVkComputePipeline::create()
destroy();
QRHI_RES_RHI(QRhiVulkan);
rhiD->pipelineCreationStart();
if (!rhiD->ensurePipelineCache())
return false;
@ -7176,6 +7182,7 @@ bool QVkComputePipeline::create()
return false;
}
rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1;
generation += 1;
rhiD->registerResource(this);

View File

@ -730,7 +730,7 @@ public:
int resourceLimit(QRhi::ResourceLimit limit) const override;
const QRhiNativeHandles *nativeHandles() override;
QRhiDriverInfo driverInfo() const override;
QRhiMemAllocStats graphicsMemoryAllocationStatistics() override;
QRhiStats statistics() override;
bool makeThreadLocalNativeContextCurrent() override;
void releaseCachedResources() override;
bool isDeviceLost() const override;

View File

@ -52,6 +52,8 @@ private slots:
void rhiTestDataOpenGL();
void create_data();
void create();
void stats_data();
void stats();
void nativeHandles_data();
void nativeHandles();
void nativeHandlesImportVulkan();
@ -400,6 +402,38 @@ void tst_QRhi::create()
}
}
// Data function for stats(). Delegates to the shared rhiTestData() helper,
// which presumably provides the "impl" and "initParams" columns that stats()
// fetches -- confirm against rhiTestData().
void tst_QRhi::stats_data()
{
rhiTestData();
}
void tst_QRhi::stats()
{
QFETCH(QRhi::Implementation, impl);
QFETCH(QRhiInitParams *, initParams);
QScopedPointer<QRhi> rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr));
if (!rhi)
QSKIP("QRhi could not be created, skipping testing statistics getter");
QRhiStats stats = rhi->statistics();
qDebug() << stats;
QCOMPARE(stats.totalPipelineCreationTime, 0);
if (impl == QRhi::Vulkan) {
QScopedPointer<QRhiBuffer> buf(rhi->newBuffer(QRhiBuffer::Immutable, QRhiBuffer::VertexBuffer, 32768));
QVERIFY(buf->create());
QScopedPointer<QRhiTexture> tex(rhi->newTexture(QRhiTexture::RGBA8, QSize(1024, 1024)));
QVERIFY(tex->create());
stats = rhi->statistics();
qDebug() << stats;
QVERIFY(stats.allocCount > 0);
QVERIFY(stats.blockCount > 0);
QVERIFY(stats.usedBytes > 0);
}
}
void tst_QRhi::nativeHandles_data()
{
rhiTestData();