RHI: Metal SPIRV-Cross buffer size buffers

When SPIRV-Cross encounters a runtime-sized array in a GLSL storage buffer,
it generates MSL code that expects a "buffer size buffer" containing a
list of storage buffer sizes to be bound.  This patch adds RHI backend
support for Metal "buffer size buffers" on compute and graphics
(including tessellation) pipelines.  Unit tests are included.

An accompanying patch to qtshadertools is required.

Change-Id: I9392bfb21803e1a868d7de420fedc097a8452429
Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
This commit is contained in:
Ben Fletcher 2022-12-13 15:47:19 -08:00 committed by Laszlo Agocs
parent d21e9a4326
commit dc0b2466f8
14 changed files with 691 additions and 3 deletions

View File

@ -393,6 +393,9 @@ struct QMetalGraphicsPipelineData
} tess; } tess;
void setupVertexInputDescriptor(MTLVertexDescriptor *desc); void setupVertexInputDescriptor(MTLVertexDescriptor *desc);
void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc); void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc);
// SPIRV-Cross buffer size buffers
QMetalBuffer *bufferSizeBuffer = nullptr;
}; };
struct QMetalComputePipelineData struct QMetalComputePipelineData
@ -400,6 +403,9 @@ struct QMetalComputePipelineData
id<MTLComputePipelineState> ps = nil; id<MTLComputePipelineState> ps = nil;
QMetalShader cs; QMetalShader cs;
MTLSize localSize; MTLSize localSize;
// SPIRV-Cross buffer size buffers
QMetalBuffer *bufferSizeBuffer = nullptr;
}; };
struct QMetalSwapChainData struct QMetalSwapChainData
@ -1457,6 +1463,12 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
bool hasDynamicOffsetInSrb = false; bool hasDynamicOffsetInSrb = false;
bool resNeedsRebind = false; bool resNeedsRebind = false;
// SPIRV-Cross buffer size buffers
// Need to determine storage buffer sizes here as this is the last opportunity for storage
// buffer bindings (offset, size) to be specified before draw / dispatch call
const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer);
QMap<QRhiShaderResourceBinding::StageFlag, QMap<int, quint32>> storageBufferSizes;
// do buffer writes, figure out if we need to rebind, and mark as in-use // do buffer writes, figure out if we need to rebind, and mark as in-use
for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) { for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) {
const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data(); const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data();
@ -1533,6 +1545,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
{ {
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer));
if (needsBufferSizeBuffer) {
for (int i = 0; i < 6; ++i) {
const QRhiShaderResourceBinding::StageFlag stage =
QRhiShaderResourceBinding::StageFlag(1 << i);
if (b->stage.testFlag(stage)) {
storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size();
}
}
}
executeBufferHostWritesForCurrentFrame(bufD); executeBufferHostWritesForCurrentFrame(bufD);
if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) { if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) {
resNeedsRebind = true; resNeedsRebind = true;
@ -1548,6 +1571,111 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
} }
} }
if (needsBufferSizeBuffer) {
QMetalBuffer *bufD = nullptr;
QVarLengthArray<QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag>, 4> shaders;
if (compPsD) {
bufD = compPsD->d->bufferSizeBuffer;
Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage));
} else {
bufD = gfxPsD->d->bufferSizeBuffer;
if (gfxPsD->d->tess.enabled) {
// Assumptions
// * We only use one of the compute vertex shader variants in a pipeline at any one time
// * The vertex shader variants all have the same storage block bindings
// * The vertex shader variants all have the same native resource binding map
// * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement
// * The vertex shader variants all have the same MslBufferSizeBufferBinding binding
// => We only need to use one vertex shader variant to generate the identical shader
// resource bindings
Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks());
Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks());
Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
== gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
== gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
== gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
== gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage));
if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage));
if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage));
} else {
if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage));
}
if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage));
}
quint32 offset = 0;
for (const QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag> &shader : shaders) {
const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
// if we don't have a srb entry for the buffer size buffer
if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) {
int maxNativeBinding = 0;
for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks())
maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first);
const int size = (maxNativeBinding + 1) * sizeof(int);
Q_ASSERT(offset + size <= bufD->size());
srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size));
QMetalShaderResourceBindings::BoundResourceData bd;
bd.sbuf.id = bufD->m_id;
bd.sbuf.generation = bufD->generation;
srbD->boundResourceData.append(bd);
}
// create the buffer size buffer data
QVarLengthArray<int, 8> bufferSizeBufferData;
Q_ASSERT(storageBufferSizes.contains(shader.second));
const QMap<int, quint32> &sizes(storageBufferSizes[shader.second]);
for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) {
const int index = shader.first->nativeResourceBindingMap[block.binding].first;
// if the native binding is -1, the buffer is present but not accessed in the shader
if (index < 0)
continue;
if (bufferSizeBufferData.size() <= index)
bufferSizeBufferData.resize(index + 1);
Q_ASSERT(sizes.contains(block.binding));
bufferSizeBufferData[index] = sizes[block.binding];
}
QRhiBufferData data;
const quint32 size = bufferSizeBufferData.size() * sizeof(int);
data.assign(reinterpret_cast<const char *>(bufferSizeBufferData.constData()), size);
Q_ASSERT(offset + size <= bufD->size());
bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data });
// buffer offsets must be 32byte aligned
offset += ((size + 31) / 32) * 32;
}
executeBufferHostWritesForCurrentFrame(bufD);
bufD->lastActiveFrameSlot = currentFrameSlot;
}
// make sure the resources for the correct slot get bound // make sure the resources for the correct slot get bound
const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0; const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0;
if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot) if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot)
@ -4117,6 +4245,9 @@ void QMetalGraphicsPipeline::destroy()
qDeleteAll(d->tess.hostVisibleWorkBuffers); qDeleteAll(d->tess.hostVisibleWorkBuffers);
d->tess.hostVisibleWorkBuffers.clear(); d->tess.hostVisibleWorkBuffers.clear();
delete d->bufferSizeBuffer;
d->bufferSizeBuffer = nullptr;
if (!d->ps && !d->ds if (!d->ps && !d->ds
&& !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2] && !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2]
&& !d->tess.tessControlComputeState) && !d->tess.tessControlComputeState)
@ -4726,6 +4857,8 @@ bool QMetalGraphicsPipeline::createVertexFragmentPipeline()
d->vs.lib = lib; d->vs.lib = lib;
d->vs.func = func; d->vs.func = func;
d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
d->vs.desc = shader.description();
d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
rhiD->d->shaderCache.insert(shaderStage, d->vs); rhiD->d->shaderCache.insert(shaderStage, d->vs);
[d->vs.lib retain]; [d->vs.lib retain];
[d->vs.func retain]; [d->vs.func retain];
@ -4735,6 +4868,8 @@ bool QMetalGraphicsPipeline::createVertexFragmentPipeline()
d->fs.lib = lib; d->fs.lib = lib;
d->fs.func = func; d->fs.func = func;
d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
d->fs.desc = shader.description();
d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
rhiD->d->shaderCache.insert(shaderStage, d->fs); rhiD->d->shaderCache.insert(shaderStage, d->fs);
[d->fs.lib retain]; [d->fs.lib retain];
[d->fs.func retain]; [d->fs.func retain];
@ -5283,7 +5418,9 @@ bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert
} }
d->fs.lib = fragLib; d->fs.lib = fragLib;
d->fs.func = fragFunc; d->fs.func = fragFunc;
d->fs.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey); d->fs.desc = tessFrag.description();
d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey);
d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey);
if (!d->tess.teseFragRenderPipeline(rhiD, this)) { if (!d->tess.teseFragRenderPipeline(rhiD, this)) {
qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader"); qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader");
@ -5342,6 +5479,42 @@ bool QMetalGraphicsPipeline::create()
if (!ok) if (!ok)
return false; return false;
// SPIRV-Cross buffer size buffers
int buffers = 0;
QVarLengthArray<QMetalShader *, 6> shaders;
if (d->tess.enabled) {
shaders.append(&d->tess.compVs[0]);
shaders.append(&d->tess.compVs[1]);
shaders.append(&d->tess.compVs[2]);
shaders.append(&d->tess.compTesc);
shaders.append(&d->tess.vertTese);
} else {
shaders.append(&d->vs);
}
shaders.append(&d->fs);
for (QMetalShader *shader : shaders) {
if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1);
int maxNativeBinding = 0;
for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks())
maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first);
// we use one buffer to hold data for all graphics shader stages, each with a different offset.
// buffer offsets must be 32byte aligned - adjust buffer count accordingly
buffers += ((maxNativeBinding + 1 + 7) / 8) * 8;
}
}
if (buffers) {
if (!d->bufferSizeBuffer)
d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
d->bufferSizeBuffer->setSize(buffers * sizeof(int));
d->bufferSizeBuffer->create();
}
rhiD->pipelineCreationEnd(); rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1; lastActiveFrameSlot = -1;
generation += 1; generation += 1;
@ -5368,6 +5541,9 @@ void QMetalComputePipeline::destroy()
if (!d->ps) if (!d->ps)
return; return;
delete d->bufferSizeBuffer;
d->bufferSizeBuffer = nullptr;
QRhiMetalData::DeferredReleaseEntry e; QRhiMetalData::DeferredReleaseEntry e;
e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline; e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline;
e.lastActiveFrameSlot = lastActiveFrameSlot; e.lastActiveFrameSlot = lastActiveFrameSlot;
@ -5436,6 +5612,14 @@ bool QMetalComputePipeline::create()
d->cs.func = func; d->cs.func = func;
d->cs.localSize = shader.description().computeShaderLocalSize(); d->cs.localSize = shader.description().computeShaderLocalSize();
d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
d->cs.desc = shader.description();
d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
// SPIRV-Cross buffer size buffers
if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1);
}
if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) { if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) {
for (QMetalShader &s : rhiD->d->shaderCache) for (QMetalShader &s : rhiD->d->shaderCache)
@ -5470,6 +5654,21 @@ bool QMetalComputePipeline::create()
return false; return false;
} }
// SPIRV-Cross buffer size buffers
if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
int buffers = 0;
for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks())
buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first);
buffers += 1;
if (!d->bufferSizeBuffer)
d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
d->bufferSizeBuffer->setSize(buffers * sizeof(int));
d->bufferSizeBuffer->create();
}
rhiD->pipelineCreationEnd(); rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1; lastActiveFrameSlot = -1;
generation += 1; generation += 1;

View File

@ -39,7 +39,8 @@ struct Q_GUI_EXPORT QShaderPrivate
MslTessTescTessLevelBufferBinding, MslTessTescTessLevelBufferBinding,
MslTessTescPatchOutputBufferBinding, MslTessTescPatchOutputBufferBinding,
MslTessTescParamsBufferBinding, MslTessTescParamsBufferBinding,
MslTessTescInputBufferBinding MslTessTescInputBufferBinding,
MslBufferSizeBufferBinding
}; };
QShaderPrivate() QShaderPrivate()

View File

@ -16,4 +16,8 @@ qsb --glsl 320es,410 --msl 12 --tess-mode triangles simpletess.tesc -o simpletes
qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb
qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb
qsb --glsl 310es,430 --msl 12 --hlsl 50 storagebuffer.comp -o storagebuffer.comp.qsb qsb --glsl 310es,430 --msl 12 --hlsl 50 storagebuffer.comp -o storagebuffer.comp.qsb
qsb --glsl 320es,430 --msl 12 --msltess storagebuffer_runtime.vert -o storagebuffer_runtime.vert.qsb
qsb --glsl 320es,430 --msl 12 --tess-mode triangles storagebuffer_runtime.tesc -o storagebuffer_runtime.tesc.qsb
qsb --glsl 320es,430 --msl 12 --tess-vertex-count 3 storagebuffer_runtime.tese -o storagebuffer_runtime.tese.qsb
qsb --glsl 320es,430 --msl 12 storagebuffer_runtime.frag -o storagebuffer_runtime.frag.qsb
qsb --glsl 320es,430 --hlsl 50 -c --msl 12 storagebuffer_runtime.comp -o storagebuffer_runtime.comp.qsb

View File

@ -0,0 +1,25 @@
#version 430
// Compute shader test asset: converts the floats of the "toGpu" storage buffer
// to ints and stores them in the "fromGpu" storage buffer. Both blocks consist
// solely of a runtime-sized array, so the element counts are only known through
// .length() at run time.
// A single invocation does all the work.
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
// Input: tightly packed floats (std430).
layout (binding = 0, std430) buffer toGpu
{
float _float[];
};
// Output: ints in std140 layout; the accompanying autotest reads them back
// using a 16 byte element stride.
layout (binding = 1, std140) buffer fromGpu
{
int _int[];
};
void main()
{
// Only touch the range that is valid in both buffers.
int length = min(_float.length(), _int.length());
for (int i = 0; i < length; ++i)
_int[i] = int(_float[i]);
}

View File

@ -0,0 +1,33 @@
#version 450
// Fragment shader test asset: encodes the element counts of three runtime-sized
// storage buffer arrays into the output color.
layout (location = 0) out vec4 fragColor;
// Element count becomes the green channel.
layout (std430, binding = 1) readonly buffer ssboG
{
float g[];
};
// Element count becomes the blue channel.
layout (std430, binding = 2) readonly buffer ssboB
{
float b[];
};
// Element count becomes the red channel.
layout (std430, binding = 6) readonly buffer ssboR
{
float r[];
};
// Fixed-size storage block; declared here but not read in main().
layout (std430, binding = 3) readonly buffer ssbo3
{
vec4 _vec4;
};
void main()
{
// some OpenGL implementations will optimize out the buffer variables if we don't use them
// resulting in a .length() of 0.
// Both branches of the conditional yield 1, so 'a' only exists to force a read
// of each buffer.
float a = (r[0]+g[0]+b[0])>0?1:1;
// Each channel is <element count> / 255; alpha is 255 / 255.
fragColor = a * vec4(r.length(), g.length(), b.length(), 255)/vec4(255);
}

View File

@ -0,0 +1,42 @@
#version 450
// Tessellation control shader test asset: derives the tessellation levels from
// the element counts of four runtime-sized storage buffer arrays and passes the
// control point positions through unchanged.
layout(vertices = 3) out;
// Element count drives gl_TessLevelOuter[0].
layout (std430, binding = 7) readonly buffer ssbo7
{
float float7[];
};
// Element count drives gl_TessLevelOuter[1].
layout (std430, binding = 8) readonly buffer ssbo8
{
float float8[];
};
// Element count drives gl_TessLevelOuter[2].
layout (std430, binding = 9) readonly buffer ssbo9
{
float float9[];
};
// Element count drives gl_TessLevelInner[0].
layout (std430, binding = 10) readonly buffer ssbo10
{
float float10[];
};
void main()
{
// some OpenGL implementations will optimize out the buffer variables if we don't use them
// resulting in a .length() of 0
// Both branches of the conditional yield 1, so 'a' only exists to force a read
// of each buffer.
float a = float7[0] == 0 && float8[0] == 0 && float9[0] == 0 && float10[0] == 0 ? 1 : 1;
// Tessellation levels are written by the first invocation only.
if (gl_InvocationID == 0) {
gl_TessLevelOuter[0] = float7.length() * a;
gl_TessLevelOuter[1] = float8.length() * a;
gl_TessLevelOuter[2] = float9.length() * a;
gl_TessLevelInner[0] = float10.length() * a;
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -0,0 +1,39 @@
#version 450
// Tessellation evaluation shader test asset: outputs the interpolated, transformed
// patch position only when the runtime-sized storage buffer arrays report the
// expected element counts.
layout(triangles, fractional_odd_spacing, ccw) in;
// Declared but not referenced in main().
layout (std140, binding = 6) uniform unused0
{
int unused;
}u0;
// Transformation matrix applied to the interpolated position.
layout (binding = 0) uniform u
{
mat4 matrix;
};
// Must report 64 elements for gl_Position to be written at all.
layout (std430, binding = 5) readonly buffer ssbo5
{
float _float[];
};
// Must report 2 elements, otherwise the position is multiplied by 0.
layout (std430, binding = 8) readonly buffer ssbo8
{
float float8[];
};
// Declared but not referenced in main().
layout (std430, binding = 1) readonly buffer unused1
{
int unused[];
}u1;
void main()
{
// some OpenGL implementations will optimize out the buffer variables if we don't use them
// resulting in a .length() of 0
// Both branches of the conditional yield 1, so 'a' only exists to force a read
// of each buffer.
float a = _float[0] == 0 && float8[0] == 1 ? 1 : 1;
// NOTE: gl_Position is assigned only inside this condition; there is no
// fallback assignment when _float.length() differs from 64.
if(_float.length() == 64)
gl_Position = a * matrix * ((gl_TessCoord.x * gl_in[0].gl_Position) + (gl_TessCoord.y * gl_in[1].gl_Position) + (gl_TessCoord.z * gl_in[2].gl_Position)) * (float8.length()==2?1:0);
}

View File

@ -0,0 +1,48 @@
#version 450
// Vertex shader test asset: outputs a non-zero position only when the
// runtime-sized storage buffer arrays report the expected element counts.
layout (location = 0) in vec3 position;
// Declared but not referenced in main().
layout (std140, binding = 6) uniform unused0
{
int unused;
}u0;
// Transformation matrix applied to the vertex position.
layout (binding = 0) uniform u
{
mat4 matrix;
};
// Must report 64 elements for the output w component to be 1.0.
layout (std430, binding = 5) readonly buffer ssbo5
{
float _float[];
};
// Fixed-size block; its xyz components scale the input position.
layout (std140, binding = 3) readonly buffer ssbo3
{
vec4 _vec4;
};
// Must report 32 elements for a transformed position to be written.
// Note that the block is named ssbo1 although it uses binding 4.
layout (std430, binding = 4) readonly buffer ssbo1
{
bool _bool[];
};
// Declared but not referenced in main().
layout (std430, binding = 1) readonly buffer unused1
{
int unused[];
}u1;
void main()
{
// some OpenGL implementations will optimize out the buffer variables if we don't use them
// resulting in a .length() of 0
// Both branches of the conditional yield 1, so 'a' only exists to force a read
// of each buffer.
float a = _float[0] == 0 && _bool[0] ? 1 : 1;
// Fallback output used when the _bool length check below fails.
gl_Position = vec4(0);
if(_bool.length() == 32)
gl_Position = a * matrix * vec4(position*_vec4.xyz, _float.length() == 64 ? 1.0 : 0.0);
}

View File

@ -147,6 +147,10 @@ private slots:
void storageBuffer_data(); void storageBuffer_data();
void storageBuffer(); void storageBuffer();
void storageBufferRuntimeSizeCompute_data();
void storageBufferRuntimeSizeCompute();
void storageBufferRuntimeSizeGraphics_data();
void storageBufferRuntimeSizeGraphics();
private: private:
void setWindowType(QWindow *window, QRhi::Implementation impl); void setWindowType(QWindow *window, QRhi::Implementation impl);
@ -5889,5 +5893,298 @@ void tst_QRhi::storageBuffer()
} }
} }
// Data function for storageBufferRuntimeSizeCompute(): delegates to the shared
// rhiTestData() helper. Presumably that helper supplies the "impl" and
// "initParams" columns the test fetches - confirm against its definition.
void tst_QRhi::storageBufferRuntimeSizeCompute_data()
{
rhiTestData();
}
void tst_QRhi::storageBufferRuntimeSizeCompute()
{
    // Use a compute shader to copy from one storage buffer with std430 runtime
    // float array to another with std140 runtime int array. We fill the
    // "toGpu" buffer with known float data generated and uploaded from the
    // CPU, then dispatch a compute shader to copy from the "toGpu" buffer to
    // the "fromGpu" buffer. We then readback the "fromGpu" buffer and verify
    // that the results are as expected. This is primarily to test Metal
    // SPIRV-Cross buffer size buffers.

    QFETCH(QRhi::Implementation, impl);
    QFETCH(QRhiInitParams *, initParams);

    // we can't test with Null as there is no compute
    if (impl == QRhi::Null)
        return;

    QScopedPointer<QRhi> rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr));
    if (!rhi)
        QSKIP("QRhi could not be created, skipping testing");

    if (!rhi->isFeatureSupported(QRhi::Feature::Compute))
        QSKIP("Compute is not supported with this graphics API, skipping test");

    QShader s = loadShader(":/data/storagebuffer_runtime.comp.qsb");
    QVERIFY(s.isValid());
    QCOMPARE(s.description().storageBlocks().size(), 2);

    // Index the reflected storage blocks and their members by name so that
    // bindings, sizes and offsets come from the shader description rather than
    // being hard-coded here.
    QMap<QByteArray, QShaderDescription::StorageBlock> blocks;
    for (const QShaderDescription::StorageBlock &block : s.description().storageBlocks())
        blocks[block.blockName] = block;

    QMap<QByteArray, QShaderDescription::BlockVariable> toGpuMembers;
    for (const QShaderDescription::BlockVariable &member : blocks["toGpu"].members)
        toGpuMembers[member.name] = member;

    QMap<QByteArray, QShaderDescription::BlockVariable> fromGpuMembers;
    for (const QShaderDescription::BlockVariable &member : blocks["fromGpu"].members)
        fromGpuMembers[member.name] = member;

    // Run the whole upload / dispatch / readback cycle once per buffer type.
    for (QRhiBuffer::Type type : { QRhiBuffer::Type::Immutable, QRhiBuffer::Type::Static }) {
        QRhiCommandBuffer *cb = nullptr;
        // Check the frame operation result, like the graphics variant of this test does.
        QCOMPARE(rhi->beginOffscreenFrame(&cb), QRhi::FrameOpSuccess);
        QVERIFY(cb);

        QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch();
        QVERIFY(u);

        // Element strides: floats are tightly packed in std430, whereas each
        // int of the std140 array occupies 16 bytes.
        const int stride430 = sizeof(float);
        const int stride140 = 4 * sizeof(float);
        const int length = 32;

        // Each buffer is the block's known (fixed) size plus room for
        // 'length' elements of the runtime-sized array.
        QScopedPointer<QRhiBuffer> toGpuBuffer(
                rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer,
                               blocks["toGpu"].knownSize + length * stride430));
        QVERIFY(toGpuBuffer->create());

        QScopedPointer<QRhiBuffer> fromGpuBuffer(
                rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer,
                               blocks["fromGpu"].knownSize + length * stride140));
        QVERIFY(fromGpuBuffer->create());

        QByteArray toGpuData(toGpuBuffer->size(), 0);
        for (int i = 0; i < length; ++i)
            reinterpret_cast<float &>(toGpuData.data()[toGpuMembers["_float"].offset + i * stride430]) = float(i);

        u->uploadStaticBuffer(toGpuBuffer.data(), 0, toGpuData.size(), toGpuData.constData());

        // Zero-fill the *entire* destination buffer. Uploading only knownSize
        // bytes would leave the runtime-sized array part (the part the shader
        // writes and the test verifies) with undefined initial contents.
        const QByteArray fromGpuZeroes(fromGpuBuffer->size(), 0);
        u->uploadStaticBuffer(fromGpuBuffer.data(), 0, fromGpuZeroes.size(),
                              fromGpuZeroes.constData());

        QScopedPointer<QRhiShaderResourceBindings> srb(rhi->newShaderResourceBindings());
        srb->setBindings(
                { QRhiShaderResourceBinding::bufferLoadStore(
                          blocks["toGpu"].binding, QRhiShaderResourceBinding::ComputeStage,
                          toGpuBuffer.data()),
                  QRhiShaderResourceBinding::bufferLoadStore(
                          blocks["fromGpu"].binding, QRhiShaderResourceBinding::ComputeStage,
                          fromGpuBuffer.data()) });
        QVERIFY(srb->create());

        QScopedPointer<QRhiComputePipeline> pipeline(rhi->newComputePipeline());
        pipeline->setShaderStage({ QRhiShaderStage::Compute, s });
        pipeline->setShaderResourceBindings(srb.data());
        QVERIFY(pipeline->create());

        // The uploads are submitted together with the start of the compute pass.
        cb->beginComputePass(u);
        cb->setComputePipeline(pipeline.data());
        cb->setShaderResources();
        cb->dispatch(1, 1, 1);

        // The readback of the shader output is enqueued at the end of the pass.
        u = rhi->nextResourceUpdateBatch();
        QVERIFY(u);
        int readbackCompleted = 0;
        QRhiBufferReadbackResult result;
        result.completed = [&readbackCompleted]() { readbackCompleted++; };
        u->readBackBuffer(fromGpuBuffer.data(), 0, fromGpuBuffer->size(), &result);

        cb->endComputePass(u);

        rhi->endOffscreenFrame();

        QVERIFY(readbackCompleted > 0);
        QCOMPARE(result.data.size(), fromGpuBuffer->size());

        // Element i of the output must hold int(float(i)) == i.
        for (int i = 0; i < length; ++i)
            QCOMPARE(reinterpret_cast<const int &>(result.data.constData()[fromGpuMembers["_int"].offset + i * stride140]), i);

        QCOMPARE(readbackCompleted, 1);
    }
}
// Data function for storageBufferRuntimeSizeGraphics(): delegates to the shared
// rhiTestData() helper. Presumably that helper supplies the "impl" and
// "initParams" columns the test fetches - confirm against its definition.
void tst_QRhi::storageBufferRuntimeSizeGraphics_data()
{
rhiTestData();
}
void tst_QRhi::storageBufferRuntimeSizeGraphics()
{
// Draws a tessellated triangle with color determined by the length of
// buffers bound to shader stages. This is primarily to test Metal
// SPIRV-Cross buffer size buffers.
QFETCH(QRhi::Implementation, impl);
QFETCH(QRhiInitParams *, initParams);
QScopedPointer<QRhi> rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr));
if (!rhi)
QSKIP("QRhi could not be created, skipping testing rendering");
if (!rhi->isFeatureSupported(QRhi::Tessellation)) {
// From a Vulkan or Metal implementation we expect tessellation to work,
// even though it is optional (as per spec) for Vulkan.
QVERIFY(rhi->backend() != QRhi::Vulkan);
QVERIFY(rhi->backend() != QRhi::Metal);
QSKIP("Tessellation is not supported with this graphics API, skipping test");
}
if (rhi->backend() == QRhi::D3D11)
QSKIP("Skipping tessellation test on D3D for now, test assets not prepared for HLSL yet");
QScopedPointer<QRhiTexture> texture(rhi->newTexture(QRhiTexture::RGBA8, QSize(64, 64), 1,
QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource));
QVERIFY(texture->create());
QScopedPointer<QRhiTextureRenderTarget> rt(rhi->newTextureRenderTarget({ texture.data() }));
QScopedPointer<QRhiRenderPassDescriptor> rpDesc(rt->newCompatibleRenderPassDescriptor());
rt->setRenderPassDescriptor(rpDesc.data());
QVERIFY(rt->create());
static const float triangleVertices[] = {
0.0f, 0.5f, 0.0f,
-0.5f, -0.5f, 0.0f,
0.5f, -0.5f, 0.0f,
};
QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch();
QScopedPointer<QRhiBuffer> vbuf(rhi->newBuffer(QRhiBuffer::Immutable, QRhiBuffer::VertexBuffer, sizeof(triangleVertices)));
QVERIFY(vbuf->create());
u->uploadStaticBuffer(vbuf.data(), triangleVertices);
QScopedPointer<QRhiBuffer> ubuf(rhi->newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 64));
QVERIFY(ubuf->create());
QMatrix4x4 mvp = rhi->clipSpaceCorrMatrix();
u->updateDynamicBuffer(ubuf.data(), 0, 64, mvp.constData());
QScopedPointer<QRhiBuffer> ssbo5(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 256));
QVERIFY(ssbo5->create());
QScopedPointer<QRhiBuffer> ssbo3(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 16));
QVERIFY(ssbo3->create());
u->uploadStaticBuffer(ssbo3.data(), QVector<float>({ 1.0f, 1.0f, 1.0f, 1.0f }).constData());
QScopedPointer<QRhiBuffer> ssbo4(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 128));
QVERIFY(ssbo4->create());
const int red = 79;
const int green = 43;
const int blue = 251;
QScopedPointer<QRhiBuffer> ssboR(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, red * sizeof(float)));
QVERIFY(ssboR->create());
QScopedPointer<QRhiBuffer> ssboG(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, green * sizeof(float)));
QVERIFY(ssboG->create());
QScopedPointer<QRhiBuffer> ssboB(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, blue * sizeof(float)));
QVERIFY(ssboB->create());
const int tessOuter0 = 1;
const int tessOuter1 = 2;
const int tessOuter2 = 3;
const int tessInner0 = 4;
QScopedPointer<QRhiBuffer> ssboTessOuter0(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessOuter0 * sizeof(float)));
QVERIFY(ssboTessOuter0->create());
QScopedPointer<QRhiBuffer> ssboTessOuter1(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessOuter1 * sizeof(float)));
QVERIFY(ssboTessOuter1->create());
QScopedPointer<QRhiBuffer> ssboTessOuter2(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessOuter2 * sizeof(float)));
QVERIFY(ssboTessOuter2->create());
QScopedPointer<QRhiBuffer> ssboTessInner0(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessInner0 * sizeof(float)));
QVERIFY(ssboTessInner0->create());
QScopedPointer<QRhiShaderResourceBindings> srb(rhi->newShaderResourceBindings());
srb->setBindings({ QRhiShaderResourceBinding::uniformBuffer(0, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ubuf.data()),
QRhiShaderResourceBinding::bufferLoad(5, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ssbo5.data()),
QRhiShaderResourceBinding::bufferLoad(3, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage | QRhiShaderResourceBinding::FragmentStage, ssbo3.data()),
QRhiShaderResourceBinding::bufferLoad(4, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ssbo4.data()),
QRhiShaderResourceBinding::bufferLoad(7, QRhiShaderResourceBinding::TessellationControlStage, ssboTessOuter0.data()),
QRhiShaderResourceBinding::bufferLoad(8, QRhiShaderResourceBinding::TessellationControlStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ssboTessOuter1.data()),
QRhiShaderResourceBinding::bufferLoad(9, QRhiShaderResourceBinding::TessellationControlStage, ssboTessOuter2.data()),
QRhiShaderResourceBinding::bufferLoad(10, QRhiShaderResourceBinding::TessellationControlStage, ssboTessInner0.data()),
QRhiShaderResourceBinding::bufferLoad(1, QRhiShaderResourceBinding::FragmentStage, ssboG.data()),
QRhiShaderResourceBinding::bufferLoad(2, QRhiShaderResourceBinding::FragmentStage, ssboB.data()),
QRhiShaderResourceBinding::bufferLoad(6, QRhiShaderResourceBinding::FragmentStage, ssboR.data()) });
QVERIFY(srb->create());
QScopedPointer<QRhiGraphicsPipeline> pipeline(rhi->newGraphicsPipeline());
pipeline->setTopology(QRhiGraphicsPipeline::Patches);
pipeline->setPatchControlPointCount(3);
pipeline->setShaderStages({
{ QRhiShaderStage::Vertex, loadShader(":/data/storagebuffer_runtime.vert.qsb") },
{ QRhiShaderStage::TessellationControl, loadShader(":/data/storagebuffer_runtime.tesc.qsb") },
{ QRhiShaderStage::TessellationEvaluation, loadShader(":/data/storagebuffer_runtime.tese.qsb") },
{ QRhiShaderStage::Fragment, loadShader(":/data/storagebuffer_runtime.frag.qsb") }
});
pipeline->setCullMode(QRhiGraphicsPipeline::None);
QRhiVertexInputLayout inputLayout;
inputLayout.setBindings({
{ 3 * sizeof(float) }
});
inputLayout.setAttributes({
{ 0, 0, QRhiVertexInputAttribute::Float3, 0 },
});
pipeline->setVertexInputLayout(inputLayout);
pipeline->setShaderResourceBindings(srb.data());
pipeline->setRenderPassDescriptor(rpDesc.data());
QVERIFY(pipeline->create());
QRhiCommandBuffer *cb = nullptr;
QCOMPARE(rhi->beginOffscreenFrame(&cb), QRhi::FrameOpSuccess);
cb->beginPass(rt.data(), Qt::black, { 1.0f, 0 }, u);
cb->setGraphicsPipeline(pipeline.data());
cb->setViewport({ 0, 0, float(rt->pixelSize().width()), float(rt->pixelSize().height()) });
cb->setShaderResources();
QRhiCommandBuffer::VertexInput vbufBinding(vbuf.data(), 0);
cb->setVertexInput(0, 1, &vbufBinding);
cb->draw(3);
QRhiReadbackResult readResult;
QImage result;
readResult.completed = [&readResult, &result] {
result = QImage(reinterpret_cast<const uchar *>(readResult.data.constData()),
readResult.pixelSize.width(), readResult.pixelSize.height(),
QImage::Format_RGBA8888);
};
QRhiResourceUpdateBatch *readbackBatch = rhi->nextResourceUpdateBatch();
readbackBatch->readBackTexture({ texture.data() }, &readResult);
cb->endPass(readbackBatch);
rhi->endOffscreenFrame();
QCOMPARE(result.size(), rt->pixelSize());
// cannot check rendering results with Null, because there is no rendering there
if (impl == QRhi::Null)
return;
QCOMPARE(result.pixel(32, 32), qRgb(red, green, blue));
}
#include <tst_qrhi.moc> #include <tst_qrhi.moc>
QTEST_MAIN(tst_QRhi) QTEST_MAIN(tst_QRhi)