diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm index 07b9a2af09..682aa56a6f 100644 --- a/src/gui/rhi/qrhimetal.mm +++ b/src/gui/rhi/qrhimetal.mm @@ -393,6 +393,9 @@ struct QMetalGraphicsPipelineData } tess; void setupVertexInputDescriptor(MTLVertexDescriptor *desc); void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc); + + // SPIRV-Cross buffer size buffers + QMetalBuffer *bufferSizeBuffer = nullptr; }; struct QMetalComputePipelineData @@ -400,6 +403,9 @@ struct QMetalComputePipelineData id ps = nil; QMetalShader cs; MTLSize localSize; + + // SPIRV-Cross buffer size buffers + QMetalBuffer *bufferSizeBuffer = nullptr; }; struct QMetalSwapChainData @@ -1457,6 +1463,12 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind bool hasDynamicOffsetInSrb = false; bool resNeedsRebind = false; + // SPIRV-Cross buffer size buffers + // Need to determine storage buffer sizes here as this is the last opportunity for storage + // buffer bindings (offset, size) to be specified before draw / dispatch call + const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer); + QMap> storageBufferSizes; + // do buffer writes, figure out if we need to rebind, and mark as in-use for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) { const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data(); @@ -1533,6 +1545,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); + + if (needsBufferSizeBuffer) { + for (int i = 0; i < 6; ++i) { + const QRhiShaderResourceBinding::StageFlag stage = + QRhiShaderResourceBinding::StageFlag(1 << i); + if (b->stage.testFlag(stage)) { + storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size(); + } + } + } + executeBufferHostWritesForCurrentFrame(bufD); if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) { resNeedsRebind = true; @@ -1548,6 +1571,111 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind } } + if (needsBufferSizeBuffer) { + QMetalBuffer *bufD = nullptr; + QVarLengthArray, 4> shaders; + + if (compPsD) { + bufD = compPsD->d->bufferSizeBuffer; + Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage)); + } else { + bufD = gfxPsD->d->bufferSizeBuffer; + if (gfxPsD->d->tess.enabled) { + + // Assumptions + // * We only use one of the compute vertex shader variants in a pipeline at any one time + // * The vertex shader variants all have the same storage block bindings + // * The vertex shader variants all have the same native resource binding map + // * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement + // * The vertex shader variants all have the same MslBufferSizeBufferBinding binding + // => We only need to use one vertex shader variant to generate the identical shader + // resource bindings + Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks()); + Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks()); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding) + == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding) + == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding] + == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding] + == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]); + + if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage)); + + if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage)); + + if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage)); + + } else { + if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage)); + } + if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage)); + } + + quint32 offset = 0; + for (const QPair &shader : shaders) { + + const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + + // if we don't have a srb entry for the buffer size buffer + if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) { + + int maxNativeBinding = 0; + for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) + maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first); + + const int size = (maxNativeBinding + 1) * sizeof(int); + + Q_ASSERT(offset + size <= bufD->size()); + srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size)); + + QMetalShaderResourceBindings::BoundResourceData bd; + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + srbD->boundResourceData.append(bd); + } + + // create the buffer size buffer data + QVarLengthArray bufferSizeBufferData; + Q_ASSERT(storageBufferSizes.contains(shader.second)); + const QMap &sizes(storageBufferSizes[shader.second]); + for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) { + const int index = shader.first->nativeResourceBindingMap[block.binding].first; + + // if the native binding is -1, the buffer is present but not accessed in the shader + if (index < 0) + continue; + + if (bufferSizeBufferData.size() <= index) + bufferSizeBufferData.resize(index + 1); + + Q_ASSERT(sizes.contains(block.binding)); + bufferSizeBufferData[index] = sizes[block.binding]; + } + + QRhiBufferData data; + const quint32 size = bufferSizeBufferData.size() * sizeof(int); + data.assign(reinterpret_cast(bufferSizeBufferData.constData()), size); + Q_ASSERT(offset + size <= bufD->size()); + bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data }); + + // buffer offsets must be 32byte aligned + offset += ((size + 31) / 32) * 32; + } + + executeBufferHostWritesForCurrentFrame(bufD); + bufD->lastActiveFrameSlot = currentFrameSlot; + } + // make sure the resources for the correct slot get bound const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0; if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot) @@ -4117,6 +4245,9 @@ void QMetalGraphicsPipeline::destroy() qDeleteAll(d->tess.hostVisibleWorkBuffers); d->tess.hostVisibleWorkBuffers.clear(); + delete d->bufferSizeBuffer; + d->bufferSizeBuffer = nullptr; + if (!d->ps && !d->ds && !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2] && !d->tess.tessControlComputeState) @@ -4726,6 +4857,8 @@ bool QMetalGraphicsPipeline::createVertexFragmentPipeline() d->vs.lib = lib; d->vs.func = func; d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->vs.desc = shader.description(); + d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); rhiD->d->shaderCache.insert(shaderStage, d->vs); [d->vs.lib retain]; [d->vs.func retain]; @@ -4735,6 +4868,8 @@ bool QMetalGraphicsPipeline::createVertexFragmentPipeline() d->fs.lib = lib; d->fs.func = func; d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->fs.desc = shader.description(); + d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); rhiD->d->shaderCache.insert(shaderStage, d->fs); [d->fs.lib retain]; [d->fs.func retain]; @@ -5283,7 +5418,9 @@ bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert } d->fs.lib = fragLib; d->fs.func = fragFunc; - d->fs.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey); + d->fs.desc = tessFrag.description(); + d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey); + d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey); if (!d->tess.teseFragRenderPipeline(rhiD, this)) { qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader"); @@ -5342,6 +5479,42 @@ bool QMetalGraphicsPipeline::create() if (!ok) return false; + // SPIRV-Cross buffer size buffers + int buffers = 0; + QVarLengthArray shaders; + if (d->tess.enabled) { + shaders.append(&d->tess.compVs[0]); + shaders.append(&d->tess.compVs[1]); + shaders.append(&d->tess.compVs[2]); + shaders.append(&d->tess.compTesc); + shaders.append(&d->tess.vertTese); + } else { + shaders.append(&d->vs); + } + shaders.append(&d->fs); + + for (QMetalShader *shader : shaders) { + if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1); + int maxNativeBinding = 0; + for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks()) + maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first); + + // we use one buffer to hold data for all graphics shader stages, each with a different offset. + // buffer offsets must be 32byte aligned - adjust buffer count accordingly + buffers += ((maxNativeBinding + 1 + 7) / 8) * 8; + } + } + + if (buffers) { + if (!d->bufferSizeBuffer) + d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int)); + + d->bufferSizeBuffer->setSize(buffers * sizeof(int)); + d->bufferSizeBuffer->create(); + } + rhiD->pipelineCreationEnd(); lastActiveFrameSlot = -1; generation += 1; @@ -5368,6 +5541,9 @@ void QMetalComputePipeline::destroy() if (!d->ps) return; + delete d->bufferSizeBuffer; + d->bufferSizeBuffer = nullptr; + QRhiMetalData::DeferredReleaseEntry e; e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline; e.lastActiveFrameSlot = lastActiveFrameSlot; @@ -5436,6 +5612,14 @@ bool QMetalComputePipeline::create() d->cs.func = func; d->cs.localSize = shader.description().computeShaderLocalSize(); d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->cs.desc = shader.description(); + d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); + + // SPIRV-Cross buffer size buffers + if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1); + } if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) { for (QMetalShader &s : rhiD->d->shaderCache) @@ -5470,6 +5654,21 @@ bool QMetalComputePipeline::create() return false; } + // SPIRV-Cross buffer size buffers + if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + int buffers = 0; + for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks()) + buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first); + + buffers += 1; + + if (!d->bufferSizeBuffer) + d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int)); + + d->bufferSizeBuffer->setSize(buffers * sizeof(int)); + d->bufferSizeBuffer->create(); + } + rhiD->pipelineCreationEnd(); lastActiveFrameSlot = -1; generation += 1; diff --git a/src/gui/rhi/qshader_p_p.h b/src/gui/rhi/qshader_p_p.h index 58fb2072c9..f0f990935a 100644 --- a/src/gui/rhi/qshader_p_p.h +++ b/src/gui/rhi/qshader_p_p.h @@ -39,7 +39,8 @@ struct Q_GUI_EXPORT QShaderPrivate MslTessTescTessLevelBufferBinding, MslTessTescPatchOutputBufferBinding, MslTessTescParamsBufferBinding, - MslTessTescInputBufferBinding + MslTessTescInputBufferBinding, + MslBufferSizeBufferBinding }; QShaderPrivate() diff --git a/tests/auto/gui/rhi/qrhi/data/buildshaders.bat b/tests/auto/gui/rhi/qrhi/data/buildshaders.bat index 1b5d4f28a6..37050fe80f 100644 --- a/tests/auto/gui/rhi/qrhi/data/buildshaders.bat +++ b/tests/auto/gui/rhi/qrhi/data/buildshaders.bat @@ -16,4 +16,8 @@ qsb --glsl 320es,410 --msl 12 --tess-mode triangles simpletess.tesc -o simpletes qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb qsb --glsl 310es,430 --msl 12 --hlsl 50 storagebuffer.comp -o storagebuffer.comp.qsb - +qsb --glsl 320es,430 --msl 12 --msltess storagebuffer_runtime.vert -o storagebuffer_runtime.vert.qsb +qsb --glsl 320es,430 --msl 12 --tess-mode triangles storagebuffer_runtime.tesc -o storagebuffer_runtime.tesc.qsb +qsb --glsl 320es,430 --msl 12 --tess-vertex-count 3 storagebuffer_runtime.tese -o storagebuffer_runtime.tese.qsb +qsb --glsl 320es,430 --msl 12 storagebuffer_runtime.frag -o storagebuffer_runtime.frag.qsb +qsb --glsl 320es,430 --hlsl 50 -c --msl 12 storagebuffer_runtime.comp -o storagebuffer_runtime.comp.qsb diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.comp b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.comp new file mode 100644 index 0000000000..d36f5426bc --- /dev/null +++ b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.comp @@ -0,0 +1,25 @@ +#version 430 + +layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, std430) buffer toGpu +{ + float _float[]; +}; + + +layout (binding = 1, std140) buffer fromGpu +{ + int _int[]; +}; + +void main() +{ + int length = min(_float.length(), _int.length()); + + for (int i = 0; i < length; ++i) + _int[i] = int(_float[i]); + +} + + diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.comp.qsb b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.comp.qsb new file mode 100644 index 0000000000..b4c43ecc9b Binary files /dev/null and b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.comp.qsb differ diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.frag b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.frag new file mode 100644 index 0000000000..2e45a5f62a --- /dev/null +++ b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.frag @@ -0,0 +1,33 @@ +#version 450 + +layout (location = 0) out vec4 fragColor; + +layout (std430, binding = 1) readonly buffer ssboG +{ + float g[]; +}; + +layout (std430, binding = 2) readonly buffer ssboB +{ + float b[]; +}; + +layout (std430, binding = 6) readonly buffer ssboR +{ + float r[]; +}; + +layout (std430, binding = 3) readonly buffer ssbo3 +{ + vec4 _vec4; +}; + +void main() +{ + + // some OpenGL implementations will optimize out the buffer variables if we don't use them + // resulting in a .length() of 0. + float a = (r[0]+g[0]+b[0])>0?1:1; + + fragColor = a * vec4(r.length(), g.length(), b.length(), 255)/vec4(255); +} diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.frag.qsb b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.frag.qsb new file mode 100644 index 0000000000..53fc9a1906 Binary files /dev/null and b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.frag.qsb differ diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tesc b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tesc new file mode 100644 index 0000000000..56060285d2 --- /dev/null +++ b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tesc @@ -0,0 +1,42 @@ +#version 450 + +layout(vertices = 3) out; + + +layout (std430, binding = 7) readonly buffer ssbo7 +{ + float float7[]; +}; + +layout (std430, binding = 8) readonly buffer ssbo8 +{ + float float8[]; +}; + +layout (std430, binding = 9) readonly buffer ssbo9 +{ + float float9[]; +}; + +layout (std430, binding = 10) readonly buffer ssbo10 +{ + float float10[]; +}; + +void main() +{ + + // some OpenGL implementations will optimize out the buffer variables if we don't use them + // resulting in a .length() of 0 + float a = float7[0] == 0 && float8[0] == 0 && float9[0] == 0 && float10[0] == 0 ? 1 : 1; + + if (gl_InvocationID == 0) { + gl_TessLevelOuter[0] = float7.length() * a; + gl_TessLevelOuter[1] = float8.length() * a; + gl_TessLevelOuter[2] = float9.length() * a; + gl_TessLevelInner[0] = float10.length() * a; + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + +} diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tesc.qsb b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tesc.qsb new file mode 100644 index 0000000000..e48aa0269c Binary files /dev/null and b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tesc.qsb differ diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tese b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tese new file mode 100644 index 0000000000..a8bec13561 --- /dev/null +++ b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tese @@ -0,0 +1,39 @@ +#version 450 + +layout(triangles, fractional_odd_spacing, ccw) in; + +layout (std140, binding = 6) uniform unused0 +{ + int unused; +}u0; + +layout (binding = 0) uniform u +{ + mat4 matrix; +}; + +layout (std430, binding = 5) readonly buffer ssbo5 +{ + float _float[]; +}; + +layout (std430, binding = 8) readonly buffer ssbo8 +{ + float float8[]; +}; + +layout (std430, binding = 1) readonly buffer unused1 +{ + int unused[]; +}u1; + + +void main() +{ + // some OpenGL implementations will optimize out the buffer variables if we don't use them + // resulting in a .length() of 0 + float a = _float[0] == 0 && float8[0] == 1 ? 1 : 1; + + if(_float.length() == 64) + gl_Position = a * matrix * ((gl_TessCoord.x * gl_in[0].gl_Position) + (gl_TessCoord.y * gl_in[1].gl_Position) + (gl_TessCoord.z * gl_in[2].gl_Position)) * (float8.length()==2?1:0); +} diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tese.qsb b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tese.qsb new file mode 100644 index 0000000000..23a433b5ae Binary files /dev/null and b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.tese.qsb differ diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.vert b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.vert new file mode 100644 index 0000000000..b3ac10efea --- /dev/null +++ b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.vert @@ -0,0 +1,48 @@ +#version 450 + +layout (location = 0) in vec3 position; + +layout (std140, binding = 6) uniform unused0 +{ + int unused; +}u0; + +layout (binding = 0) uniform u +{ + mat4 matrix; +}; + +layout (std430, binding = 5) readonly buffer ssbo5 +{ + float _float[]; +}; + +layout (std140, binding = 3) readonly buffer ssbo3 +{ + vec4 _vec4; +}; + +layout (std430, binding = 4) readonly buffer ssbo1 +{ + bool _bool[]; +}; + +layout (std430, binding = 1) readonly buffer unused1 +{ + int unused[]; +}u1; + + +void main() +{ + + // some OpenGL implementations will optimize out the buffer variables if we don't use them + // resulting in a .length() of 0 + float a = _float[0] == 0 && _bool[0] ? 1 : 1; + + gl_Position = vec4(0); + + if(_bool.length() == 32) + gl_Position = a * matrix * vec4(position*_vec4.xyz, _float.length() == 64 ? 1.0 : 0.0); + +} diff --git a/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.vert.qsb b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.vert.qsb new file mode 100644 index 0000000000..8b1cff52fd Binary files /dev/null and b/tests/auto/gui/rhi/qrhi/data/storagebuffer_runtime.vert.qsb differ diff --git a/tests/auto/gui/rhi/qrhi/tst_qrhi.cpp b/tests/auto/gui/rhi/qrhi/tst_qrhi.cpp index a298a9f545..225226eccf 100644 --- a/tests/auto/gui/rhi/qrhi/tst_qrhi.cpp +++ b/tests/auto/gui/rhi/qrhi/tst_qrhi.cpp @@ -147,6 +147,10 @@ private slots: void storageBuffer_data(); void storageBuffer(); + void storageBufferRuntimeSizeCompute_data(); + void storageBufferRuntimeSizeCompute(); + void storageBufferRuntimeSizeGraphics_data(); + void storageBufferRuntimeSizeGraphics(); private: void setWindowType(QWindow *window, QRhi::Implementation impl); @@ -5889,5 +5893,298 @@ void tst_QRhi::storageBuffer() } } + void tst_QRhi::storageBufferRuntimeSizeCompute_data() +{ + rhiTestData(); +} + + void tst_QRhi::storageBufferRuntimeSizeCompute() +{ + // Use a compute shader to copy from one storage buffer with std430 runtime + // float array to another with std140 runtime int array. We fill the + // "toGpu" buffer with known float data generated and uploaded from the + // CPU, then dispatch a compute shader to copy from the "toGpu" buffer to + // the "fromGpu" buffer. We then readback the "fromGpu" buffer and verify + // that the results are as expected. This is primarily to test Metal + // SPIRV-Cross buffer size buffers. + + QFETCH(QRhi::Implementation, impl); + QFETCH(QRhiInitParams *, initParams); + + // we can't test with Null as there is no compute + if (impl == QRhi::Null) + return; + + QScopedPointer rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr)); + if (!rhi) + QSKIP("QRhi could not be created, skipping testing"); + + if (!rhi->isFeatureSupported(QRhi::Feature::Compute)) + QSKIP("Compute is not supported with this graphics API, skipping test"); + + QShader s = loadShader(":/data/storagebuffer_runtime.comp.qsb"); + QVERIFY(s.isValid()); + QCOMPARE(s.description().storageBlocks().size(), 2); + + QMap blocks; + for (const QShaderDescription::StorageBlock &block : s.description().storageBlocks()) + blocks[block.blockName] = block; + + QMap toGpuMembers; + for (const QShaderDescription::BlockVariable &member : blocks["toGpu"].members) + toGpuMembers[member.name] = member; + + QMap fromGpuMembers; + for (const QShaderDescription::BlockVariable &member : blocks["fromGpu"].members) + fromGpuMembers[member.name] = member; + + for (QRhiBuffer::Type type : { QRhiBuffer::Type::Immutable, QRhiBuffer::Type::Static }) { + QRhiCommandBuffer *cb = nullptr; + + rhi->beginOffscreenFrame(&cb); + QVERIFY(cb); + + QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch(); + QVERIFY(u); + + const int stride430 = sizeof(float); + const int stride140 = 4 * sizeof(float); + const int length = 32; + + QScopedPointer toGpuBuffer( + rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, + blocks["toGpu"].knownSize + length * stride430)); + QVERIFY(toGpuBuffer->create()); + + QScopedPointer fromGpuBuffer( + rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, + blocks["fromGpu"].knownSize + length * stride140)); + QVERIFY(fromGpuBuffer->create()); + + QByteArray toGpuData(toGpuBuffer->size(), 0); + for (int i = 0; i < length; ++i) + reinterpret_cast(toGpuData.data()[toGpuMembers["_float"].offset + i * stride430]) = float(i); + + u->uploadStaticBuffer(toGpuBuffer.data(), 0, toGpuData.size(), toGpuData.constData()); + u->uploadStaticBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, + QByteArray(fromGpuBuffer->size(), 0).constData()); + + QScopedPointer srb(rhi->newShaderResourceBindings()); + srb->setBindings( + { QRhiShaderResourceBinding::bufferLoadStore( + blocks["toGpu"].binding, QRhiShaderResourceBinding::ComputeStage, + toGpuBuffer.data()), + QRhiShaderResourceBinding::bufferLoadStore( + blocks["fromGpu"].binding, QRhiShaderResourceBinding::ComputeStage, + fromGpuBuffer.data()) }); + QVERIFY(srb->create()); + + QScopedPointer pipeline(rhi->newComputePipeline()); + pipeline->setShaderStage({ QRhiShaderStage::Compute, s }); + pipeline->setShaderResourceBindings(srb.data()); + QVERIFY(pipeline->create()); + + cb->beginComputePass(u); + + cb->setComputePipeline(pipeline.data()); + cb->setShaderResources(); + cb->dispatch(1, 1, 1); + + u = rhi->nextResourceUpdateBatch(); + QVERIFY(u); + int readbackCompleted = 0; + QRhiBufferReadbackResult result; + result.completed = [&readbackCompleted]() { readbackCompleted++; }; + u->readBackBuffer(fromGpuBuffer.data(), 0, fromGpuBuffer->size(), &result); + + cb->endComputePass(u); + + rhi->endOffscreenFrame(); + + QVERIFY(readbackCompleted > 0); + QCOMPARE(result.data.size(), fromGpuBuffer->size()); + + for (int i = 0; i < length; ++i) + QCOMPARE(reinterpret_cast(result.data.constData()[fromGpuMembers["_int"].offset + i * stride140]), i); + + QCOMPARE(readbackCompleted, 1); + + } + +} + +void tst_QRhi::storageBufferRuntimeSizeGraphics_data() +{ + rhiTestData(); +} + +void tst_QRhi::storageBufferRuntimeSizeGraphics() +{ + // Draws a tessellated triangle with color determined by the length of + // buffers bound to shader stages. This is primarily to test Metal + // SPIRV-Cross buffer size buffers. + + QFETCH(QRhi::Implementation, impl); + QFETCH(QRhiInitParams *, initParams); + + QScopedPointer rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr)); + if (!rhi) + QSKIP("QRhi could not be created, skipping testing rendering"); + + if (!rhi->isFeatureSupported(QRhi::Tessellation)) { + // From a Vulkan or Metal implementation we expect tessellation to work, + // even though it is optional (as per spec) for Vulkan. + QVERIFY(rhi->backend() != QRhi::Vulkan); + QVERIFY(rhi->backend() != QRhi::Metal); + QSKIP("Tessellation is not supported with this graphics API, skipping test"); + } + + if (rhi->backend() == QRhi::D3D11) + QSKIP("Skipping tessellation test on D3D for now, test assets not prepared for HLSL yet"); + + QScopedPointer texture(rhi->newTexture(QRhiTexture::RGBA8, QSize(64, 64), 1, + QRhiTexture::RenderTarget | QRhiTexture::UsedAsTransferSource)); + QVERIFY(texture->create()); + + QScopedPointer rt(rhi->newTextureRenderTarget({ texture.data() })); + QScopedPointer rpDesc(rt->newCompatibleRenderPassDescriptor()); + rt->setRenderPassDescriptor(rpDesc.data()); + QVERIFY(rt->create()); + + static const float triangleVertices[] = { + 0.0f, 0.5f, 0.0f, + -0.5f, -0.5f, 0.0f, + 0.5f, -0.5f, 0.0f, + }; + + QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch(); + QScopedPointer vbuf(rhi->newBuffer(QRhiBuffer::Immutable, QRhiBuffer::VertexBuffer, sizeof(triangleVertices))); + QVERIFY(vbuf->create()); + u->uploadStaticBuffer(vbuf.data(), triangleVertices); + + QScopedPointer ubuf(rhi->newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 64)); + QVERIFY(ubuf->create()); + + QMatrix4x4 mvp = rhi->clipSpaceCorrMatrix(); + u->updateDynamicBuffer(ubuf.data(), 0, 64, mvp.constData()); + + QScopedPointer ssbo5(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 256)); + QVERIFY(ssbo5->create()); + + QScopedPointer ssbo3(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 16)); + QVERIFY(ssbo3->create()); + + u->uploadStaticBuffer(ssbo3.data(), QVector({ 1.0f, 1.0f, 1.0f, 1.0f }).constData()); + + QScopedPointer ssbo4(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, 128)); + QVERIFY(ssbo4->create()); + + const int red = 79; + const int green = 43; + const int blue = 251; + + QScopedPointer ssboR(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, red * sizeof(float))); + QVERIFY(ssboR->create()); + + QScopedPointer ssboG(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, green * sizeof(float))); + QVERIFY(ssboG->create()); + + QScopedPointer ssboB(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, blue * sizeof(float))); + QVERIFY(ssboB->create()); + + const int tessOuter0 = 1; + const int tessOuter1 = 2; + const int tessOuter2 = 3; + const int tessInner0 = 4; + + QScopedPointer ssboTessOuter0(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessOuter0 * sizeof(float))); + QVERIFY(ssboTessOuter0->create()); + + QScopedPointer ssboTessOuter1(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessOuter1 * sizeof(float))); + QVERIFY(ssboTessOuter1->create()); + + QScopedPointer ssboTessOuter2(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessOuter2 * sizeof(float))); + QVERIFY(ssboTessOuter2->create()); + + QScopedPointer ssboTessInner0(rhi->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, tessInner0 * sizeof(float))); + QVERIFY(ssboTessInner0->create()); + + + QScopedPointer srb(rhi->newShaderResourceBindings()); + srb->setBindings({ QRhiShaderResourceBinding::uniformBuffer(0, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ubuf.data()), + QRhiShaderResourceBinding::bufferLoad(5, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ssbo5.data()), + QRhiShaderResourceBinding::bufferLoad(3, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage | QRhiShaderResourceBinding::FragmentStage, ssbo3.data()), + QRhiShaderResourceBinding::bufferLoad(4, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ssbo4.data()), + QRhiShaderResourceBinding::bufferLoad(7, QRhiShaderResourceBinding::TessellationControlStage, ssboTessOuter0.data()), + QRhiShaderResourceBinding::bufferLoad(8, QRhiShaderResourceBinding::TessellationControlStage | QRhiShaderResourceBinding::TessellationEvaluationStage, ssboTessOuter1.data()), + QRhiShaderResourceBinding::bufferLoad(9, QRhiShaderResourceBinding::TessellationControlStage, ssboTessOuter2.data()), + QRhiShaderResourceBinding::bufferLoad(10, QRhiShaderResourceBinding::TessellationControlStage, ssboTessInner0.data()), + QRhiShaderResourceBinding::bufferLoad(1, QRhiShaderResourceBinding::FragmentStage, ssboG.data()), + QRhiShaderResourceBinding::bufferLoad(2, QRhiShaderResourceBinding::FragmentStage, ssboB.data()), + QRhiShaderResourceBinding::bufferLoad(6, QRhiShaderResourceBinding::FragmentStage, ssboR.data()) }); + + QVERIFY(srb->create()); + + QScopedPointer pipeline(rhi->newGraphicsPipeline()); + + pipeline->setTopology(QRhiGraphicsPipeline::Patches); + pipeline->setPatchControlPointCount(3); + + pipeline->setShaderStages({ + { QRhiShaderStage::Vertex, loadShader(":/data/storagebuffer_runtime.vert.qsb") }, + { QRhiShaderStage::TessellationControl, loadShader(":/data/storagebuffer_runtime.tesc.qsb") }, + { QRhiShaderStage::TessellationEvaluation, loadShader(":/data/storagebuffer_runtime.tese.qsb") }, + { QRhiShaderStage::Fragment, loadShader(":/data/storagebuffer_runtime.frag.qsb") } + }); + + pipeline->setCullMode(QRhiGraphicsPipeline::None); + + QRhiVertexInputLayout inputLayout; + inputLayout.setBindings({ + { 3 * sizeof(float) } + }); + inputLayout.setAttributes({ + { 0, 0, QRhiVertexInputAttribute::Float3, 0 }, + }); + + pipeline->setVertexInputLayout(inputLayout); + pipeline->setShaderResourceBindings(srb.data()); + pipeline->setRenderPassDescriptor(rpDesc.data()); + + QVERIFY(pipeline->create()); + + QRhiCommandBuffer *cb = nullptr; + QCOMPARE(rhi->beginOffscreenFrame(&cb), QRhi::FrameOpSuccess); + + cb->beginPass(rt.data(), Qt::black, { 1.0f, 0 }, u); + cb->setGraphicsPipeline(pipeline.data()); + cb->setViewport({ 0, 0, float(rt->pixelSize().width()), float(rt->pixelSize().height()) }); + cb->setShaderResources(); + QRhiCommandBuffer::VertexInput vbufBinding(vbuf.data(), 0); + cb->setVertexInput(0, 1, &vbufBinding); + cb->draw(3); + + QRhiReadbackResult readResult; + QImage result; + readResult.completed = [&readResult, &result] { + result = QImage(reinterpret_cast(readResult.data.constData()), + readResult.pixelSize.width(), readResult.pixelSize.height(), + QImage::Format_RGBA8888); + }; + QRhiResourceUpdateBatch *readbackBatch = rhi->nextResourceUpdateBatch(); + readbackBatch->readBackTexture({ texture.data() }, &readResult); + cb->endPass(readbackBatch); + + rhi->endOffscreenFrame(); + + QCOMPARE(result.size(), rt->pixelSize()); + + // cannot check rendering results with Null, because there is no rendering there + if (impl == QRhi::Null) + return; + + QCOMPARE(result.pixel(32, 32), qRgb(red, green, blue)); +} + #include QTEST_MAIN(tst_QRhi)