rhi: Add manual test for RGBA16F and compute

Uses the two compute shaders from Qt Quick 3D. Demonstrates and tests both RGBA16F textures and using them (and doing load/store with mip levels individually) in combination with compute. Task-number: QTBUG-81213 Change-Id: I3f0f250d5997a26c857b7c45517684c63b44e58e Reviewed-by: Johan Helsing <johan.helsing@qt.io>
2020-01-07 15:15:09 +01:00 · 2020-01-07 15:15:09 +01:00 · fe97af0c9a
commit fe97af0c9a
parent ccb2cb84f5
9 changed files with 402 additions and 0 deletions
--- a/tests/manual/rhi/float16texture_with_compute/buildshaders.sh
+++ b/tests/manual/rhi/float16texture_with_compute/buildshaders.sh
@ -0,0 +1,3 @@
+#!/bin/sh
+qsb --glsl "430,310 es" --hlsl 50 --msl 12 load.comp -o load.comp.qsb
+qsb --glsl "430,310 es" --hlsl 50 --msl 12 prefilter.comp -o prefilter.comp.qsb
--- a/tests/manual/rhi/float16texture_with_compute/float16texture_with_compute.cpp
+++ b/tests/manual/rhi/float16texture_with_compute/float16texture_with_compute.cpp
@ -0,0 +1,312 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the examples of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:BSD$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** BSD License Usage
+** Alternatively, you may use this file under the terms of the BSD license
+** as follows:
+**
+** "Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions are
+** met:
+**   * Redistributions of source code must retain the above copyright
+**     notice, this list of conditions and the following disclaimer.
+**   * Redistributions in binary form must reproduce the above copyright
+**     notice, this list of conditions and the following disclaimer in
+**     the documentation and/or other materials provided with the
+**     distribution.
+**   * Neither the name of The Qt Company Ltd nor the names of its
+**     contributors may be used to endorse or promote products derived
+**     from this software without specific prior written permission.
+**
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// An advanced version of floattexture. Instead of RGBA32F, we use RGBA16F, and
+// also generate the floating point data from rgba with compute. Then there's a
+// compute pass using the BSDF prefiltering taken from Qt Quick 3D, which
+// generates all the mip levels.
+
+// Why do we animate the scale of the quad rendered to the window? To have
+// different mip levels used, to prove that all of them are generated
+// correctly, without artifacts (which would occur if memory barriers were not
+// correctly generated by QRhi). For full verification use RenderDoc or similar.
+
+#include "../shared/examplefw.h"
+#include <qmath.h>
+
+static float vertexData[] =
+{ // Y up, CCW
+  -0.5f,   0.5f,   0.0f, 0.0f,
+  -0.5f,  -0.5f,   0.0f, 1.0f,
+  0.5f,   -0.5f,   1.0f, 1.0f,
+  0.5f,   0.5f,    1.0f, 0.0f
+};
+
+static quint16 indexData[] =
+{
+    0, 1, 2, 0, 2, 3
+};
+
+static const int MAX_MIP_LEVELS = 20;
+
+struct {
+    QVector<QRhiResource *> releasePool;
+
+    QRhiBuffer *vbuf = nullptr;
+    QRhiBuffer *ibuf = nullptr;
+    QRhiBuffer *ubuf = nullptr;
+    QRhiTexture *texRgba = nullptr;
+    QRhiTexture *texFloat16 = nullptr;
+    QRhiSampler *sampler = nullptr;
+    QRhiShaderResourceBindings *srb = nullptr;
+    QRhiGraphicsPipeline *ps = nullptr;
+
+    QRhiBuffer *computeUBuf_load = nullptr;
+    QRhiShaderResourceBindings *computeBindings_load = nullptr;
+    QRhiComputePipeline *computePipeline_load = nullptr;
+    QRhiBuffer *computeUBuf_prefilter = nullptr;
+    QRhiShaderResourceBindings *computeBindings_prefilter[MAX_MIP_LEVELS];
+    QRhiComputePipeline *computePipeline_prefilter = nullptr;
+
+    QRhiResourceUpdateBatch *initialUpdates = nullptr;
+    bool computeDone = false;
+    int mipCount;
+    int prefilterUBufElemSize;
+    quint32 prefilterNumWorkGroups[MAX_MIP_LEVELS][3];
+    float scale = 2.5f;
+    int scale_dir = -1;
+} d;
+
+void recordUploadThenFilterFloat16TextureWithCompute(QRhiCommandBuffer *cb)
+{
+    const int w = d.texRgba->pixelSize().width() / 16;
+    const int h = d.texRgba->pixelSize().height() / 16;
+
+    cb->beginComputePass();
+
+    cb->setComputePipeline(d.computePipeline_load);
+    cb->setShaderResources();
+    cb->dispatch(w, h, 1);
+
+    cb->setComputePipeline(d.computePipeline_prefilter);
+    for (int level = 1; level < d.mipCount; ++level) {
+        const int i = level - 1;
+        const int mipW = d.prefilterNumWorkGroups[i][0];
+        const int mipH = d.prefilterNumWorkGroups[i][1];
+        QPair<int, quint32> dynamicOffset = { 0, quint32(d.prefilterUBufElemSize * i) };
+        cb->setShaderResources(d.computeBindings_prefilter[i], 1, &dynamicOffset);
+        cb->dispatch(mipW, mipH, 1);
+    }
+
+    cb->endComputePass();
+}
+
+void Window::customInit()
+{
+    if (!m_r->isFeatureSupported(QRhi::Compute))
+        qFatal("Compute is not supported");
+
+    if (!m_r->isTextureFormatSupported(QRhiTexture::RGBA16F))
+        qFatal("RGBA16F texture format is not supported");
+
+    d.initialUpdates = m_r->nextResourceUpdateBatch();
+
+    // load rgba8 image data
+
+    QImage image;
+    image.load(QLatin1String(":/qt256.png"));
+    image = image.convertToFormat(QImage::Format_RGBA8888);
+    Q_ASSERT(!image.isNull());
+    d.texRgba = m_r->newTexture(QRhiTexture::RGBA8, image.size(), 1, QRhiTexture::UsedWithLoadStore);
+    d.texRgba->build();
+    d.releasePool << d.texRgba;
+
+    d.initialUpdates->uploadTexture(d.texRgba, image);
+
+    d.mipCount = m_r->mipLevelsForSize(image.size());
+    Q_ASSERT(d.mipCount <= MAX_MIP_LEVELS);
+
+    d.texFloat16 = m_r->newTexture(QRhiTexture::RGBA16F, image.size(), 1, QRhiTexture::UsedWithLoadStore | QRhiTexture::MipMapped);
+    d.releasePool << d.texFloat16;
+    d.texFloat16->build();
+
+    // compute
+
+    d.computeUBuf_load = m_r->newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 12);
+    d.computeUBuf_load->build();
+    d.releasePool << d.computeUBuf_load;
+
+    quint32 numWorkGroups[3] = { quint32(image.width()), quint32(image.height()), 0 };
+    d.initialUpdates->updateDynamicBuffer(d.computeUBuf_load, 0, 12, numWorkGroups);
+
+    d.computeBindings_load = m_r->newShaderResourceBindings();
+    d.computeBindings_load->setBindings({
+                                            QRhiShaderResourceBinding::uniformBuffer(0, QRhiShaderResourceBinding::ComputeStage, d.computeUBuf_load),
+                                            QRhiShaderResourceBinding::imageLoad(1, QRhiShaderResourceBinding::ComputeStage, d.texRgba, 0),
+                                            QRhiShaderResourceBinding::imageStore(2, QRhiShaderResourceBinding::ComputeStage, d.texFloat16, 0)
+                                        });
+    d.computeBindings_load->build();
+    d.releasePool << d.computeBindings_load;
+
+    d.computePipeline_load = m_r->newComputePipeline();
+    d.computePipeline_load->setShaderResourceBindings(d.computeBindings_load);
+    d.computePipeline_load->setShaderStage({ QRhiShaderStage::Compute, getShader(QLatin1String(":/load.comp.qsb")) });
+    d.computePipeline_load->build();
+    d.releasePool << d.computePipeline_load;
+
+    d.prefilterUBufElemSize = m_r->ubufAligned(12);
+    d.computeUBuf_prefilter = m_r->newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, d.prefilterUBufElemSize * d.mipCount);
+    d.computeUBuf_prefilter->build();
+    d.releasePool << d.computeUBuf_prefilter;
+
+    int mipW = image.width() >> 1;
+    int mipH = image.height() >> 1;
+    for (int level = 1; level < d.mipCount; ++level) {
+        const int i = level - 1;
+        d.prefilterNumWorkGroups[i][0] = quint32(mipW);
+        d.prefilterNumWorkGroups[i][1] = quint32(mipH);
+        d.prefilterNumWorkGroups[i][2] = 0;
+        d.initialUpdates->updateDynamicBuffer(d.computeUBuf_prefilter, d.prefilterUBufElemSize * i, 12, d.prefilterNumWorkGroups[i]);
+        mipW = mipW > 2 ? mipW >> 1 : 1;
+        mipH = mipH > 2 ? mipH >> 1 : 1;
+
+        d.computeBindings_prefilter[i] = m_r->newShaderResourceBindings();
+        d.computeBindings_prefilter[i]->setBindings({
+                                                        QRhiShaderResourceBinding::uniformBufferWithDynamicOffset(0, QRhiShaderResourceBinding::ComputeStage, d.computeUBuf_prefilter, 12),
+                                                        QRhiShaderResourceBinding::imageLoad(1, QRhiShaderResourceBinding::ComputeStage, d.texFloat16, level - 1),
+                                                        QRhiShaderResourceBinding::imageStore(2, QRhiShaderResourceBinding::ComputeStage, d.texFloat16, level)
+                                                    });
+        d.computeBindings_prefilter[i]->build();
+        d.releasePool << d.computeBindings_prefilter[i];
+    }
+
+    d.computePipeline_prefilter = m_r->newComputePipeline();
+    d.computePipeline_prefilter->setShaderResourceBindings(d.computeBindings_prefilter[0]); // just need a layout compatible one
+    d.computePipeline_prefilter->setShaderStage({ QRhiShaderStage::Compute, getShader(QLatin1String(":/prefilter.comp.qsb")) });
+    d.computePipeline_prefilter->build();
+    d.releasePool << d.computePipeline_prefilter;
+
+    // graphics
+
+    d.vbuf = m_r->newBuffer(QRhiBuffer::Immutable, QRhiBuffer::VertexBuffer, sizeof(vertexData));
+    d.vbuf->build();
+    d.releasePool << d.vbuf;
+
+    d.ibuf = m_r->newBuffer(QRhiBuffer::Immutable, QRhiBuffer::IndexBuffer, sizeof(indexData));
+    d.ibuf->build();
+    d.releasePool << d.ibuf;
+
+    d.ubuf = m_r->newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 68);
+    d.ubuf->build();
+    d.releasePool << d.ubuf;
+
+    // enable mipmaps
+    d.sampler = m_r->newSampler(QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::Linear,
+                                QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
+    d.releasePool << d.sampler;
+    d.sampler->build();
+
+    d.srb = m_r->newShaderResourceBindings();
+    d.releasePool << d.srb;
+    d.srb->setBindings({
+        QRhiShaderResourceBinding::uniformBuffer(0, QRhiShaderResourceBinding::VertexStage | QRhiShaderResourceBinding::FragmentStage, d.ubuf),
+        QRhiShaderResourceBinding::sampledTexture(1, QRhiShaderResourceBinding::FragmentStage, d.texFloat16, d.sampler)
+    });
+    d.srb->build();
+
+    d.ps = m_r->newGraphicsPipeline();
+    d.releasePool << d.ps;
+    d.ps->setShaderStages({
+        { QRhiShaderStage::Vertex, getShader(QLatin1String(":/texture.vert.qsb")) },
+        { QRhiShaderStage::Fragment, getShader(QLatin1String(":/texture.frag.qsb")) }
+    });
+    QRhiVertexInputLayout inputLayout;
+    inputLayout.setBindings({
+        { 4 * sizeof(float) }
+    });
+    inputLayout.setAttributes({
+        { 0, 0, QRhiVertexInputAttribute::Float2, 0 },
+        { 0, 1, QRhiVertexInputAttribute::Float2, 2 * sizeof(float) }
+    });
+    d.ps->setVertexInputLayout(inputLayout);
+    d.ps->setShaderResourceBindings(d.srb);
+    d.ps->setRenderPassDescriptor(m_rp);
+    d.ps->build();
+
+    d.initialUpdates->uploadStaticBuffer(d.vbuf, vertexData);
+    d.initialUpdates->uploadStaticBuffer(d.ibuf, indexData);
+
+    qint32 flip = 0;
+    d.initialUpdates->updateDynamicBuffer(d.ubuf, 64, 4, &flip);
+}
+
+void Window::customRelease()
+{
+    qDeleteAll(d.releasePool);
+    d.releasePool.clear();
+}
+
+void Window::customRender()
+{
+    QRhiCommandBuffer *cb = m_sc->currentFrameCommandBuffer();
+    QRhiResourceUpdateBatch *u = m_r->nextResourceUpdateBatch();
+    if (d.initialUpdates) {
+        u->merge(d.initialUpdates);
+        d.initialUpdates->release();
+        d.initialUpdates = nullptr;
+    }
+
+    QMatrix4x4 mvp = m_proj;
+    mvp.scale(d.scale);
+    d.scale += d.scale_dir * 0.01f;
+    if (qFuzzyIsNull(d.scale) || d.scale >= 2.5f)
+        d.scale_dir *= -1;
+    u->updateDynamicBuffer(d.ubuf, 0, 64, mvp.constData());
+
+    cb->resourceUpdate(u);
+
+    // If not yet done, then do a compute pass that uploads level 0, doing an
+    // rgba8 -> float16 conversion. Follow that with another compute pass to do
+    // the filtering and generate all the mip levels.
+    if (!d.computeDone) {
+        recordUploadThenFilterFloat16TextureWithCompute(cb);
+        d.computeDone = true;
+    }
+
+    const QSize outputSizeInPixels = m_sc->currentPixelSize();
+    cb->beginPass(m_sc->currentFrameRenderTarget(), m_clearColor, { 1.0f, 0 });
+    cb->setGraphicsPipeline(d.ps);
+    cb->setViewport({ 0, 0, float(outputSizeInPixels.width()), float(outputSizeInPixels.height()) });
+    cb->setShaderResources();
+    const QRhiCommandBuffer::VertexInput vbufBinding(d.vbuf, 0);
+    cb->setVertexInput(0, 1, &vbufBinding, d.ibuf, 0, QRhiCommandBuffer::IndexUInt16);
+    cb->drawIndexed(6);
+    cb->endPass();
+}
--- a/tests/manual/rhi/float16texture_with_compute/float16texture_with_compute.pro
+++ b/tests/manual/rhi/float16texture_with_compute/float16texture_with_compute.pro
@ -0,0 +1,8 @@
+TEMPLATE = app
+
+QT += gui-private
+
+SOURCES = \
+    float16texture_with_compute.cpp
+
+RESOURCES = float16texture_with_compute.qrc
--- a/tests/manual/rhi/float16texture_with_compute/float16texture_with_compute.qrc
+++ b/tests/manual/rhi/float16texture_with_compute/float16texture_with_compute.qrc
@ -0,0 +1,9 @@
+<!DOCTYPE RCC><RCC version="1.0">
+<qresource>
+  <file>load.comp.qsb</file>
+  <file>prefilter.comp.qsb</file>
+  <file alias="texture.vert.qsb">../shared/texture.vert.qsb</file>
+  <file alias="texture.frag.qsb">../shared/texture.frag.qsb</file>
+  <file alias="qt256.png">../shared/qt256.png</file>
+</qresource>
+</RCC>
--- a/tests/manual/rhi/float16texture_with_compute/load.comp
+++ b/tests/manual/rhi/float16texture_with_compute/load.comp
@ -0,0 +1,19 @@
+#version 440
+
+layout(local_size_x = 16, local_size_y = 16) in;
+layout(rgba8, binding = 1) readonly uniform image2D inputImage;
+layout(rgba16f, binding = 2) writeonly uniform image2D outputImage;
+
+// There is no equivalent of gl_NumWorkGroups in HLSL. So instead pass the
+// values in in a uniform buffer.
+layout(std140, binding = 0) uniform numWorkGroupsBuf {
+    uvec3 numWorkGroups;
+};
+
+void main()
+{
+    if (gl_GlobalInvocationID.x >= numWorkGroups.x || gl_GlobalInvocationID.y >= numWorkGroups.y)
+        return;
+    vec4 value = imageLoad(inputImage, ivec2(gl_GlobalInvocationID.xy));
+    imageStore(outputImage, ivec2(gl_GlobalInvocationID.xy), value);
+}
--- a/tests/manual/rhi/float16texture_with_compute/load.comp.qsb
+++ b/tests/manual/rhi/float16texture_with_compute/load.comp.qsb
--- a/tests/manual/rhi/float16texture_with_compute/prefilter.comp
+++ b/tests/manual/rhi/float16texture_with_compute/prefilter.comp
@ -0,0 +1,50 @@
+#version 440
+
+layout(local_size_x = 16, local_size_y = 16) in;
+layout(rgba16f, binding = 1) readonly uniform image2D inputImage;
+layout(rgba16f, binding = 2) writeonly uniform image2D outputImage;
+
+// There is no equivalent of gl_NumWorkGroups in HLSL. So instead pass the
+// values in in a uniform buffer.
+layout(std140, binding = 0) uniform numWorkGroupsBuf {
+    uvec3 numWorkGroups;
+};
+
+int wrapMod( in int a, in int base )
+{
+    return ( a >= 0 ) ? a % base : -(a % base) + base;
+}
+
+void getWrappedCoords( inout int sX, inout int sY, in int width, in int height )
+{
+    if (sY < 0) { sX -= width >> 1; sY = -sY; }
+    if (sY >= height) { sX += width >> 1; sY = height - sY; }
+    sX = wrapMod( sX, width );
+}
+
+void main()
+{
+    int prevWidth = int(numWorkGroups.x) << 1;
+    int prevHeight = int(numWorkGroups.y) << 1;
+    if (gl_GlobalInvocationID.x >= numWorkGroups.x || gl_GlobalInvocationID.y >= numWorkGroups.y)
+        return;
+    vec4 accumVal = vec4(0.0);
+    for (int sy = -2; sy <= 2; ++sy) {
+        for (int sx = -2; sx <= 2; ++sx) {
+            int sampleX = sx + (int(gl_GlobalInvocationID.x) << 1);
+            int sampleY = sy + (int(gl_GlobalInvocationID.y) << 1);
+            getWrappedCoords(sampleX, sampleY, prevWidth, prevHeight);
+            if ((sampleY * prevWidth + sampleX) < 0 )
+                sampleY = prevHeight + sampleY;
+            ivec2 pos = ivec2(sampleX, sampleY);
+            vec4 value = imageLoad(inputImage, pos);
+            float filterPdf = 1.0 / ( 1.0 + float(sx*sx + sy*sy)*2.0 );
+            filterPdf /= 4.71238898;
+            accumVal[0] += filterPdf * value.r;
+            accumVal[1] += filterPdf * value.g;
+            accumVal[2] += filterPdf * value.b;
+            accumVal[3] += filterPdf * value.a;
+        }
+    }
+    imageStore(outputImage, ivec2(gl_GlobalInvocationID.xy), accumVal);
+}
--- a/tests/manual/rhi/float16texture_with_compute/prefilter.comp.qsb
+++ b/tests/manual/rhi/float16texture_with_compute/prefilter.comp.qsb
--- a/tests/manual/rhi/rhi.pro
+++ b/tests/manual/rhi/rhi.pro
@ -14,6 +14,7 @@ SUBDIRS += \
    triquadcube \
    offscreen \
    floattexture \
+    float16texture_with_compute \
    mrt \
    shadowmap \
    computebuffer \