From 1d6163577c8a4f1372208e2c9e03b1a69906d385 Mon Sep 17 00:00:00 2001 From: Chris Dalton Date: Wed, 31 May 2017 12:51:23 -0600 Subject: [PATCH] Add support for instanced draws Adds an instance buffer to GrMesh and instance attribs to GrPrimitiveProcessor. Implements support in GL and Vulkan. Adds unit tests for instanced rendering with GrMesh. Bug: skia: Change-Id: If1a9920feb9366f346b8c37cf914713c49129b3a Reviewed-on: https://skia-review.googlesource.com/16200 Reviewed-by: Brian Salomon Commit-Queue: Chris Dalton --- include/gpu/GrCaps.h | 2 + include/gpu/GrShaderCaps.h | 3 + src/gpu/GrCaps.cpp | 2 + src/gpu/GrGeometryProcessor.h | 17 ---- src/gpu/GrGpuCommandBuffer.cpp | 4 +- src/gpu/GrMesh.h | 100 +++++++++++++++++++-- src/gpu/GrPrimitiveProcessor.h | 82 +++++++++++------ src/gpu/GrShaderCaps.cpp | 2 + src/gpu/gl/GrGLCaps.cpp | 37 ++++---- src/gpu/gl/GrGLGpu.cpp | 80 ++++++++++++----- src/gpu/gl/GrGLGpu.h | 15 +++- src/gpu/gl/GrGLVertexArray.cpp | 12 ++- src/gpu/gl/GrGLVertexArray.h | 11 ++- src/gpu/ops/GrDefaultPathRenderer.cpp | 2 +- src/gpu/ops/GrDrawVerticesOp.cpp | 2 +- src/gpu/ops/GrMSAAPathRenderer.cpp | 8 +- src/gpu/ops/GrNonAAFillRectOp.cpp | 1 + src/gpu/ops/GrNonAAStrokeRectOp.cpp | 2 +- src/gpu/ops/GrTessellatingPathRenderer.cpp | 2 +- src/gpu/vk/GrVkCaps.cpp | 2 + src/gpu/vk/GrVkCommandBuffer.cpp | 6 +- src/gpu/vk/GrVkCommandBuffer.h | 26 +++--- src/gpu/vk/GrVkCopyManager.cpp | 2 +- src/gpu/vk/GrVkGpuCommandBuffer.cpp | 69 +++++++++----- src/gpu/vk/GrVkGpuCommandBuffer.h | 30 +++++-- src/gpu/vk/GrVkPipeline.cpp | 51 ++++++----- tests/GrMeshTest.cpp | 99 +++++++++++++++++--- tests/PrimitiveProcessorTest.cpp | 2 +- 28 files changed, 492 insertions(+), 179 deletions(-) diff --git a/include/gpu/GrCaps.h b/include/gpu/GrCaps.h index 4dc776734e..9b84b1da8a 100644 --- a/include/gpu/GrCaps.h +++ b/include/gpu/GrCaps.h @@ -49,6 +49,7 @@ public: bool textureBarrierSupport() const { return fTextureBarrierSupport; } bool sampleLocationsSupport() const { return fSampleLocationsSupport; } bool multisampleDisableSupport() const { return fMultisampleDisableSupport; } + bool instanceAttribSupport() const { return fInstanceAttribSupport; } bool usesMixedSamples() const { return fUsesMixedSamples; } bool preferClientSideDynamicBuffers() const { return fPreferClientSideDynamicBuffers; } @@ -215,6 +216,7 @@ protected: bool fTextureBarrierSupport : 1; bool fSampleLocationsSupport : 1; bool fMultisampleDisableSupport : 1; + bool fInstanceAttribSupport : 1; bool fUsesMixedSamples : 1; bool fPreferClientSideDynamicBuffers : 1; bool fFullClearIsFree : 1; diff --git a/include/gpu/GrShaderCaps.h b/include/gpu/GrShaderCaps.h index e5edc7ebea..006efb6125 100644 --- a/include/gpu/GrShaderCaps.h +++ b/include/gpu/GrShaderCaps.h @@ -130,6 +130,8 @@ public: bool texelFetchSupport() const { return fTexelFetchSupport; } + bool vertexIDSupport() const { return fVertexIDSupport; } + AdvBlendEqInteraction advBlendEqInteraction() const { return fAdvBlendEqInteraction; } bool mustEnableAdvBlendEqs() const { @@ -284,6 +286,7 @@ private: bool fSampleMaskOverrideCoverageSupport : 1; bool fExternalTextureSupport : 1; bool fTexelFetchSupport : 1; + bool fVertexIDSupport : 1; // Used for specific driver bug work arounds bool fCanUseMinAndAbsTogether : 1; diff --git a/src/gpu/GrCaps.cpp b/src/gpu/GrCaps.cpp index 5c04d190c5..9c8b1db19c 100644 --- a/src/gpu/GrCaps.cpp +++ b/src/gpu/GrCaps.cpp @@ -43,6 +43,7 @@ GrCaps::GrCaps(const GrContextOptions& options) { fTextureBarrierSupport = false; fSampleLocationsSupport = false; fMultisampleDisableSupport = false; + fInstanceAttribSupport = false; fUsesMixedSamples = false; fPreferClientSideDynamicBuffers = false; fFullClearIsFree = false; @@ -132,6 +133,7 @@ SkString GrCaps::dump() const { r.appendf("Texture Barrier Support : %s\n", gNY[fTextureBarrierSupport]); r.appendf("Sample Locations Support : %s\n", gNY[fSampleLocationsSupport]); r.appendf("Multisample disable support : %s\n", gNY[fMultisampleDisableSupport]); + r.appendf("Instance Attrib Support : %s\n", gNY[fInstanceAttribSupport]); r.appendf("Uses Mixed Samples : %s\n", gNY[fUsesMixedSamples]); r.appendf("Prefer client-side dynamic buffers : %s\n", gNY[fPreferClientSideDynamicBuffers]); r.appendf("Full screen clear is free : %s\n", gNY[fFullClearIsFree]); diff --git a/src/gpu/GrGeometryProcessor.h b/src/gpu/GrGeometryProcessor.h index e5222bf80a..086d9baa53 100644 --- a/src/gpu/GrGeometryProcessor.h +++ b/src/gpu/GrGeometryProcessor.h @@ -40,23 +40,6 @@ public: } protected: - /** - * Subclasses call this from their constructor to register vertex attributes. Attributes - * will be padded to the nearest 4 bytes for performance reasons. - * TODO After deferred geometry, we should do all of this inline in GenerateGeometry alongside - * the struct used to actually populate the attributes. This is all extremely fragile, vertex - * attributes have to be added in the order they will appear in the struct which maps memory. - * The processor key should reflect the vertex attributes, or there lack thereof in the - * GrGeometryProcessor. - */ - const Attribute& addVertexAttrib(const char* name, GrVertexAttribType type, - GrSLPrecision precision = kDefault_GrSLPrecision) { - precision = (kDefault_GrSLPrecision == precision) ? kMedium_GrSLPrecision : precision; - fAttribs.emplace_back(name, type, precision); - fVertexStride += fAttribs.back().fOffset; - return fAttribs.back(); - } - void setWillUseGeoShader() { fWillUseGeoShader = true; } /** diff --git a/src/gpu/GrGpuCommandBuffer.cpp b/src/gpu/GrGpuCommandBuffer.cpp index 0c7bb9704f..5570a5a65e 100644 --- a/src/gpu/GrGpuCommandBuffer.cpp +++ b/src/gpu/GrGpuCommandBuffer.cpp @@ -42,8 +42,10 @@ bool GrGpuCommandBuffer::draw(const GrPipeline& pipeline, int meshCount, const SkRect& bounds) { #ifdef SK_DEBUG + SkASSERT(!primProc.hasInstanceAttribs() || this->gpu()->caps()->instanceAttribSupport()); for (int i = 0; i < meshCount; ++i) { - SkASSERT(SkToBool(primProc.numAttribs()) == meshes[i].hasVertexData()); + SkASSERT(primProc.hasVertexAttribs() == meshes[i].hasVertexData()); + SkASSERT(primProc.hasInstanceAttribs() == meshes[i].isInstanced()); } #endif diff --git a/src/gpu/GrMesh.h b/src/gpu/GrMesh.h index 98f9911a6f..53854764f2 100644 --- a/src/gpu/GrMesh.h +++ b/src/gpu/GrMesh.h @@ -24,19 +24,26 @@ public: GrMesh(GrPrimitiveType primitiveType) : fPrimitiveType(primitiveType) , fBaseVertex(0) { - SkDEBUGCODE(fNonIndexData.fVertexCount = -1); + SkDEBUGCODE(fNonIndexNonInstanceData.fVertexCount = -1;) } GrPrimitiveType primitiveType() const { return fPrimitiveType; } bool isIndexed() const { return SkToBool(fIndexBuffer.get()); } + bool isInstanced() const { return SkToBool(fInstanceBuffer.get()); } bool hasVertexData() const { return SkToBool(fVertexBuffer.get()); } - void setNonIndexed(int vertexCount); + void setNonIndexedNonInstanced(int vertexCount); + void setIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex, uint16_t minIndexValue, uint16_t maxIndexValue); void setIndexedPatterned(const GrBuffer* indexBuffer, int indexCount, int vertexCount, int patternRepeatCount, int maxPatternRepetitionsInIndexBuffer); + void setInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance, + int vertexCount); + void setIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, + const GrBuffer* instanceBuffer, int instanceCount, int baseInstance=0); + void setVertexData(const GrBuffer* vertexBuffer, int baseVertex = 0); class SendToGpuImpl { @@ -51,6 +58,17 @@ public: uint16_t maxIndexValue, const GrBuffer* vertexBuffer, int baseVertex) = 0; + virtual void sendInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* vertexBuffer, int vertexCount, + int baseVertex, const GrBuffer* instanceBuffer, + int instanceCount, int baseInstance) = 0; + + virtual void sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* indexBuffer, int indexCount, + int baseIndex, const GrBuffer* vertexBuffer, + int baseVertex, const GrBuffer* instanceBuffer, + int instanceCount, int baseInstance) = 0; + virtual ~SendToGpuImpl() {} }; @@ -63,15 +81,16 @@ private: GrPrimitiveType fPrimitiveType; PendingBuffer fIndexBuffer; + PendingBuffer fInstanceBuffer; PendingBuffer fVertexBuffer; int fBaseVertex; union { - struct { // When fIndexBuffer == nullptr. + struct { // When fIndexBuffer == nullptr and fInstanceBuffer == nullptr. int fVertexCount; - } fNonIndexData; + } fNonIndexNonInstanceData; - struct { // When fIndexBuffer != nullptr. + struct { // When fIndexBuffer != nullptr and fInstanceBuffer == nullptr. struct { int fIndexCount; int fPatternRepeatCount; @@ -90,12 +109,30 @@ private: } fPatternData; }; }; + + struct { // When fInstanceBuffer != nullptr. + struct { + int fInstanceCount; + int fBaseInstance; + } fInstanceData; + + union { // When fIndexBuffer == nullptr. + struct { + int fVertexCount; + } fInstanceNonIndexData; + + struct { // When fIndexBuffer != nullptr. + int fIndexCount; + } fInstanceIndexData; + }; + }; }; }; -inline void GrMesh::setNonIndexed(int vertexCount) { +inline void GrMesh::setNonIndexedNonInstanced(int vertexCount) { fIndexBuffer.reset(nullptr); - fNonIndexData.fVertexCount = vertexCount; + fInstanceBuffer.reset(nullptr); + fNonIndexNonInstanceData.fVertexCount = vertexCount; } inline void GrMesh::setIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex, @@ -105,6 +142,7 @@ inline void GrMesh::setIndexed(const GrBuffer* indexBuffer, int indexCount, int SkASSERT(baseIndex >= 0); SkASSERT(maxIndexValue > minIndexValue); fIndexBuffer.reset(indexBuffer); + fInstanceBuffer.reset(nullptr); fIndexData.fIndexCount = indexCount; fIndexData.fPatternRepeatCount = 0; fNonPatternIndexData.fBaseIndex = baseIndex; @@ -121,12 +159,40 @@ inline void GrMesh::setIndexedPatterned(const GrBuffer* indexBuffer, int indexCo SkASSERT(patternRepeatCount >= 1); SkASSERT(maxPatternRepetitionsInIndexBuffer >= 1); fIndexBuffer.reset(indexBuffer); + fInstanceBuffer.reset(nullptr); fIndexData.fIndexCount = indexCount; fIndexData.fPatternRepeatCount = patternRepeatCount; fPatternData.fVertexCount = vertexCount; fPatternData.fMaxPatternRepetitionsInIndexBuffer = maxPatternRepetitionsInIndexBuffer; } +inline void GrMesh::setInstanced(const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance, int vertexCount) { + SkASSERT(instanceBuffer); + SkASSERT(instanceCount >= 1); + SkASSERT(baseInstance >= 0); + fIndexBuffer.reset(nullptr); + fInstanceBuffer.reset(instanceBuffer); + fInstanceData.fInstanceCount = instanceCount; + fInstanceData.fBaseInstance = baseInstance; + fInstanceNonIndexData.fVertexCount = vertexCount; +} + +inline void GrMesh::setIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) { + SkASSERT(indexBuffer); + SkASSERT(indexCount >= 1); + SkASSERT(instanceBuffer); + SkASSERT(instanceCount >= 1); + SkASSERT(baseInstance >= 0); + fIndexBuffer.reset(indexBuffer); + fInstanceBuffer.reset(instanceBuffer); + fInstanceData.fInstanceCount = instanceCount; + fInstanceData.fBaseInstance = baseInstance; + fInstanceIndexData.fIndexCount = indexCount; +} + inline void GrMesh::setVertexData(const GrBuffer* vertexBuffer, int baseVertex) { SkASSERT(baseVertex >= 0); fVertexBuffer.reset(vertexBuffer); @@ -134,10 +200,26 @@ inline void GrMesh::setVertexData(const GrBuffer* vertexBuffer, int baseVertex) } inline void GrMesh::sendToGpu(const GrPrimitiveProcessor& primProc, SendToGpuImpl* impl) const { + if (this->isInstanced()) { + if (!this->isIndexed()) { + impl->sendInstancedMeshToGpu(primProc, fPrimitiveType, fVertexBuffer.get(), + fInstanceNonIndexData.fVertexCount, fBaseVertex, + fInstanceBuffer.get(), fInstanceData.fInstanceCount, + fInstanceData.fBaseInstance); + } else { + impl->sendIndexedInstancedMeshToGpu(primProc, fPrimitiveType, fIndexBuffer.get(), + fInstanceIndexData.fIndexCount, 0, + fVertexBuffer.get(), fBaseVertex, + fInstanceBuffer.get(), fInstanceData.fInstanceCount, + fInstanceData.fBaseInstance); + } + return; + } + if (!this->isIndexed()) { - SkASSERT(fNonIndexData.fVertexCount > 0); + SkASSERT(fNonIndexNonInstanceData.fVertexCount > 0); impl->sendMeshToGpu(primProc, fPrimitiveType, fVertexBuffer.get(), - fNonIndexData.fVertexCount, fBaseVertex); + fNonIndexNonInstanceData.fVertexCount, fBaseVertex); return; } diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h index d078ac5072..63265f02a6 100644 --- a/src/gpu/GrPrimitiveProcessor.h +++ b/src/gpu/GrPrimitiveProcessor.h @@ -40,33 +40,45 @@ class GrGLSLPrimitiveProcessor; */ class GrPrimitiveProcessor : public GrResourceIOProcessor, public GrProgramElement { public: - // Only the GrGeometryProcessor subclass actually has a geo shader or vertex attributes, but - // we put these calls on the base class to prevent having to cast - virtual bool willUseGeoShader() const = 0; - struct Attribute { - Attribute() - : fName(nullptr) - , fType(kFloat_GrVertexAttribType) - , fOffset(0) {} - Attribute(const char* name, GrVertexAttribType type, GrSLPrecision precision) - : fName(name) - , fType(type) - , fOffset(SkAlign4(GrVertexAttribTypeSize(type))) - , fPrecision(precision) {} - const char* fName; - GrVertexAttribType fType; - size_t fOffset; - GrSLPrecision fPrecision; + enum class InputRate : bool { + kPerVertex, + kPerInstance + }; + + const char* fName; + GrVertexAttribType fType; + int fOffsetInRecord; + GrSLPrecision fPrecision; + InputRate fInputRate; }; int numAttribs() const { return fAttribs.count(); } const Attribute& getAttrib(int index) const { return fAttribs[index]; } - // Returns the vertex stride of the GP. A common use case is to request geometry from a - // GrOpList based off of the stride, and to populate this memory using an implicit array of - // structs. In this case, it is best to assert the vertexstride == sizeof(VertexStruct). - size_t getVertexStride() const { return fVertexStride; } + bool hasVertexAttribs() const { return SkToBool(fVertexStride); } + bool hasInstanceAttribs() const { return SkToBool(fInstanceStride); } + + /** + * These return the strides of the vertex and instance buffers. Attributes are expected to be + * laid out interleaved in their corresponding buffer (vertex or instance). fOffsetInRecord + * indicates an attribute's location in bytes relative to the first attribute. (These are padded + * to the nearest 4 bytes for performance reasons.) + * + * A common practice is to populate the buffer's memory using an implicit array of structs. In + * this case, it is best to assert: + * + * stride == sizeof(struct) and + * offsetof(struct, field[i]) == attrib[i].fOffsetInRecord + * + * NOTE: for instanced draws the vertex buffer has a single record that each instance reuses. + */ + int getVertexStride() const { return fVertexStride; } + int getInstanceStride() const { return fInstanceStride; } + + // Only the GrGeometryProcessor subclass actually has a geo shader or vertex attributes, but + // we put these calls on the base class to prevent having to cast + virtual bool willUseGeoShader() const = 0; /** * Computes a transformKey from an array of coord transforms. Will only look at the first @@ -107,11 +119,25 @@ public: virtual bool implementsDistanceVector() const { return false; } protected: - GrPrimitiveProcessor() : fVertexStride(0) {} - - enum { kPreallocAttribCnt = 8 }; - SkSTArray fAttribs; - size_t fVertexStride; + /** + * Subclasses call these from their constructor to register vertex and instance attributes. + */ + const Attribute& addVertexAttrib(const char* name, GrVertexAttribType type, + GrSLPrecision precision = kDefault_GrSLPrecision) { + precision = (kDefault_GrSLPrecision == precision) ? kMedium_GrSLPrecision : precision; + fAttribs.push_back() = {name, type, fVertexStride, precision, + Attribute::InputRate::kPerVertex}; + fVertexStride += static_cast(SkAlign4(GrVertexAttribTypeSize(type))); + return fAttribs.back(); + } + const Attribute& addInstanceAttrib(const char* name, GrVertexAttribType type, + GrSLPrecision precision = kDefault_GrSLPrecision) { + precision = (kDefault_GrSLPrecision == precision) ? kMedium_GrSLPrecision : precision; + fAttribs.push_back() = {name, type, fInstanceStride, precision, + Attribute::InputRate::kPerInstance}; + fInstanceStride += static_cast(SkAlign4(GrVertexAttribTypeSize(type))); + return fAttribs.back(); + } private: void addPendingIOs() const override { GrResourceIOProcessor::addPendingIOs(); } @@ -120,6 +146,10 @@ private: void notifyRefCntIsZero() const final {} virtual bool hasExplicitLocalCoords() const = 0; + SkSTArray<8, Attribute> fAttribs; + int fVertexStride = 0; + int fInstanceStride = 0; + typedef GrProcessor INHERITED; }; diff --git a/src/gpu/GrShaderCaps.cpp b/src/gpu/GrShaderCaps.cpp index cf29ab5976..6f5dc7f0d7 100644 --- a/src/gpu/GrShaderCaps.cpp +++ b/src/gpu/GrShaderCaps.cpp @@ -68,6 +68,7 @@ GrShaderCaps::GrShaderCaps(const GrContextOptions& options) { fSampleMaskOverrideCoverageSupport = false; fExternalTextureSupport = false; fTexelFetchSupport = false; + fVertexIDSupport = false; fVersionDeclString = nullptr; fShaderDerivativeExtensionString = nullptr; @@ -158,6 +159,7 @@ SkString GrShaderCaps::dump() const { "YES" : "NO")); r.appendf("External texture support: %s\n", (fExternalTextureSupport ? "YES" : "NO")); r.appendf("texelFetch support: %s\n", (fTexelFetchSupport ? "YES" : "NO")); + r.appendf("sk_VertexID support: %s\n", (fVertexIDSupport ? "YES" : "NO")); r.appendf("Max VS Samplers: %d\n", fMaxVertexSamplers); r.appendf("Max GS Samplers: %d\n", fMaxGeometrySamplers); r.appendf("Max FS Samplers: %d\n", fMaxFragmentSamplers); diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp index df905a4f0f..a39b744574 100644 --- a/src/gpu/gl/GrGLCaps.cpp +++ b/src/gpu/gl/GrGLCaps.cpp @@ -38,7 +38,6 @@ GrGLCaps::GrGLCaps(const GrContextOptions& contextOptions, fDirectStateAccessSupport = false; fDebugSupport = false; fES2CompatibilitySupport = false; - fDrawInstancedSupport = false; fDrawIndirectSupport = false; fMultiDrawIndirectSupport = false; fBaseInstanceSupport = false; @@ -184,6 +183,20 @@ void GrGLCaps::init(const GrContextOptions& contextOptions, fMultisampleDisableSupport = ctxInfo.hasExtension("GL_EXT_multisample_compatibility"); } + if (kGL_GrGLStandard == standard) { + // 3.1 has draw_instanced but not instanced_arrays, for the time being we only care about + // instanced arrays, but we could make this more granular if we wanted + fInstanceAttribSupport = + version >= GR_GL_VER(3, 2) || + (ctxInfo.hasExtension("GL_ARB_draw_instanced") && + ctxInfo.hasExtension("GL_ARB_instanced_arrays")); + } else { + fInstanceAttribSupport = + version >= GR_GL_VER(3, 0) || + (ctxInfo.hasExtension("GL_EXT_draw_instanced") && + ctxInfo.hasExtension("GL_EXT_instanced_arrays")); + } + if (kGL_GrGLStandard == standard) { if (version >= GR_GL_VER(3, 0)) { fBindFragDataLocationSupport = true; @@ -528,20 +541,6 @@ void GrGLCaps::init(const GrContextOptions& contextOptions, fOversizedStencilSupport = ctxInfo.version() >= GR_GL_VER(3, 0); } - if (kGL_GrGLStandard == standard) { - // 3.1 has draw_instanced but not instanced_arrays, for the time being we only care about - // instanced arrays, but we could make this more granular if we wanted - fDrawInstancedSupport = - version >= GR_GL_VER(3, 2) || - (ctxInfo.hasExtension("GL_ARB_draw_instanced") && - ctxInfo.hasExtension("GL_ARB_instanced_arrays")); - } else { - fDrawInstancedSupport = - version >= GR_GL_VER(3, 0) || - (ctxInfo.hasExtension("GL_EXT_draw_instanced") && - ctxInfo.hasExtension("GL_EXT_instanced_arrays")); - } - if (kGL_GrGLStandard == standard) { fDrawIndirectSupport = version >= GR_GL_VER(4,0) || ctxInfo.hasExtension("GL_ARB_draw_indirect"); @@ -850,6 +849,13 @@ void GrGLCaps::initGLSL(const GrGLContextInfo& ctxInfo) { } } + if (kGL_GrGLStandard == standard) { + shaderCaps->fVertexIDSupport = true; + } else { + // Desktop GLSL 3.30 == ES GLSL 3.00. + shaderCaps->fVertexIDSupport = ctxInfo.glslGeneration() >= k330_GrGLSLGeneration; + } + // The Tegra3 compiler will sometimes never return if we have min(abs(x), 1.0), so we must do // the abs first in a separate expression. if (kTegra3_GrGLRenderer == ctxInfo.renderer()) { @@ -1242,7 +1248,6 @@ SkString GrGLCaps::dump() const { r.appendf("Vertex array object support: %s\n", (fVertexArrayObjectSupport ? "YES": "NO")); r.appendf("Direct state access support: %s\n", (fDirectStateAccessSupport ? "YES": "NO")); r.appendf("Debug support: %s\n", (fDebugSupport ? "YES": "NO")); - r.appendf("Draw instanced support: %s\n", (fDrawInstancedSupport ? "YES" : "NO")); r.appendf("Draw indirect support: %s\n", (fDrawIndirectSupport ? "YES" : "NO")); r.appendf("Multi draw indirect support: %s\n", (fMultiDrawIndirectSupport ? "YES" : "NO")); r.appendf("Base instance support: %s\n", (fBaseInstanceSupport ? "YES" : "NO")); diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp index 045804c668..c32d5d8fda 100644 --- a/src/gpu/gl/GrGLGpu.cpp +++ b/src/gpu/gl/GrGLGpu.cpp @@ -1779,33 +1779,46 @@ bool GrGLGpu::flushGLState(const GrPipeline& pipeline, const GrPrimitiveProcesso void GrGLGpu::setupGeometry(const GrPrimitiveProcessor& primProc, const GrBuffer* indexBuffer, const GrBuffer* vertexBuffer, - int baseVertex) { + int baseVertex, + const GrBuffer* instanceBuffer, + int baseInstance) { GrGLAttribArrayState* attribState; if (indexBuffer) { - SkASSERT(indexBuffer); - SkASSERT(!indexBuffer->isMapped()); + SkASSERT(indexBuffer && !indexBuffer->isMapped()); attribState = fHWVertexArrayState.bindInternalVertexArray(this, indexBuffer); } else { attribState = fHWVertexArrayState.bindInternalVertexArray(this); } - int vaCount = primProc.numAttribs(); - attribState->enableVertexArrays(this, vaCount); + struct { + const GrBuffer* fBuffer; + int fStride; + size_t fBufferOffset; + } bindings[2]; - if (vaCount > 0) { - SkASSERT(vertexBuffer); - SkASSERT(!vertexBuffer->isMapped()); + if (int vertexStride = primProc.getVertexStride()) { + SkASSERT(vertexBuffer && !vertexBuffer->isMapped()); + bindings[0].fBuffer = vertexBuffer; + bindings[0].fStride = vertexStride; + bindings[0].fBufferOffset = vertexBuffer->baseOffset() + baseVertex * vertexStride; + } + if (int instanceStride = primProc.getInstanceStride()) { + SkASSERT(instanceBuffer && !instanceBuffer->isMapped()); + bindings[1].fBuffer = instanceBuffer; + bindings[1].fStride = instanceStride; + bindings[1].fBufferOffset = instanceBuffer->baseOffset() + baseInstance * instanceStride; + } - GrGLsizei stride = static_cast(primProc.getVertexStride()); - size_t vertexBufferOffsetInBytes = stride * baseVertex + vertexBuffer->baseOffset(); - size_t attribOffset = 0; + int numAttribs = primProc.numAttribs(); + attribState->enableVertexArrays(this, numAttribs); - for (int attribIndex = 0; attribIndex < vaCount; attribIndex++) { - const GrGeometryProcessor::Attribute& attrib = primProc.getAttrib(attribIndex); - attribState->set(this, attribIndex, vertexBuffer, attrib.fType, stride, - vertexBufferOffsetInBytes + attribOffset); - attribOffset += attrib.fOffset; - } + for (int i = 0; i < numAttribs; ++i) { + using InputRate = GrPrimitiveProcessor::Attribute::InputRate; + const GrGeometryProcessor::Attribute& attrib = primProc.getAttrib(i); + const int divisor = InputRate::kPerInstance == attrib.fInputRate ? 1 : 0; + const auto& binding = bindings[divisor]; + attribState->set(this, i, binding.fBuffer, attrib.fType, binding.fStride, + binding.fBufferOffset + attrib.fOffsetInRecord, divisor); } } @@ -2472,10 +2485,10 @@ void GrGLGpu::sendMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveTyp const GrGLenum glPrimType = gPrimitiveType2GLMode[primitiveType]; if (this->glCaps().drawArraysBaseVertexIsBroken()) { - this->setupGeometry(primProc, nullptr, vertexBuffer, baseVertex); + this->setupGeometry(primProc, nullptr, vertexBuffer, baseVertex, nullptr, 0); GL_CALL(DrawArrays(glPrimType, 0, vertexCount)); } else { - this->setupGeometry(primProc, nullptr, vertexBuffer, 0); + this->setupGeometry(primProc, nullptr, vertexBuffer, 0, nullptr, 0); GL_CALL(DrawArrays(glPrimType, baseVertex, vertexCount)); } fStats.incNumDraws(); @@ -2490,7 +2503,7 @@ void GrGLGpu::sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, GrGLvoid* const indices = reinterpret_cast(indexBuffer->baseOffset() + sizeof(uint16_t) * baseIndex); - this->setupGeometry(primProc, indexBuffer, vertexBuffer, baseVertex); + this->setupGeometry(primProc, indexBuffer, vertexBuffer, baseVertex, nullptr, 0); if (this->glCaps().drawRangeElementsSupport()) { GL_CALL(DrawRangeElements(glPrimType, minIndexValue, maxIndexValue, indexCount, @@ -2501,6 +2514,33 @@ void GrGLGpu::sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, fStats.incNumDraws(); } +void GrGLGpu::sendInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveType + primitiveType, const GrBuffer* vertexBuffer, + int vertexCount, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) { + const GrGLenum glPrimType = gPrimitiveType2GLMode[primitiveType]; + this->setupGeometry(primProc, nullptr, vertexBuffer, 0, instanceBuffer, baseInstance); + GL_CALL(DrawArraysInstanced(glPrimType, baseVertex, vertexCount, instanceCount)); + fStats.incNumDraws(); +} + +void GrGLGpu::sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, + GrPrimitiveType primitiveType, + const GrBuffer* indexBuffer, int indexCount, + int baseIndex, const GrBuffer* vertexBuffer, + int baseVertex, const GrBuffer* instanceBuffer, + int instanceCount, int baseInstance) { + const GrGLenum glPrimType = gPrimitiveType2GLMode[primitiveType]; + GrGLvoid* indices = reinterpret_cast(indexBuffer->baseOffset() + + sizeof(uint16_t) * baseIndex); + this->setupGeometry(primProc, indexBuffer, vertexBuffer, baseVertex, + instanceBuffer, baseInstance); + GL_CALL(DrawElementsInstanced(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT, indices, + instanceCount)); + fStats.incNumDraws(); +} + void GrGLGpu::onResolveRenderTarget(GrRenderTarget* target) { GrGLRenderTarget* rt = static_cast(target); if (rt->needsResolve()) { diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h index c83ad85585..7b59c3b696 100644 --- a/src/gpu/gl/GrGLGpu.h +++ b/src/gpu/gl/GrGLGpu.h @@ -115,6 +115,17 @@ public: uint16_t minIndexValue, uint16_t maxIndexValue, const GrBuffer* vertexBuffer, int baseVertex) final; + void sendInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* vertexBuffer, int vertexCount, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; + + void sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* indexBuffer, int indexCount, int baseIndex, + const GrBuffer* vertexBuffer, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; + // The GrGLGpuCommandBuffer does not buffer up draws before submitting them to the gpu. // Thus this is the implementation of the clear call for the corresponding passthrough function // on GrGLGpuCommandBuffer. @@ -264,7 +275,9 @@ private: void setupGeometry(const GrPrimitiveProcessor&, const GrBuffer* indexBuffer, const GrBuffer* vertexBuffer, - int baseVertex); + int baseVertex, + const GrBuffer* instanceBuffer, + int baseInstance); void flushBlend(const GrXferProcessor::BlendInfo& blendInfo, const GrSwizzle&); diff --git a/src/gpu/gl/GrGLVertexArray.cpp b/src/gpu/gl/GrGLVertexArray.cpp index 74e609e9b8..59f2be9831 100644 --- a/src/gpu/gl/GrGLVertexArray.cpp +++ b/src/gpu/gl/GrGLVertexArray.cpp @@ -53,8 +53,10 @@ void GrGLAttribArrayState::set(GrGLGpu* gpu, const GrBuffer* vertexBuffer, GrVertexAttribType type, GrGLsizei stride, - size_t offsetInBytes) { + size_t offsetInBytes, + int divisor) { SkASSERT(index >= 0 && index < fAttribArrayStates.count()); + SkASSERT(0 == divisor || gpu->caps()->instanceAttribSupport()); AttribArrayState* array = &fAttribArrayStates[index]; if (array->fVertexBufferUniqueID != vertexBuffer->uniqueID() || array->fType != type || @@ -84,10 +86,18 @@ void GrGLAttribArrayState::set(GrGLGpu* gpu, array->fStride = stride; array->fOffset = offsetInBytes; } + if (gpu->caps()->instanceAttribSupport() && array->fDivisor != divisor) { + SkASSERT(0 == divisor || 1 == divisor); // not necessarily a requirement but what we expect. + GR_GL_CALL(gpu->glInterface(), VertexAttribDivisor(index, divisor)); + array->fDivisor = divisor; + } } void GrGLAttribArrayState::enableVertexArrays(const GrGLGpu* gpu, int enabledCount) { SkASSERT(enabledCount <= fAttribArrayStates.count()); + if (fEnabledCountIsValid && enabledCount == fNumEnabledArrays) { + return; + } int firstIdxToEnable = fEnabledCountIsValid ? fNumEnabledArrays : 0; for (int i = firstIdxToEnable; i < enabledCount; ++i) { diff --git a/src/gpu/gl/GrGLVertexArray.h b/src/gpu/gl/GrGLVertexArray.h index 1970e30425..553df4f3fd 100644 --- a/src/gpu/gl/GrGLVertexArray.h +++ b/src/gpu/gl/GrGLVertexArray.h @@ -42,7 +42,8 @@ public: const GrBuffer* vertexBuffer, GrVertexAttribType type, GrGLsizei stride, - size_t offsetInBytes); + size_t offsetInBytes, + int divisor = 0); /** * This function enables the first 'enabledCount' vertex arrays and disables the rest. @@ -63,16 +64,22 @@ public: int count() const { return fAttribArrayStates.count(); } private: + static constexpr int kInvalidDivisor = -1; + /** * Tracks the state of glVertexAttribArray for an attribute index. */ struct AttribArrayState { - void invalidate() { fVertexBufferUniqueID.makeInvalid(); } + void invalidate() { + fVertexBufferUniqueID.makeInvalid(); + fDivisor = kInvalidDivisor; + } GrGpuResource::UniqueID fVertexBufferUniqueID; GrVertexAttribType fType; GrGLsizei fStride; size_t fOffset; + int fDivisor; }; SkSTArray<16, AttribArrayState, true> fAttribArrayStates; diff --git a/src/gpu/ops/GrDefaultPathRenderer.cpp b/src/gpu/ops/GrDefaultPathRenderer.cpp index 7633868c25..c282036971 100644 --- a/src/gpu/ops/GrDefaultPathRenderer.cpp +++ b/src/gpu/ops/GrDefaultPathRenderer.cpp @@ -249,7 +249,7 @@ private: GrMesh mesh(primitiveType); if (!isIndexed) { - mesh.setNonIndexed(vertexOffset); + mesh.setNonIndexedNonInstanced(vertexOffset); } else { mesh.setIndexed(indexBuffer, indexOffset, firstIndex, 0, vertexOffset - 1); } diff --git a/src/gpu/ops/GrDrawVerticesOp.cpp b/src/gpu/ops/GrDrawVerticesOp.cpp index e5b13831d2..a83b3e07b9 100644 --- a/src/gpu/ops/GrDrawVerticesOp.cpp +++ b/src/gpu/ops/GrDrawVerticesOp.cpp @@ -231,7 +231,7 @@ void GrDrawVerticesOp::onPrepareDraws(Target* target) const { GrMesh mesh(this->primitiveType()); if (!indices) { - mesh.setNonIndexed(fVertexCount); + mesh.setNonIndexedNonInstanced(fVertexCount); } else { mesh.setIndexed(indexBuffer, fIndexCount, firstIndex, 0, fVertexCount - 1); } diff --git a/src/gpu/ops/GrMSAAPathRenderer.cpp b/src/gpu/ops/GrMSAAPathRenderer.cpp index 36faf2f2c9..50a839d7b0 100644 --- a/src/gpu/ops/GrMSAAPathRenderer.cpp +++ b/src/gpu/ops/GrMSAAPathRenderer.cpp @@ -337,7 +337,7 @@ private: const GrBuffer* lineVertexBuffer; int firstLineVertex; MSAALineVertices lines; - size_t lineVertexStride = sizeof(MSAALineVertices::Vertex); + int lineVertexStride = sizeof(MSAALineVertices::Vertex); lines.vertices = (MSAALineVertices::Vertex*) target->makeVertexSpace(lineVertexStride, fMaxLineVertices, &lineVertexBuffer, @@ -350,7 +350,7 @@ private: SkDEBUGCODE(lines.verticesEnd = lines.vertices + fMaxLineVertices;) MSAAQuadVertices quads; - size_t quadVertexStride = sizeof(MSAAQuadVertices::Vertex); + int quadVertexStride = sizeof(MSAAQuadVertices::Vertex); SkAutoMalloc quadVertexPtr(fMaxQuadVertices * quadVertexStride); quads.vertices = (MSAAQuadVertices::Vertex*) quadVertexPtr.get(); quads.nextVertex = quads.vertices; @@ -412,7 +412,7 @@ private: GrMesh lineMeshes(primitiveType); if (!fIsIndexed) { - lineMeshes.setNonIndexed(lineVertexOffset); + lineMeshes.setNonIndexedNonInstanced(lineVertexOffset); } else { lineMeshes.setIndexed(lineIndexBuffer, lineIndexOffset, firstLineIndex, 0, lineVertexOffset - 1); @@ -439,7 +439,7 @@ private: memcpy(quadVertices, quads.vertices, quadVertexStride * quadVertexOffset); GrMesh quadMeshes(kTriangles_GrPrimitiveType); if (!fIsIndexed) { - quadMeshes.setNonIndexed(quadVertexOffset); + quadMeshes.setNonIndexedNonInstanced(quadVertexOffset); } else { const GrBuffer* quadIndexBuffer; int firstQuadIndex; diff --git a/src/gpu/ops/GrNonAAFillRectOp.cpp b/src/gpu/ops/GrNonAAFillRectOp.cpp index f50fcc0cc5..32e77da52b 100644 --- a/src/gpu/ops/GrNonAAFillRectOp.cpp +++ b/src/gpu/ops/GrNonAAFillRectOp.cpp @@ -119,6 +119,7 @@ public: const SkRect& rect, const SkRect* localRect, const SkMatrix* localMatrix, GrAAType aaType, const GrUserStencilSettings* stencilSettings) : INHERITED(ClassID()), fHelper(args, aaType, stencilSettings) { + SkASSERT(!viewMatrix.hasPerspective() && (!localMatrix || !localMatrix->hasPerspective())); RectInfo& info = fRects.push_back(); info.fColor = color; diff --git a/src/gpu/ops/GrNonAAStrokeRectOp.cpp b/src/gpu/ops/GrNonAAStrokeRectOp.cpp index f386984a30..eee8120eff 100644 --- a/src/gpu/ops/GrNonAAStrokeRectOp.cpp +++ b/src/gpu/ops/GrNonAAStrokeRectOp.cpp @@ -157,7 +157,7 @@ private: } GrMesh mesh(primType); - mesh.setNonIndexed(vertexCount); + mesh.setNonIndexedNonInstanced(vertexCount); mesh.setVertexData(vertexBuffer, firstVertex); target->draw(gp.get(), this->pipeline(), mesh); } diff --git a/src/gpu/ops/GrTessellatingPathRenderer.cpp b/src/gpu/ops/GrTessellatingPathRenderer.cpp index 9860d9c16d..27de8f2e5f 100644 --- a/src/gpu/ops/GrTessellatingPathRenderer.cpp +++ b/src/gpu/ops/GrTessellatingPathRenderer.cpp @@ -314,7 +314,7 @@ private: void drawVertices(Target* target, const GrGeometryProcessor* gp, const GrBuffer* vb, int firstVertex, int count) const { GrMesh mesh(TESSELLATOR_WIREFRAME ? kLines_GrPrimitiveType : kTriangles_GrPrimitiveType); - mesh.setNonIndexed(count); + mesh.setNonIndexedNonInstanced(count); mesh.setVertexData(vb, firstVertex); target->draw(gp, this->pipeline(), mesh); } diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp index 16b46096e4..e8a397847c 100644 --- a/src/gpu/vk/GrVkCaps.cpp +++ b/src/gpu/vk/GrVkCaps.cpp @@ -32,6 +32,7 @@ GrVkCaps::GrVkCaps(const GrContextOptions& contextOptions, const GrVkInterface* fReuseScratchTextures = true; //TODO: figure this out fGpuTracingSupport = false; //TODO: figure this out fOversizedStencilSupport = false; //TODO: figure this out + fInstanceAttribSupport = true; fUseDrawInsteadOfClear = false; fFenceSyncSupport = true; // always available in Vulkan @@ -247,6 +248,7 @@ void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties, uint } shaderCaps->fIntegerSupport = true; + shaderCaps->fVertexIDSupport = true; // Assume the minimum precisions mandated by the SPIR-V spec. shaderCaps->fShaderPrecisionVaries = true; diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp index cc219f5ec8..ea0a02cb34 100644 --- a/src/gpu/vk/GrVkCommandBuffer.cpp +++ b/src/gpu/vk/GrVkCommandBuffer.cpp @@ -18,10 +18,10 @@ #include "SkRect.h" void GrVkCommandBuffer::invalidateState() { - fBoundVertexBuffer = VK_NULL_HANDLE; - fBoundVertexBufferIsValid = false; + for (auto& boundInputBuffer : fBoundInputBuffers) { + boundInputBuffer = VK_NULL_HANDLE; + } fBoundIndexBuffer = VK_NULL_HANDLE; - fBoundIndexBufferIsValid = false; memset(&fCachedViewport, 0, sizeof(VkViewport)); fCachedViewport.width = - 1.0f; // Viewport must have a width greater than 0 diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h index 7c54877ec2..1f3c4a50e5 100644 --- a/src/gpu/vk/GrVkCommandBuffer.h +++ b/src/gpu/vk/GrVkCommandBuffer.h @@ -40,33 +40,36 @@ public: BarrierType barrierType, void* barrier) const; - void bindVertexBuffer(GrVkGpu* gpu, const GrVkVertexBuffer* vbuffer) { + static constexpr uint32_t kMaxInputBuffers = 2; + + void bindInputBuffer(GrVkGpu* gpu, uint32_t binding, const GrVkVertexBuffer* vbuffer) { VkBuffer vkBuffer = vbuffer->buffer(); + SkASSERT(VK_NULL_HANDLE != vkBuffer); + SkASSERT(binding < kMaxInputBuffers); // TODO: once vbuffer->offset() no longer always returns 0, we will need to track the offset // to know if we can skip binding or not. - if (!fBoundVertexBufferIsValid || vkBuffer != fBoundVertexBuffer) { + if (vkBuffer != fBoundInputBuffers[binding]) { VkDeviceSize offset = vbuffer->offset(); GR_VK_CALL(gpu->vkInterface(), CmdBindVertexBuffers(fCmdBuffer, - 0, + binding, 1, &vkBuffer, &offset)); - fBoundVertexBufferIsValid = true; - fBoundVertexBuffer = vkBuffer; + fBoundInputBuffers[binding] = vkBuffer; addResource(vbuffer->resource()); } } void bindIndexBuffer(GrVkGpu* gpu, const GrVkIndexBuffer* ibuffer) { VkBuffer vkBuffer = ibuffer->buffer(); + SkASSERT(VK_NULL_HANDLE != vkBuffer); // TODO: once ibuffer->offset() no longer always returns 0, we will need to track the offset // to know if we can skip binding or not. - if (!fBoundIndexBufferIsValid || vkBuffer != fBoundIndexBuffer) { + if (vkBuffer != fBoundIndexBuffer) { GR_VK_CALL(gpu->vkInterface(), CmdBindIndexBuffer(fCmdBuffer, vkBuffer, ibuffer->offset(), VK_INDEX_TYPE_UINT16)); - fBoundIndexBufferIsValid = true; fBoundIndexBuffer = vkBuffer; addResource(ibuffer->resource()); } @@ -146,8 +149,6 @@ protected: : fIsActive(false) , fActiveRenderPass(rp) , fCmdBuffer(cmdBuffer) - , fBoundVertexBufferIsValid(false) - , fBoundIndexBufferIsValid(false) , fNumResets(0) { fTrackedResources.setReserve(kInitialTrackedResourcesCount); fTrackedRecycledResources.setReserve(kInitialTrackedResourcesCount); @@ -177,11 +178,8 @@ private: virtual void onReset(GrVkGpu* gpu) {} - VkBuffer fBoundVertexBuffer; - bool fBoundVertexBufferIsValid; - - VkBuffer fBoundIndexBuffer; - bool fBoundIndexBufferIsValid; + VkBuffer fBoundInputBuffers[kMaxInputBuffers]; + VkBuffer fBoundIndexBuffer; // When resetting the command buffer, we remove the tracked resources from their arrays, and // we prefer to not free all the memory every time so usually we just rewind. However, to avoid diff --git a/src/gpu/vk/GrVkCopyManager.cpp b/src/gpu/vk/GrVkCopyManager.cpp index 5301deae49..cd5dba4b18 100644 --- a/src/gpu/vk/GrVkCopyManager.cpp +++ b/src/gpu/vk/GrVkCopyManager.cpp @@ -377,7 +377,7 @@ bool GrVkCopyManager::copySurfaceAsDraw(GrVkGpu* gpu, scissor.offset.y = 0; cmdBuffer->setScissor(gpu, 0, 1, &scissor); - cmdBuffer->bindVertexBuffer(gpu, fVertexBuffer.get()); + cmdBuffer->bindInputBuffer(gpu, 0, fVertexBuffer.get()); cmdBuffer->draw(gpu, 4, 1, 0, 0); cmdBuffer->endRenderPass(gpu); diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp index 2e9373b130..6f39cfa545 100644 --- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp +++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp @@ -426,17 +426,35 @@ void GrVkGpuCommandBuffer::inlineUpload(GrOpFlushState* state, GrDrawOp::Deferre void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc, const GrBuffer* indexBuffer, - const GrBuffer* vertexBuffer) { + const GrBuffer* vertexBuffer, + const GrBuffer* instanceBuffer) { GrVkSecondaryCommandBuffer* currCmdBuf = fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf(); // There is no need to put any memory barriers to make sure host writes have finished here. // When a command buffer is submitted to a queue, there is an implicit memory barrier that // occurs for all host writes. Additionally, BufferMemoryBarriers are not allowed inside of // an active RenderPass. - SkASSERT(vertexBuffer); - SkASSERT(!vertexBuffer->isCPUBacked()); - SkASSERT(!vertexBuffer->isMapped()); - currCmdBuf->bindVertexBuffer(fGpu, static_cast(vertexBuffer)); + // Here our vertex and instance inputs need to match the same 0-based bindings they were + // assigned in GrVkPipeline. That is, vertex first (if any) followed by instance. + uint32_t binding = 0; + + if (primProc.hasVertexAttribs()) { + SkASSERT(vertexBuffer); + SkASSERT(!vertexBuffer->isCPUBacked()); + SkASSERT(!vertexBuffer->isMapped()); + + currCmdBuf->bindInputBuffer(fGpu, binding++, + static_cast(vertexBuffer)); + } + + if (primProc.hasInstanceAttribs()) { + SkASSERT(instanceBuffer); + SkASSERT(!instanceBuffer->isCPUBacked()); + SkASSERT(!instanceBuffer->isMapped()); + + currCmdBuf->bindInputBuffer(fGpu, binding++, + static_cast(instanceBuffer)); + } if (indexBuffer) { SkASSERT(indexBuffer); @@ -575,29 +593,34 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline, pipelineState->freeTempResources(fGpu); } -void GrVkGpuCommandBuffer::sendMeshToGpu(const GrPrimitiveProcessor& primProc, - GrPrimitiveType, - const GrBuffer* vertexBuffer, - int vertexCount, - int baseVertex) { +void GrVkGpuCommandBuffer::sendInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, + GrPrimitiveType, + const GrBuffer* vertexBuffer, + int vertexCount, + int baseVertex, + const GrBuffer* instanceBuffer, + int instanceCount, + int baseInstance) { CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; - this->bindGeometry(primProc, nullptr, vertexBuffer); - cbInfo.currentCmdBuf()->draw(fGpu, vertexCount, 1, baseVertex, 0); + this->bindGeometry(primProc, nullptr, vertexBuffer, instanceBuffer); + cbInfo.currentCmdBuf()->draw(fGpu, vertexCount, instanceCount, baseVertex, baseInstance); fGpu->stats()->incNumDraws(); } -void GrVkGpuCommandBuffer::sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, - GrPrimitiveType, - const GrBuffer* indexBuffer, - int indexCount, - int baseIndex, - uint16_t /*minIndexValue*/, - uint16_t /*maxIndexValue*/, - const GrBuffer* vertexBuffer, - int baseVertex) { +void GrVkGpuCommandBuffer::sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, + GrPrimitiveType, + const GrBuffer* indexBuffer, + int indexCount, + int baseIndex, + const GrBuffer* vertexBuffer, + int baseVertex, + const GrBuffer* instanceBuffer, + int instanceCount, + int baseInstance) { CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; - this->bindGeometry(primProc, indexBuffer, vertexBuffer); - cbInfo.currentCmdBuf()->drawIndexed(fGpu, indexCount, 1, baseIndex, baseVertex, 0); + this->bindGeometry(primProc, indexBuffer, vertexBuffer, instanceBuffer); + cbInfo.currentCmdBuf()->drawIndexed(fGpu, indexCount, instanceCount, + baseIndex, baseVertex, baseInstance); fGpu->stats()->incNumDraws(); } diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h index b1e96a3e11..6836fac0ca 100644 --- a/src/gpu/vk/GrVkGpuCommandBuffer.h +++ b/src/gpu/vk/GrVkGpuCommandBuffer.h @@ -48,7 +48,8 @@ private: // Bind vertex and index buffers void bindGeometry(const GrPrimitiveProcessor&, const GrBuffer* indexBuffer, - const GrBuffer* vertexBuffer); + const GrBuffer* vertexBuffer, + const GrBuffer* instanceBuffer); sk_sp prepareDrawState(const GrPipeline&, const GrPrimitiveProcessor&, @@ -62,13 +63,30 @@ private: // GrMesh::SendToGpuImpl methods. These issue the actual Vulkan draw commands. // Marked final as a hint to the compiler to not use virtual dispatch. - void sendMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, - const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) final; + void sendMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveType primType, + const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) final { + this->sendInstancedMeshToGpu(primProc, primType, vertexBuffer, vertexCount, baseVertex, + nullptr, 1, 0); + } - void sendIndexedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + void sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveType primType, const GrBuffer* indexBuffer, int indexCount, int baseIndex, - uint16_t minIndexValue, uint16_t maxIndexValue, - const GrBuffer* vertexBuffer, int baseVertex) final; + uint16_t /*minIndexValue*/, uint16_t /*maxIndexValue*/, + const GrBuffer* vertexBuffer, int baseVertex) final { + this->sendIndexedInstancedMeshToGpu(primProc, primType, indexBuffer, indexCount, baseIndex, + vertexBuffer, baseVertex, nullptr, 1, 0); + } + + void sendInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* vertexBuffer, int vertexCount, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; + + void sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* indexBuffer, int indexCount, int baseIndex, + const GrBuffer* vertexBuffer, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; void onClear(GrRenderTarget*, const GrFixedClip&, GrColor color) override; diff --git a/src/gpu/vk/GrVkPipeline.cpp b/src/gpu/vk/GrVkPipeline.cpp index 47acb94266..2732c6fc80 100644 --- a/src/gpu/vk/GrVkPipeline.cpp +++ b/src/gpu/vk/GrVkPipeline.cpp @@ -46,30 +46,41 @@ static inline VkFormat attrib_type_to_vkformat(GrVertexAttribType type) { } static void setup_vertex_input_state(const GrPrimitiveProcessor& primProc, - VkPipelineVertexInputStateCreateInfo* vertexInputInfo, - VkVertexInputBindingDescription* bindingDesc, - int maxBindingDescCount, - VkVertexInputAttributeDescription* attributeDesc) { - // for now we have only one vertex buffer and one binding - memset(bindingDesc, 0, sizeof(VkVertexInputBindingDescription)); - bindingDesc->binding = 0; - bindingDesc->stride = (uint32_t)primProc.getVertexStride(); - bindingDesc->inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + VkPipelineVertexInputStateCreateInfo* vertexInputInfo, + SkSTArray<2, VkVertexInputBindingDescription, true>* bindingDescs, + VkVertexInputAttributeDescription* attributeDesc) { + uint32_t vertexBinding, instanceBinding; + + if (primProc.hasVertexAttribs()) { + vertexBinding = bindingDescs->count(); + bindingDescs->push_back() = { + vertexBinding, + (uint32_t) primProc.getVertexStride(), + VK_VERTEX_INPUT_RATE_VERTEX + }; + } + + if (primProc.hasInstanceAttribs()) { + instanceBinding = bindingDescs->count(); + bindingDescs->push_back() = { + instanceBinding, + (uint32_t) primProc.getInstanceStride(), + VK_VERTEX_INPUT_RATE_INSTANCE + }; + } // setup attribute descriptions int vaCount = primProc.numAttribs(); if (vaCount > 0) { - size_t offset = 0; for (int attribIndex = 0; attribIndex < vaCount; attribIndex++) { + using InputRate = GrPrimitiveProcessor::Attribute::InputRate; const GrGeometryProcessor::Attribute& attrib = primProc.getAttrib(attribIndex); - GrVertexAttribType attribType = attrib.fType; - VkVertexInputAttributeDescription& vkAttrib = attributeDesc[attribIndex]; vkAttrib.location = attribIndex; // for now assume location = attribIndex - vkAttrib.binding = 0; // for now only one vertex buffer & binding - vkAttrib.format = attrib_type_to_vkformat(attribType); - vkAttrib.offset = static_cast(offset); - offset += attrib.fOffset; + vkAttrib.binding = InputRate::kPerInstance == attrib.fInputRate ? instanceBinding + : vertexBinding; + vkAttrib.format = attrib_type_to_vkformat(attrib.fType); + vkAttrib.offset = attrib.fOffsetInRecord; } } @@ -77,8 +88,8 @@ static void setup_vertex_input_state(const GrPrimitiveProcessor& primProc, vertexInputInfo->sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; vertexInputInfo->pNext = nullptr; vertexInputInfo->flags = 0; - vertexInputInfo->vertexBindingDescriptionCount = 1; - vertexInputInfo->pVertexBindingDescriptions = bindingDesc; + vertexInputInfo->vertexBindingDescriptionCount = bindingDescs->count(); + vertexInputInfo->pVertexBindingDescriptions = bindingDescs->begin(); vertexInputInfo->vertexAttributeDescriptionCount = vaCount; vertexInputInfo->pVertexAttributeDescriptions = attributeDesc; } @@ -402,11 +413,11 @@ GrVkPipeline* GrVkPipeline::Create(GrVkGpu* gpu, const GrPipeline& pipeline, VkPipelineLayout layout, VkPipelineCache cache) { VkPipelineVertexInputStateCreateInfo vertexInputInfo; - VkVertexInputBindingDescription bindingDesc; + SkSTArray<2, VkVertexInputBindingDescription, true> bindingDescs; SkSTArray<16, VkVertexInputAttributeDescription> attributeDesc; SkASSERT(primProc.numAttribs() <= gpu->vkCaps().maxVertexAttributes()); VkVertexInputAttributeDescription* pAttribs = attributeDesc.push_back_n(primProc.numAttribs()); - setup_vertex_input_state(primProc, &vertexInputInfo, &bindingDesc, 1, pAttribs); + setup_vertex_input_state(primProc, &vertexInputInfo, &bindingDescs, pAttribs); VkPipelineInputAssemblyStateCreateInfo inputAssemblyInfo; setup_input_assembly_state(primitiveType, &inputAssemblyInfo); diff --git a/tests/GrMeshTest.cpp b/tests/GrMeshTest.cpp index e8e3aeb36f..3a2be784a5 100644 --- a/tests/GrMeshTest.cpp +++ b/tests/GrMeshTest.cpp @@ -23,6 +23,7 @@ #include "glsl/GrGLSLGeometryProcessor.h" #include "glsl/GrGLSLVarying.h" #include +#include GR_DECLARE_STATIC_UNIQUE_KEY(gIndexBufferKey); @@ -48,6 +49,9 @@ public: template sk_sp makeVertexBuffer(const SkTArray& data) { return this->makeVertexBuffer(data.begin(), data.count()); } + template sk_sp makeVertexBuffer(const std::vector& data) { + return this->makeVertexBuffer(data.data(), data.size()); + } template sk_sp makeVertexBuffer(const T* data, int count); void drawMesh(const GrMesh& mesh); @@ -144,7 +148,7 @@ DEF_GPUTEST_FOR_RENDERING_CONTEXTS(GrMeshTest, reporter, ctxInfo) { VALIDATE(vbuff); for (int y = 0; y < kBoxCountY; ++y) { GrMesh mesh(kTriangles_GrPrimitiveType); - mesh.setNonIndexed(kBoxCountX * 6); + mesh.setNonIndexedNonInstanced(kBoxCountX * 6); mesh.setVertexData(vbuff.get(), y * kBoxCountX * 6); helper->drawMesh(mesh); } @@ -189,6 +193,55 @@ DEF_GPUTEST_FOR_RENDERING_CONTEXTS(GrMeshTest, reporter, ctxInfo) { helper->drawMesh(mesh); } }); + + for (bool indexed : {false, true}) { + if (!context->caps()->instanceAttribSupport()) { + break; + } + + run_test(indexed ? "setIndexedInstanced" : "setInstanced", + reporter, rtc, gold, [&](DrawMeshHelper* helper) { + auto idxbuff = indexed ? helper->getIndexBuffer() : nullptr; + auto instbuff = helper->makeVertexBuffer(boxes); + VALIDATE(instbuff); + auto vbuff = helper->makeVertexBuffer(std::vector{0,0, 0,1, 1,0, 1,1}); + VALIDATE(vbuff); + auto vbuff2 = helper->makeVertexBuffer( // for testing base vertex. + std::vector{-1,-1, -1,-1, 0,0, 0,1, 1,0, 1,1}); + VALIDATE(vbuff2); + + // Draw boxes one line at a time to exercise base instance, base vertex, and null vertex + // buffer. setIndexedInstanced intentionally does not support a base index. + for (int y = 0; y < kBoxCountY; ++y) { + GrMesh mesh(indexed ? kTriangles_GrPrimitiveType : kTriangleStrip_GrPrimitiveType); + if (indexed) { + VALIDATE(idxbuff); + mesh.setIndexedInstanced(idxbuff.get(), 6, + instbuff.get(), kBoxCountX, y * kBoxCountX); + } else { + mesh.setInstanced(instbuff.get(), kBoxCountX, y * kBoxCountX, 4); + } + switch (y % 3) { + case 0: + if (context->caps()->shaderCaps()->vertexIDSupport()) { + if (y % 2) { + // We don't need this call because it's the initial state of GrMesh. + mesh.setVertexData(nullptr); + } + break; + } + // Fallthru. + case 1: + mesh.setVertexData(vbuff.get()); + break; + case 2: + mesh.setVertexData(vbuff2.get(), 2); + break; + } + helper->drawMesh(mesh); + } + }); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -222,21 +275,36 @@ private: class GrMeshTestProcessor : public GrGeometryProcessor { public: - GrMeshTestProcessor() - : fVertex(this->addVertexAttrib("vertex", kVec2f_GrVertexAttribType)) - , fColor(this->addVertexAttrib("color", kVec4ub_GrVertexAttribType)) { + GrMeshTestProcessor(bool instanced, bool hasVertexBuffer) + : fInstanceLocation(nullptr) + , fVertex(nullptr) + , fColor(nullptr) { + if (instanced) { + fInstanceLocation = &this->addInstanceAttrib("location", kVec2f_GrVertexAttribType); + if (hasVertexBuffer) { + fVertex = &this->addVertexAttrib("vertex", kVec2f_GrVertexAttribType); + } + fColor = &this->addInstanceAttrib("color", kVec4ub_GrVertexAttribType); + } else { + fVertex = &this->addVertexAttrib("vertex", kVec2f_GrVertexAttribType); + fColor = &this->addVertexAttrib("color", kVec4ub_GrVertexAttribType); + } this->initClassID(); } const char* name() const override { return "GrMeshTest Processor"; } - void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const final {} + void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const final { + b->add32(SkToBool(fInstanceLocation)); + b->add32(SkToBool(fVertex)); + } GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const final; protected: - const Attribute& fVertex; - const Attribute& fColor; + const Attribute* fInstanceLocation; + const Attribute* fVertex; + const Attribute* fColor; friend class GLSLMeshTestProcessor; typedef GrGeometryProcessor INHERITED; @@ -251,10 +319,20 @@ class GLSLMeshTestProcessor : public GrGLSLGeometryProcessor { GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler; varyingHandler->emitAttributes(mp); - varyingHandler->addPassThroughAttribute(&mp.fColor, args.fOutputColor); + varyingHandler->addPassThroughAttribute(mp.fColor, args.fOutputColor); GrGLSLVertexBuilder* v = args.fVertBuilder; - v->codeAppendf("vec2 vertex = %s;", mp.fVertex.fName); + if (!mp.fInstanceLocation) { + v->codeAppendf("vec2 vertex = %s;", mp.fVertex->fName); + } else { + if (mp.fVertex) { + v->codeAppendf("vec2 offset = %s;", mp.fVertex->fName); + } else { + v->codeAppend ("vec2 offset = vec2(sk_VertexID / 2, sk_VertexID % 2);"); + } + v->codeAppendf("vec2 vertex = %s + offset * %i;", + mp.fInstanceLocation->fName, kBoxSize); + } gpArgs->fPositionVar.set(kVec2f_GrSLType, "vertex"); GrGLSLPPFragmentBuilder* f = args.fFragBuilder; @@ -287,7 +365,8 @@ sk_sp DrawMeshHelper::getIndexBuffer() { void DrawMeshHelper::drawMesh(const GrMesh& mesh) { GrRenderTarget* rt = fState->drawOpArgs().fRenderTarget; GrPipeline pipeline(rt, SkBlendMode::kSrc); - fState->commandBuffer()->draw(pipeline, GrMeshTestProcessor(), &mesh, 1, + GrMeshTestProcessor mtp(mesh.isInstanced(), mesh.hasVertexData()); + fState->commandBuffer()->draw(pipeline, mtp, &mesh, 1, SkRect::MakeIWH(kImageWidth, kImageHeight)); } diff --git a/tests/PrimitiveProcessorTest.cpp b/tests/PrimitiveProcessorTest.cpp index 6afc3acec8..66b96f7b93 100644 --- a/tests/PrimitiveProcessorTest.cpp +++ b/tests/PrimitiveProcessorTest.cpp @@ -69,7 +69,7 @@ private: void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override { const GP& gp = args.fGP.cast(); args.fVaryingHandler->emitAttributes(gp); - this->setupPosition(args.fVertBuilder, gpArgs, gp.fAttribs[0].fName); + this->setupPosition(args.fVertBuilder, gpArgs, gp.getAttrib(0).fName); GrGLSLPPFragmentBuilder* fragBuilder = args.fFragBuilder; fragBuilder->codeAppendf("%s = vec4(1);", args.fOutputColor); fragBuilder->codeAppendf("%s = vec4(1);", args.fOutputCoverage);