diff --git a/gn/graphite.gni b/gn/graphite.gni index acfce1b820..4ae3da7095 100644 --- a/gn/graphite.gni +++ b/gn/graphite.gni @@ -115,6 +115,7 @@ skia_graphite_sources = [ "$_src/geom/Transform_graphite.h", "$_src/render/CoverBoundsRenderStep.cpp", "$_src/render/CoverBoundsRenderStep.h", + "$_src/render/DynamicInstancesPatchAllocator.h", "$_src/render/MiddleOutFanRenderStep.cpp", "$_src/render/MiddleOutFanRenderStep.h", "$_src/render/StencilAndCoverDSS.h", diff --git a/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h b/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h index 78c5f1061f..7292d767c2 100644 --- a/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h +++ b/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h @@ -29,7 +29,6 @@ public: : fWorstCaseTolerances(worstCaseTolerances) , fBuilder(target, chunks, stride, minVerticesPerChunk) {} - size_t stride() const { return fBuilder.stride(); } VertexWriter append(const tess::LinearTolerances& tolerances) { fWorstCaseTolerances->accumulate(tolerances); return fBuilder.appendVertices(1); diff --git a/src/gpu/graphite/DrawWriter.h b/src/gpu/graphite/DrawWriter.h index cc35305869..397ad6594b 100644 --- a/src/gpu/graphite/DrawWriter.h +++ b/src/gpu/graphite/DrawWriter.h @@ -129,20 +129,37 @@ public: // drawIndexedInstanced() (depending on presence of index data in the template). Unlike the // Instances mode, the template's index or vertex count is not provided at the time of creation. // Instead, DynamicInstances can be used with pipeline programs that can have a flexible number - // of vertices per instance. Appended instances specify the minimum index/vertex count they - // must be drawn with, but if they are later batched with instances that would use more, the - // pipeline's vertex shader knows how to handle it. + // of vertices per instance. Appended instances specify a proxy object that can be converted + // to the minimum index/vertex count they must be drawn with; but if they are later batched with + // instances that would use more, the pipeline's vertex shader knows how to handle it. + // + // The proxy object serves as a useful point of indirection when the actual index count is + // expensive to compute, but can be derived from correlated geometric properties. The proxy + // can store those properties and accumulate a "worst-case" and then calculate the index count + // when DrawWriter has to flush. + // + // The VertexCountProxy type must provide: + // - a default constructor and copy assignment, where the initial value represents the minimum + // supported vertex count. + // - an 'unsigned int' operator that converts the proxy to the actual index count that is + // needed in order to dispatch a draw call. + // - operator <<(const V&) where V is any type the caller wants to pass to append() that + // represents the proxy for the about-to-be-written instances. This operator then updates its + // internal state to represent the worst case between what had previously been recorded and + // the latest V value. // // Usage for drawInstanced (fixedIndices == {}) or drawIndexedInstanced: - // DrawWriter::DynamicInstances instances(writer, fixedVerts, fixedIndices); - // instances.append(minIndexCount1, n1) << ...; - // instances.append(minIndexCount2, n2) << ...; + // DrawWriter::DynamicInstances instances(writer, fixedVerts, fixedIndices); + // instances.append(minIndexProxy1, n1) << ...; + // instances.append(minIndexProxy2, n2) << ...; // // In this example, if the two sets of instances were contiguous, a single draw call with // (n1 + n2) instances would still be made using max(minIndexCount1, minIndexCount2) as the - // index/vertex count. If the available vertex data from the DrawBufferManager forced a flush - // after the first, then the second would use minIndexCount2 unless a subsequent compatible - // DynamicInstances template appended more contiguous data. + // index/vertex count, 'minIndexCountX' was derived from 'minIndexProxyX'. If the available + // vertex data from the DrawBufferManager forced a flush after the first, then the second would + // use minIndexCount2 unless a subsequent compatible DynamicInstances template appended more + // contiguous data. + template class DynamicInstances; // Issues a draws with fully specified data. This can be used when all instance data has already @@ -200,6 +217,7 @@ private: void bindAndFlush(BindBufferInfo vertices, BindBufferInfo indices, BindBufferInfo instances, unsigned int templateCount, unsigned int drawCount) { SkASSERT(drawCount > 0); + SkASSERT(!fAppender); // shouldn't be appending and manually drawing at the same time. this->setTemplate(vertices, indices, instances, SkTo(templateCount)); fPendingBase = 0; fPendingCount = drawCount; @@ -252,7 +270,7 @@ public: SkDEBUGCODE(w.fAppender = this;) } - ~Appender() { + virtual ~Appender() { if (fReservedCount > 0) { fDrawer.fManager->returnVertexBytes(fReservedCount * fStride); } @@ -268,6 +286,8 @@ protected: unsigned int fReservedCount; // in target stride units VertexWriter fNextWriter; // writing to the target buffer binding + virtual void onFlush() {} + void reserve(unsigned int count) { if (fReservedCount >= count) { return; @@ -285,6 +305,7 @@ protected: reservedChunk.fOffset != (fTarget.fOffset + (fDrawer.fPendingBase + fDrawer.fPendingCount) * fStride)) { // Not contiguous, so flush and update binding to 'reservedChunk' + this->onFlush(); fDrawer.flush(); fTarget = reservedChunk; @@ -329,6 +350,7 @@ public: using Appender::append; }; +template class DrawWriter::DynamicInstances : private DrawWriter::Appender { public: DynamicInstances(DrawWriter& w, @@ -338,17 +360,42 @@ public: w.setTemplate(vertices, indices, w.fInstances, -1); } + ~DynamicInstances() override { + // Persist the template count since the DrawWriter could continue batching if a new + // compatible DynamicInstances object is created for the next draw. + this->updateTemplateCount(); + } + using Appender::reserve; - VertexWriter append(unsigned int indexCount, unsigned int instanceCount) { - SkASSERT(indexCount > 0); + template + VertexWriter append(const V& vertexCount, unsigned int instanceCount) { VertexWriter w = this->Appender::append(instanceCount); // Record index count after appending instance data in case the append triggered a flush - // or earlier dynamic instances and the max index count is reset. This ensures that the - // just appended instances will be flushed with a template count at least 'indexCount'. - fDrawer.fTemplateCount = std::min(fDrawer.fTemplateCount, -SkTo(indexCount) - 1); + // and the max index count is reset. However, the contents of 'w' will not have been flushed + // so 'fProxy' will account for 'vertexCount' when it is actually drawn. + fProxy << vertexCount; return w; } + +private: + void updateTemplateCount() { + unsigned int count = static_cast(fProxy); + SkASSERT(count > 0); + fDrawer.fTemplateCount = std::min(fDrawer.fTemplateCount, -SkTo(count) - 1); + // By resetting the proxy after updating the template count, the next batch will start over + // with the minimum required vertex count and grow from there. + fProxy = {}; + } + + void onFlush() override { + // Update the DrawWriter's template count before its flush() is invoked and the appender + // starts recording to a new buffer, which ensures the flush's draw call uses the most + // up-to-date vertex count derived from fProxy. + this->updateTemplateCount(); + } + + VertexCountProxy fProxy = {}; }; } // namespace skgpu::graphite diff --git a/src/gpu/graphite/render/BUILD.bazel b/src/gpu/graphite/render/BUILD.bazel index c9eb8fd64e..4ccb1ffd00 100644 --- a/src/gpu/graphite/render/BUILD.bazel +++ b/src/gpu/graphite/render/BUILD.bazel @@ -78,6 +78,7 @@ generated_cc_atom( srcs = ["TessellateCurvesRenderStep.cpp"], visibility = ["//:__subpackages__"], deps = [ + ":DynamicInstancesPatchAllocator_hdr", ":StencilAndCoverDSS_hdr", ":TessellateCurvesRenderStep_hdr", "//src/gpu/graphite:DrawGeometry_hdr", @@ -100,6 +101,7 @@ generated_cc_atom( srcs = ["TessellateWedgesRenderStep.cpp"], visibility = ["//:__subpackages__"], deps = [ + ":DynamicInstancesPatchAllocator_hdr", ":TessellateWedgesRenderStep_hdr", "//src/gpu/graphite:DrawGeometry_hdr", "//src/gpu/graphite:DrawWriter_hdr", @@ -109,3 +111,14 @@ generated_cc_atom( "//src/gpu/tessellate:PatchWriter_hdr", ], ) + +generated_cc_atom( + name = "DynamicInstancesPatchAllocator_hdr", + hdrs = ["DynamicInstancesPatchAllocator.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//src/gpu:BufferWriter_hdr", + "//src/gpu/graphite:DrawWriter_hdr", + "//src/gpu/tessellate:LinearTolerances_hdr", + ], +) diff --git a/src/gpu/graphite/render/DynamicInstancesPatchAllocator.h b/src/gpu/graphite/render/DynamicInstancesPatchAllocator.h new file mode 100644 index 0000000000..dba4b1f2eb --- /dev/null +++ b/src/gpu/graphite/render/DynamicInstancesPatchAllocator.h @@ -0,0 +1,58 @@ +/* + * Copyright 2022 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED +#define skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED + +#include "src/gpu/BufferWriter.h" +#include "src/gpu/graphite/DrawWriter.h" +#include "src/gpu/tessellate/LinearTolerances.h" + +namespace skgpu::graphite { + +// An adapter around DrawWriter::DynamicInstances that fits the API requirements of +// skgpu::tess::PatchWriter's PatchAllocator template parameter. +// +// FixedCountVariant should be one of FixedCountCurves, FixedCountWedges, or FixedCountStrokes +// (technically any class with a static VertexCount(const LinearTolerances&) function). +template +class DynamicInstancesPatchAllocator { +public: + // 'stride' is provided by PatchWriter. + // 'writer' is the DrawWriter that the RenderStep can append instance data to, + // 'fixedVertexBuffer' and 'fixedIndexBuffer' are the bindings for the instance template that + // is passed to DrawWriter::DynamicInstances. + DynamicInstancesPatchAllocator(size_t stride, + DrawWriter& writer, + BindBufferInfo fixedVertexBuffer, + BindBufferInfo fixedIndexBuffer, + unsigned int reserveCount) + : fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) { + SkASSERT(stride == writer.instanceStride()); + // TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will + // appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway? + fInstances.reserve(reserveCount); + } + + VertexWriter append(const tess::LinearTolerances& tolerances) { + return fInstances.append(tolerances, 1); + } + +private: + struct LinearToleranceProxy { + operator unsigned int() const { return FixedCountVariant::VertexCount(fTolerances); } + void operator <<(const tess::LinearTolerances& t) { fTolerances.accumulate(t); } + + tess::LinearTolerances fTolerances; + }; + + DrawWriter::DynamicInstances fInstances; +}; + +} // namespace skgpu::graphite + +#endif // skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED diff --git a/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp b/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp index fc03c42caf..79759812fe 100644 --- a/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp +++ b/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp @@ -9,6 +9,7 @@ #include "src/gpu/graphite/DrawGeometry.h" #include "src/gpu/graphite/DrawWriter.h" +#include "src/gpu/graphite/render/DynamicInstancesPatchAllocator.h" #include "src/gpu/graphite/render/StencilAndCoverDSS.h" #include "src/gpu/tessellate/AffineMatrix.h" @@ -21,38 +22,12 @@ namespace { using namespace skgpu::tess; -// TODO: This can be shared by the other path tessellators if PatchWriter can provide the correct -// index count based on its traits (curve, wedge, or stroke). -// Satisfies API requirements for PatchAllocator template to PatchWriter, using -// DrawWriter::DynamicInstances. -struct DrawWriterAllocator { - DrawWriterAllocator(size_t stride, // required for PatchAllocator - DrawWriter& writer, - BindBufferInfo fixedVertexBuffer, - BindBufferInfo fixedIndexBuffer, - unsigned int reserveCount) - : fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) { - SkASSERT(writer.instanceStride() == stride); - // TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will - // appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway? - fInstances.reserve(reserveCount); - } - - VertexWriter append(const LinearTolerances& tolerances) { - // TODO (skbug.com/13056): Converting tolerances into an index count for every instance is - // wasteful; it only has to be computed when we flush. - return fInstances.append(FixedCountCurves::VertexCount(tolerances), 1); - } - - DrawWriter::DynamicInstances fInstances; -}; - // No fan point or stroke params, since this is for filled curves (not strokes or wedges) // No explicit curve type, since we assume infinity is supported on GPUs using graphite // No color or wide color attribs, since it might always be part of the PaintParams // or we'll add a color-only fast path to RenderStep later. static constexpr PatchAttribs kAttribs = PatchAttribs::kPaintDepth; -using Writer = PatchWriter, Required, AddTrianglesWhenChopping, DiscardFlatCurves>; diff --git a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp index 7dfd36c931..690a19cc7f 100644 --- a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp +++ b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp @@ -9,6 +9,7 @@ #include "src/gpu/graphite/DrawGeometry.h" #include "src/gpu/graphite/DrawWriter.h" +#include "src/gpu/graphite/render/DynamicInstancesPatchAllocator.h" #include "src/gpu/tessellate/AffineMatrix.h" #include "src/gpu/tessellate/FixedCountBufferUtils.h" @@ -21,30 +22,6 @@ namespace { using namespace skgpu::tess; -// TODO: This is very similar to DrawWriterAllocator in TessellateCurveRenderStep except that the -// index count is calculated differently. These will be merged once skbug.com/13056 is resolved. -struct DrawWriterAllocator { - DrawWriterAllocator(size_t stride, // required for PatchAllocator - DrawWriter& writer, - BindBufferInfo fixedVertexBuffer, - BindBufferInfo fixedIndexBuffer, - unsigned int reserveCount) - : fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) { - SkASSERT(writer.instanceStride() == stride); - // TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will - // appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway? - fInstances.reserve(reserveCount); - } - - VertexWriter append(const LinearTolerances& tolerances) { - // TODO (skbug.com/13056): Converting tolerances into an index count for every instance is - // wasteful; it only has to be computed when we flush. - return fInstances.append(FixedCountWedges::VertexCount(tolerances), 1); - } - - DrawWriter::DynamicInstances fInstances; -}; - // Only kFanPoint, no stroke params, since this is for filled wedges. // No explicit curve type, since we assume infinity is supported on GPUs using graphite // No color or wide color attribs, since it might always be part of the PaintParams @@ -52,7 +29,7 @@ struct DrawWriterAllocator { static constexpr PatchAttribs kAttribs = PatchAttribs::kFanPoint | PatchAttribs::kPaintDepth; -using Writer = PatchWriter, Required, Required>; diff --git a/src/gpu/tessellate/PatchWriter.h b/src/gpu/tessellate/PatchWriter.h index d31041ddea..d2cc59d0ae 100644 --- a/src/gpu/tessellate/PatchWriter.h +++ b/src/gpu/tessellate/PatchWriter.h @@ -56,8 +56,6 @@ namespace skgpu::tess { * * In addition to variable traits, PatchWriter's first template argument defines the type used for * allocating the GPU instance data. The templated "PatchAllocator" can be any type that provides: - * // The stride of each instance in bytes - * size_t stride() const; * // A GPU-backed vertex writer for a single instance worth of data. The provided * // LinearTolerances value represents the tolerances for the curve that will be written to the * // returned vertex space. @@ -303,7 +301,7 @@ public: // time the deferred patch was recorded. fTolerances.setParametricSegments(fDeferredPatch.fN_p4); if (VertexWriter vw = fPatchAllocator.append(fTolerances)) { - vw << VertexWriter::Array(fDeferredPatch.fData, fPatchAllocator.stride()); + vw << VertexWriter::Array(fDeferredPatch.fData, PatchStride(fAttribs)); } } @@ -499,11 +497,11 @@ private: if constexpr (kTrackJoinControlPoints) { if (fDeferredPatch.fMustDefer) { SkASSERT(!fDeferredPatch.hasPending()); - SkASSERT(fPatchAllocator.stride() <= kMaxStride); + SkASSERT(PatchStride(fAttribs) <= kMaxStride); // Save the computed parametric segment tolerance value so that we can pass that to // the PatchAllocator when flushing the deferred patch. fDeferredPatch.fN_p4 = fTolerances.numParametricSegments_p4(); - return {fDeferredPatch.fData, fPatchAllocator.stride()}; + return {fDeferredPatch.fData, PatchStride(fAttribs)}; } } return fPatchAllocator.append(fTolerances);