[graphite] Only convert LinearTolerances to vertex counts when invoking draw calls
Instead of taking the minimum required index/vertex count for an instance, DynamicInstances::append() now takes a proxy. The proxy can be used to calculate the index when it's needed at flush time, and can accumulate its internal state to represent a worst case that corresponds to the worst case index count as well. Updates the tessellating RenderSteps to use a single PatchAllocator definition that just swaps out how tess::LinearTolerances are converted to a vertex count. This will also work seamlessly with FixedCountStrokes. Removes stride() function from PatchAllocator because it's clunky to try and pass that out of DrawWriter or RenderStep to PatchWriter, and can be re-computed from fAttribs anyways. Since it's only re-used in PatchWriter for the deferred patch, it's one sum for every path contour and will likely not be noticed in perf. If it is, we can have PatchWriter remember it instead and keep PatchAllocator simple still. Bug: skia:13056, skia:13012 Change-Id: I52a36da4df23ebb01a98246458b6538744c1815f Reviewed-on: https://skia-review.googlesource.com/c/skia/+/540620 Commit-Queue: Michael Ludwig <michaelludwig@google.com> Reviewed-by: Robert Phillips <robertphillips@google.com>
This commit is contained in:
parent
4621ef2a8a
commit
1186b60fcf
@ -115,6 +115,7 @@ skia_graphite_sources = [
|
||||
"$_src/geom/Transform_graphite.h",
|
||||
"$_src/render/CoverBoundsRenderStep.cpp",
|
||||
"$_src/render/CoverBoundsRenderStep.h",
|
||||
"$_src/render/DynamicInstancesPatchAllocator.h",
|
||||
"$_src/render/MiddleOutFanRenderStep.cpp",
|
||||
"$_src/render/MiddleOutFanRenderStep.h",
|
||||
"$_src/render/StencilAndCoverDSS.h",
|
||||
|
@ -29,7 +29,6 @@ public:
|
||||
: fWorstCaseTolerances(worstCaseTolerances)
|
||||
, fBuilder(target, chunks, stride, minVerticesPerChunk) {}
|
||||
|
||||
size_t stride() const { return fBuilder.stride(); }
|
||||
VertexWriter append(const tess::LinearTolerances& tolerances) {
|
||||
fWorstCaseTolerances->accumulate(tolerances);
|
||||
return fBuilder.appendVertices(1);
|
||||
|
@ -129,20 +129,37 @@ public:
|
||||
// drawIndexedInstanced() (depending on presence of index data in the template). Unlike the
|
||||
// Instances mode, the template's index or vertex count is not provided at the time of creation.
|
||||
// Instead, DynamicInstances can be used with pipeline programs that can have a flexible number
|
||||
// of vertices per instance. Appended instances specify the minimum index/vertex count they
|
||||
// must be drawn with, but if they are later batched with instances that would use more, the
|
||||
// pipeline's vertex shader knows how to handle it.
|
||||
// of vertices per instance. Appended instances specify a proxy object that can be converted
|
||||
// to the minimum index/vertex count they must be drawn with; but if they are later batched with
|
||||
// instances that would use more, the pipeline's vertex shader knows how to handle it.
|
||||
//
|
||||
// The proxy object serves as a useful point of indirection when the actual index count is
|
||||
// expensive to compute, but can be derived from correlated geometric properties. The proxy
|
||||
// can store those properties and accumulate a "worst-case" and then calculate the index count
|
||||
// when DrawWriter has to flush.
|
||||
//
|
||||
// The VertexCountProxy type must provide:
|
||||
// - a default constructor and copy assignment, where the initial value represents the minimum
|
||||
// supported vertex count.
|
||||
// - an 'unsigned int' operator that converts the proxy to the actual index count that is
|
||||
// needed in order to dispatch a draw call.
|
||||
// - operator <<(const V&) where V is any type the caller wants to pass to append() that
|
||||
// represents the proxy for the about-to-be-written instances. This operator then updates its
|
||||
// internal state to represent the worst case between what had previously been recorded and
|
||||
// the latest V value.
|
||||
//
|
||||
// Usage for drawInstanced (fixedIndices == {}) or drawIndexedInstanced:
|
||||
// DrawWriter::DynamicInstances instances(writer, fixedVerts, fixedIndices);
|
||||
// instances.append(minIndexCount1, n1) << ...;
|
||||
// instances.append(minIndexCount2, n2) << ...;
|
||||
// DrawWriter::DynamicInstances<ProxyType> instances(writer, fixedVerts, fixedIndices);
|
||||
// instances.append(minIndexProxy1, n1) << ...;
|
||||
// instances.append(minIndexProxy2, n2) << ...;
|
||||
//
|
||||
// In this example, if the two sets of instances were contiguous, a single draw call with
|
||||
// (n1 + n2) instances would still be made using max(minIndexCount1, minIndexCount2) as the
|
||||
// index/vertex count. If the available vertex data from the DrawBufferManager forced a flush
|
||||
// after the first, then the second would use minIndexCount2 unless a subsequent compatible
|
||||
// DynamicInstances template appended more contiguous data.
|
||||
// index/vertex count, 'minIndexCountX' was derived from 'minIndexProxyX'. If the available
|
||||
// vertex data from the DrawBufferManager forced a flush after the first, then the second would
|
||||
// use minIndexCount2 unless a subsequent compatible DynamicInstances template appended more
|
||||
// contiguous data.
|
||||
template <typename VertexCountProxy>
|
||||
class DynamicInstances;
|
||||
|
||||
// Issues a draws with fully specified data. This can be used when all instance data has already
|
||||
@ -200,6 +217,7 @@ private:
|
||||
void bindAndFlush(BindBufferInfo vertices, BindBufferInfo indices, BindBufferInfo instances,
|
||||
unsigned int templateCount, unsigned int drawCount) {
|
||||
SkASSERT(drawCount > 0);
|
||||
SkASSERT(!fAppender); // shouldn't be appending and manually drawing at the same time.
|
||||
this->setTemplate(vertices, indices, instances, SkTo<int>(templateCount));
|
||||
fPendingBase = 0;
|
||||
fPendingCount = drawCount;
|
||||
@ -252,7 +270,7 @@ public:
|
||||
SkDEBUGCODE(w.fAppender = this;)
|
||||
}
|
||||
|
||||
~Appender() {
|
||||
virtual ~Appender() {
|
||||
if (fReservedCount > 0) {
|
||||
fDrawer.fManager->returnVertexBytes(fReservedCount * fStride);
|
||||
}
|
||||
@ -268,6 +286,8 @@ protected:
|
||||
unsigned int fReservedCount; // in target stride units
|
||||
VertexWriter fNextWriter; // writing to the target buffer binding
|
||||
|
||||
virtual void onFlush() {}
|
||||
|
||||
void reserve(unsigned int count) {
|
||||
if (fReservedCount >= count) {
|
||||
return;
|
||||
@ -285,6 +305,7 @@ protected:
|
||||
reservedChunk.fOffset !=
|
||||
(fTarget.fOffset + (fDrawer.fPendingBase + fDrawer.fPendingCount) * fStride)) {
|
||||
// Not contiguous, so flush and update binding to 'reservedChunk'
|
||||
this->onFlush();
|
||||
fDrawer.flush();
|
||||
|
||||
fTarget = reservedChunk;
|
||||
@ -329,6 +350,7 @@ public:
|
||||
using Appender::append;
|
||||
};
|
||||
|
||||
template <typename VertexCountProxy>
|
||||
class DrawWriter::DynamicInstances : private DrawWriter::Appender {
|
||||
public:
|
||||
DynamicInstances(DrawWriter& w,
|
||||
@ -338,17 +360,42 @@ public:
|
||||
w.setTemplate(vertices, indices, w.fInstances, -1);
|
||||
}
|
||||
|
||||
~DynamicInstances() override {
|
||||
// Persist the template count since the DrawWriter could continue batching if a new
|
||||
// compatible DynamicInstances object is created for the next draw.
|
||||
this->updateTemplateCount();
|
||||
}
|
||||
|
||||
using Appender::reserve;
|
||||
|
||||
VertexWriter append(unsigned int indexCount, unsigned int instanceCount) {
|
||||
SkASSERT(indexCount > 0);
|
||||
template <typename V>
|
||||
VertexWriter append(const V& vertexCount, unsigned int instanceCount) {
|
||||
VertexWriter w = this->Appender::append(instanceCount);
|
||||
// Record index count after appending instance data in case the append triggered a flush
|
||||
// or earlier dynamic instances and the max index count is reset. This ensures that the
|
||||
// just appended instances will be flushed with a template count at least 'indexCount'.
|
||||
fDrawer.fTemplateCount = std::min(fDrawer.fTemplateCount, -SkTo<int>(indexCount) - 1);
|
||||
// and the max index count is reset. However, the contents of 'w' will not have been flushed
|
||||
// so 'fProxy' will account for 'vertexCount' when it is actually drawn.
|
||||
fProxy << vertexCount;
|
||||
return w;
|
||||
}
|
||||
|
||||
private:
|
||||
void updateTemplateCount() {
|
||||
unsigned int count = static_cast<unsigned int>(fProxy);
|
||||
SkASSERT(count > 0);
|
||||
fDrawer.fTemplateCount = std::min(fDrawer.fTemplateCount, -SkTo<int>(count) - 1);
|
||||
// By resetting the proxy after updating the template count, the next batch will start over
|
||||
// with the minimum required vertex count and grow from there.
|
||||
fProxy = {};
|
||||
}
|
||||
|
||||
void onFlush() override {
|
||||
// Update the DrawWriter's template count before its flush() is invoked and the appender
|
||||
// starts recording to a new buffer, which ensures the flush's draw call uses the most
|
||||
// up-to-date vertex count derived from fProxy.
|
||||
this->updateTemplateCount();
|
||||
}
|
||||
|
||||
VertexCountProxy fProxy = {};
|
||||
};
|
||||
|
||||
} // namespace skgpu::graphite
|
||||
|
@ -78,6 +78,7 @@ generated_cc_atom(
|
||||
srcs = ["TessellateCurvesRenderStep.cpp"],
|
||||
visibility = ["//:__subpackages__"],
|
||||
deps = [
|
||||
":DynamicInstancesPatchAllocator_hdr",
|
||||
":StencilAndCoverDSS_hdr",
|
||||
":TessellateCurvesRenderStep_hdr",
|
||||
"//src/gpu/graphite:DrawGeometry_hdr",
|
||||
@ -100,6 +101,7 @@ generated_cc_atom(
|
||||
srcs = ["TessellateWedgesRenderStep.cpp"],
|
||||
visibility = ["//:__subpackages__"],
|
||||
deps = [
|
||||
":DynamicInstancesPatchAllocator_hdr",
|
||||
":TessellateWedgesRenderStep_hdr",
|
||||
"//src/gpu/graphite:DrawGeometry_hdr",
|
||||
"//src/gpu/graphite:DrawWriter_hdr",
|
||||
@ -109,3 +111,14 @@ generated_cc_atom(
|
||||
"//src/gpu/tessellate:PatchWriter_hdr",
|
||||
],
|
||||
)
|
||||
|
||||
generated_cc_atom(
|
||||
name = "DynamicInstancesPatchAllocator_hdr",
|
||||
hdrs = ["DynamicInstancesPatchAllocator.h"],
|
||||
visibility = ["//:__subpackages__"],
|
||||
deps = [
|
||||
"//src/gpu:BufferWriter_hdr",
|
||||
"//src/gpu/graphite:DrawWriter_hdr",
|
||||
"//src/gpu/tessellate:LinearTolerances_hdr",
|
||||
],
|
||||
)
|
||||
|
58
src/gpu/graphite/render/DynamicInstancesPatchAllocator.h
Normal file
58
src/gpu/graphite/render/DynamicInstancesPatchAllocator.h
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED
|
||||
#define skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED
|
||||
|
||||
#include "src/gpu/BufferWriter.h"
|
||||
#include "src/gpu/graphite/DrawWriter.h"
|
||||
#include "src/gpu/tessellate/LinearTolerances.h"
|
||||
|
||||
namespace skgpu::graphite {
|
||||
|
||||
// An adapter around DrawWriter::DynamicInstances that fits the API requirements of
|
||||
// skgpu::tess::PatchWriter's PatchAllocator template parameter.
|
||||
//
|
||||
// FixedCountVariant should be one of FixedCountCurves, FixedCountWedges, or FixedCountStrokes
|
||||
// (technically any class with a static VertexCount(const LinearTolerances&) function).
|
||||
template <typename FixedCountVariant>
|
||||
class DynamicInstancesPatchAllocator {
|
||||
public:
|
||||
// 'stride' is provided by PatchWriter.
|
||||
// 'writer' is the DrawWriter that the RenderStep can append instance data to,
|
||||
// 'fixedVertexBuffer' and 'fixedIndexBuffer' are the bindings for the instance template that
|
||||
// is passed to DrawWriter::DynamicInstances.
|
||||
DynamicInstancesPatchAllocator(size_t stride,
|
||||
DrawWriter& writer,
|
||||
BindBufferInfo fixedVertexBuffer,
|
||||
BindBufferInfo fixedIndexBuffer,
|
||||
unsigned int reserveCount)
|
||||
: fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) {
|
||||
SkASSERT(stride == writer.instanceStride());
|
||||
// TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will
|
||||
// appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway?
|
||||
fInstances.reserve(reserveCount);
|
||||
}
|
||||
|
||||
VertexWriter append(const tess::LinearTolerances& tolerances) {
|
||||
return fInstances.append(tolerances, 1);
|
||||
}
|
||||
|
||||
private:
|
||||
struct LinearToleranceProxy {
|
||||
operator unsigned int() const { return FixedCountVariant::VertexCount(fTolerances); }
|
||||
void operator <<(const tess::LinearTolerances& t) { fTolerances.accumulate(t); }
|
||||
|
||||
tess::LinearTolerances fTolerances;
|
||||
};
|
||||
|
||||
DrawWriter::DynamicInstances<LinearToleranceProxy> fInstances;
|
||||
};
|
||||
|
||||
} // namespace skgpu::graphite
|
||||
|
||||
#endif // skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "src/gpu/graphite/DrawGeometry.h"
|
||||
#include "src/gpu/graphite/DrawWriter.h"
|
||||
#include "src/gpu/graphite/render/DynamicInstancesPatchAllocator.h"
|
||||
#include "src/gpu/graphite/render/StencilAndCoverDSS.h"
|
||||
|
||||
#include "src/gpu/tessellate/AffineMatrix.h"
|
||||
@ -21,38 +22,12 @@ namespace {
|
||||
|
||||
using namespace skgpu::tess;
|
||||
|
||||
// TODO: This can be shared by the other path tessellators if PatchWriter can provide the correct
|
||||
// index count based on its traits (curve, wedge, or stroke).
|
||||
// Satisfies API requirements for PatchAllocator template to PatchWriter, using
|
||||
// DrawWriter::DynamicInstances.
|
||||
struct DrawWriterAllocator {
|
||||
DrawWriterAllocator(size_t stride, // required for PatchAllocator
|
||||
DrawWriter& writer,
|
||||
BindBufferInfo fixedVertexBuffer,
|
||||
BindBufferInfo fixedIndexBuffer,
|
||||
unsigned int reserveCount)
|
||||
: fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) {
|
||||
SkASSERT(writer.instanceStride() == stride);
|
||||
// TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will
|
||||
// appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway?
|
||||
fInstances.reserve(reserveCount);
|
||||
}
|
||||
|
||||
VertexWriter append(const LinearTolerances& tolerances) {
|
||||
// TODO (skbug.com/13056): Converting tolerances into an index count for every instance is
|
||||
// wasteful; it only has to be computed when we flush.
|
||||
return fInstances.append(FixedCountCurves::VertexCount(tolerances), 1);
|
||||
}
|
||||
|
||||
DrawWriter::DynamicInstances fInstances;
|
||||
};
|
||||
|
||||
// No fan point or stroke params, since this is for filled curves (not strokes or wedges)
|
||||
// No explicit curve type, since we assume infinity is supported on GPUs using graphite
|
||||
// No color or wide color attribs, since it might always be part of the PaintParams
|
||||
// or we'll add a color-only fast path to RenderStep later.
|
||||
static constexpr PatchAttribs kAttribs = PatchAttribs::kPaintDepth;
|
||||
using Writer = PatchWriter<DrawWriterAllocator,
|
||||
using Writer = PatchWriter<DynamicInstancesPatchAllocator<FixedCountCurves>,
|
||||
Required<PatchAttribs::kPaintDepth>,
|
||||
AddTrianglesWhenChopping,
|
||||
DiscardFlatCurves>;
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "src/gpu/graphite/DrawGeometry.h"
|
||||
#include "src/gpu/graphite/DrawWriter.h"
|
||||
#include "src/gpu/graphite/render/DynamicInstancesPatchAllocator.h"
|
||||
|
||||
#include "src/gpu/tessellate/AffineMatrix.h"
|
||||
#include "src/gpu/tessellate/FixedCountBufferUtils.h"
|
||||
@ -21,30 +22,6 @@ namespace {
|
||||
|
||||
using namespace skgpu::tess;
|
||||
|
||||
// TODO: This is very similar to DrawWriterAllocator in TessellateCurveRenderStep except that the
|
||||
// index count is calculated differently. These will be merged once skbug.com/13056 is resolved.
|
||||
struct DrawWriterAllocator {
|
||||
DrawWriterAllocator(size_t stride, // required for PatchAllocator
|
||||
DrawWriter& writer,
|
||||
BindBufferInfo fixedVertexBuffer,
|
||||
BindBufferInfo fixedIndexBuffer,
|
||||
unsigned int reserveCount)
|
||||
: fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) {
|
||||
SkASSERT(writer.instanceStride() == stride);
|
||||
// TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will
|
||||
// appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway?
|
||||
fInstances.reserve(reserveCount);
|
||||
}
|
||||
|
||||
VertexWriter append(const LinearTolerances& tolerances) {
|
||||
// TODO (skbug.com/13056): Converting tolerances into an index count for every instance is
|
||||
// wasteful; it only has to be computed when we flush.
|
||||
return fInstances.append(FixedCountWedges::VertexCount(tolerances), 1);
|
||||
}
|
||||
|
||||
DrawWriter::DynamicInstances fInstances;
|
||||
};
|
||||
|
||||
// Only kFanPoint, no stroke params, since this is for filled wedges.
|
||||
// No explicit curve type, since we assume infinity is supported on GPUs using graphite
|
||||
// No color or wide color attribs, since it might always be part of the PaintParams
|
||||
@ -52,7 +29,7 @@ struct DrawWriterAllocator {
|
||||
static constexpr PatchAttribs kAttribs = PatchAttribs::kFanPoint |
|
||||
PatchAttribs::kPaintDepth;
|
||||
|
||||
using Writer = PatchWriter<DrawWriterAllocator,
|
||||
using Writer = PatchWriter<DynamicInstancesPatchAllocator<FixedCountWedges>,
|
||||
Required<PatchAttribs::kFanPoint>,
|
||||
Required<PatchAttribs::kPaintDepth>>;
|
||||
|
||||
|
@ -56,8 +56,6 @@ namespace skgpu::tess {
|
||||
*
|
||||
* In addition to variable traits, PatchWriter's first template argument defines the type used for
|
||||
* allocating the GPU instance data. The templated "PatchAllocator" can be any type that provides:
|
||||
* // The stride of each instance in bytes
|
||||
* size_t stride() const;
|
||||
* // A GPU-backed vertex writer for a single instance worth of data. The provided
|
||||
* // LinearTolerances value represents the tolerances for the curve that will be written to the
|
||||
* // returned vertex space.
|
||||
@ -303,7 +301,7 @@ public:
|
||||
// time the deferred patch was recorded.
|
||||
fTolerances.setParametricSegments(fDeferredPatch.fN_p4);
|
||||
if (VertexWriter vw = fPatchAllocator.append(fTolerances)) {
|
||||
vw << VertexWriter::Array<char>(fDeferredPatch.fData, fPatchAllocator.stride());
|
||||
vw << VertexWriter::Array<char>(fDeferredPatch.fData, PatchStride(fAttribs));
|
||||
}
|
||||
}
|
||||
|
||||
@ -499,11 +497,11 @@ private:
|
||||
if constexpr (kTrackJoinControlPoints) {
|
||||
if (fDeferredPatch.fMustDefer) {
|
||||
SkASSERT(!fDeferredPatch.hasPending());
|
||||
SkASSERT(fPatchAllocator.stride() <= kMaxStride);
|
||||
SkASSERT(PatchStride(fAttribs) <= kMaxStride);
|
||||
// Save the computed parametric segment tolerance value so that we can pass that to
|
||||
// the PatchAllocator when flushing the deferred patch.
|
||||
fDeferredPatch.fN_p4 = fTolerances.numParametricSegments_p4();
|
||||
return {fDeferredPatch.fData, fPatchAllocator.stride()};
|
||||
return {fDeferredPatch.fData, PatchStride(fAttribs)};
|
||||
}
|
||||
}
|
||||
return fPatchAllocator.append(fTolerances);
|
||||
|
Loading…
Reference in New Issue
Block a user