diff --git a/gn/graphite.gni b/gn/graphite.gni
index acfce1b820..4ae3da7095 100644
--- a/gn/graphite.gni
+++ b/gn/graphite.gni
@@ -115,6 +115,7 @@ skia_graphite_sources = [
   "$_src/geom/Transform_graphite.h",
   "$_src/render/CoverBoundsRenderStep.cpp",
   "$_src/render/CoverBoundsRenderStep.h",
+  "$_src/render/DynamicInstancesPatchAllocator.h",
   "$_src/render/MiddleOutFanRenderStep.cpp",
   "$_src/render/MiddleOutFanRenderStep.h",
   "$_src/render/StencilAndCoverDSS.h",
diff --git a/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h b/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h
index 78c5f1061f..7292d767c2 100644
--- a/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h
+++ b/src/gpu/ganesh/tessellate/VertexChunkPatchAllocator.h
@@ -29,7 +29,6 @@ public:
             : fWorstCaseTolerances(worstCaseTolerances)
             , fBuilder(target, chunks, stride, minVerticesPerChunk) {}
 
-    size_t stride() const { return fBuilder.stride(); }
     VertexWriter append(const tess::LinearTolerances& tolerances) {
         fWorstCaseTolerances->accumulate(tolerances);
         return fBuilder.appendVertices(1);
diff --git a/src/gpu/graphite/DrawWriter.h b/src/gpu/graphite/DrawWriter.h
index cc35305869..397ad6594b 100644
--- a/src/gpu/graphite/DrawWriter.h
+++ b/src/gpu/graphite/DrawWriter.h
@@ -129,20 +129,37 @@ public:
     // drawIndexedInstanced() (depending on presence of index data in the template). Unlike the
     // Instances mode, the template's index or vertex count is not provided at the time of creation.
     // Instead, DynamicInstances can be used with pipeline programs that can have a flexible number
-    // of vertices per instance. Appended instances specify the minimum index/vertex count they
-    // must be drawn with, but if they are later batched with instances that would use more, the
-    // pipeline's vertex shader knows how to handle it.
+    // of vertices per instance. Appended instances specify a proxy object that can be converted
+    // to the minimum index/vertex count they must be drawn with; but if they are later batched with
+    // instances that would use more, the pipeline's vertex shader knows how to handle it.
+    //
+    // The proxy object serves as a useful point of indirection when the actual index count is
+    // expensive to compute, but can be derived from correlated geometric properties. The proxy
+    // can store those properties and accumulate a "worst-case" and then calculate the index count
+    // when DrawWriter has to flush.
+    //
+    // The VertexCountProxy type must provide:
+    //  - a default constructor and copy assignment, where the initial value represents the minimum
+    //    supported vertex count.
+    //  - an 'unsigned int' operator that converts the proxy to the actual index count that is
+    //    needed in order to dispatch a draw call.
+    //  - operator <<(const V&) where V is any type the caller wants to pass to append() that
+    //    represents the proxy for the about-to-be-written instances. This operator then updates its
+    //    internal state to represent the worst case between what had previously been recorded and
+    //    the latest V value.
     //
     // Usage for drawInstanced (fixedIndices == {}) or drawIndexedInstanced:
-    //    DrawWriter::DynamicInstances instances(writer, fixedVerts, fixedIndices);
-    //    instances.append(minIndexCount1, n1) << ...;
-    //    instances.append(minIndexCount2, n2) << ...;
+    //    DrawWriter::DynamicInstances<ProxyType> instances(writer, fixedVerts, fixedIndices);
+    //    instances.append(minIndexProxy1, n1) << ...;
+    //    instances.append(minIndexProxy2, n2) << ...;
     //
     // In this example, if the two sets of instances were contiguous, a single draw call with
     // (n1 + n2) instances would still be made using max(minIndexCount1, minIndexCount2) as the
-    // index/vertex count. If the available vertex data from the DrawBufferManager forced a flush
-    // after the first, then the second would use minIndexCount2 unless a subsequent compatible
-    // DynamicInstances template appended more contiguous data.
+    // index/vertex count, 'minIndexCountX' was derived from 'minIndexProxyX'. If the available
+    // vertex data from the DrawBufferManager forced a flush after the first, then the second would
+    // use minIndexCount2 unless a subsequent compatible DynamicInstances template appended more
+    // contiguous data.
+    template <typename VertexCountProxy>
     class DynamicInstances;
 
     // Issues a draws with fully specified data. This can be used when all instance data has already
@@ -200,6 +217,7 @@ private:
     void bindAndFlush(BindBufferInfo vertices, BindBufferInfo indices, BindBufferInfo instances,
                       unsigned int templateCount, unsigned int drawCount) {
         SkASSERT(drawCount > 0);
+        SkASSERT(!fAppender); // shouldn't be appending and manually drawing at the same time.
         this->setTemplate(vertices, indices, instances, SkTo<int>(templateCount));
         fPendingBase = 0;
         fPendingCount = drawCount;
@@ -252,7 +270,7 @@ public:
         SkDEBUGCODE(w.fAppender = this;)
     }
 
-    ~Appender() {
+    virtual ~Appender() {
         if (fReservedCount > 0) {
             fDrawer.fManager->returnVertexBytes(fReservedCount * fStride);
         }
@@ -268,6 +286,8 @@ protected:
     unsigned int fReservedCount; // in target stride units
     VertexWriter fNextWriter;    // writing to the target buffer binding
 
+    virtual void onFlush() {}
+
     void reserve(unsigned int count) {
         if (fReservedCount >= count) {
             return;
@@ -285,6 +305,7 @@ protected:
             reservedChunk.fOffset !=
                     (fTarget.fOffset + (fDrawer.fPendingBase + fDrawer.fPendingCount) * fStride)) {
             // Not contiguous, so flush and update binding to 'reservedChunk'
+            this->onFlush();
             fDrawer.flush();
 
             fTarget = reservedChunk;
@@ -329,6 +350,7 @@ public:
     using Appender::append;
 };
 
+template <typename VertexCountProxy>
 class DrawWriter::DynamicInstances : private DrawWriter::Appender {
 public:
     DynamicInstances(DrawWriter& w,
@@ -338,17 +360,42 @@ public:
         w.setTemplate(vertices, indices, w.fInstances, -1);
     }
 
+    ~DynamicInstances() override {
+        // Persist the template count since the DrawWriter could continue batching if a new
+        // compatible DynamicInstances object is created for the next draw.
+        this->updateTemplateCount();
+    }
+
     using Appender::reserve;
 
-    VertexWriter append(unsigned int indexCount, unsigned int instanceCount) {
-        SkASSERT(indexCount > 0);
+    template <typename V>
+    VertexWriter append(const V& vertexCount, unsigned int instanceCount) {
         VertexWriter w = this->Appender::append(instanceCount);
         // Record index count after appending instance data in case the append triggered a flush
-        // or earlier dynamic instances and the max index count is reset. This ensures that the
-        // just appended instances will be flushed with a template count at least 'indexCount'.
-        fDrawer.fTemplateCount = std::min(fDrawer.fTemplateCount, -SkTo<int>(indexCount) - 1);
+        // and the max index count is reset. However, the contents of 'w' will not have been flushed
+        // so 'fProxy' will account for 'vertexCount' when it is actually drawn.
+        fProxy << vertexCount;
         return w;
     }
+
+private:
+    void updateTemplateCount() {
+        unsigned int count = static_cast<unsigned int>(fProxy);
+        SkASSERT(count > 0);
+        fDrawer.fTemplateCount = std::min(fDrawer.fTemplateCount, -SkTo<int>(count) - 1);
+        // By resetting the proxy after updating the template count, the next batch will start over
+        // with the minimum required vertex count and grow from there.
+        fProxy = {};
+    }
+
+    void onFlush() override {
+        // Update the DrawWriter's template count before its flush() is invoked and the appender
+        // starts recording to a new buffer, which ensures the flush's draw call uses the most
+        // up-to-date vertex count derived from fProxy.
+        this->updateTemplateCount();
+    }
+
+    VertexCountProxy fProxy = {};
 };
 
 } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/render/BUILD.bazel b/src/gpu/graphite/render/BUILD.bazel
index c9eb8fd64e..4ccb1ffd00 100644
--- a/src/gpu/graphite/render/BUILD.bazel
+++ b/src/gpu/graphite/render/BUILD.bazel
@@ -78,6 +78,7 @@ generated_cc_atom(
     srcs = ["TessellateCurvesRenderStep.cpp"],
     visibility = ["//:__subpackages__"],
     deps = [
+        ":DynamicInstancesPatchAllocator_hdr",
         ":StencilAndCoverDSS_hdr",
         ":TessellateCurvesRenderStep_hdr",
         "//src/gpu/graphite:DrawGeometry_hdr",
@@ -100,6 +101,7 @@ generated_cc_atom(
     srcs = ["TessellateWedgesRenderStep.cpp"],
     visibility = ["//:__subpackages__"],
     deps = [
+        ":DynamicInstancesPatchAllocator_hdr",
         ":TessellateWedgesRenderStep_hdr",
         "//src/gpu/graphite:DrawGeometry_hdr",
         "//src/gpu/graphite:DrawWriter_hdr",
@@ -109,3 +111,14 @@ generated_cc_atom(
         "//src/gpu/tessellate:PatchWriter_hdr",
     ],
 )
+
+generated_cc_atom(
+    name = "DynamicInstancesPatchAllocator_hdr",
+    hdrs = ["DynamicInstancesPatchAllocator.h"],
+    visibility = ["//:__subpackages__"],
+    deps = [
+        "//src/gpu:BufferWriter_hdr",
+        "//src/gpu/graphite:DrawWriter_hdr",
+        "//src/gpu/tessellate:LinearTolerances_hdr",
+    ],
+)
diff --git a/src/gpu/graphite/render/DynamicInstancesPatchAllocator.h b/src/gpu/graphite/render/DynamicInstancesPatchAllocator.h
new file mode 100644
index 0000000000..dba4b1f2eb
--- /dev/null
+++ b/src/gpu/graphite/render/DynamicInstancesPatchAllocator.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED
+#define skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED
+
+#include "src/gpu/BufferWriter.h"
+#include "src/gpu/graphite/DrawWriter.h"
+#include "src/gpu/tessellate/LinearTolerances.h"
+
+namespace skgpu::graphite {
+
+// An adapter around DrawWriter::DynamicInstances that fits the API requirements of
+// skgpu::tess::PatchWriter's PatchAllocator template parameter.
+//
+// FixedCountVariant should be one of FixedCountCurves, FixedCountWedges, or FixedCountStrokes
+// (technically any class with a static VertexCount(const LinearTolerances&) function).
+template <typename FixedCountVariant>
+class DynamicInstancesPatchAllocator {
+public:
+    // 'stride' is provided by PatchWriter.
+    // 'writer' is the DrawWriter that the RenderStep can append instance data to,
+    // 'fixedVertexBuffer' and 'fixedIndexBuffer' are the bindings for the instance template that
+    // is passed to DrawWriter::DynamicInstances.
+    DynamicInstancesPatchAllocator(size_t stride,
+                                   DrawWriter& writer,
+                                   BindBufferInfo fixedVertexBuffer,
+                                   BindBufferInfo fixedIndexBuffer,
+                                   unsigned int reserveCount)
+            : fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) {
+        SkASSERT(stride == writer.instanceStride());
+        // TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will
+        // appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway?
+        fInstances.reserve(reserveCount);
+    }
+
+    VertexWriter append(const tess::LinearTolerances& tolerances) {
+        return fInstances.append(tolerances, 1);
+    }
+
+private:
+    struct LinearToleranceProxy {
+        operator unsigned int() const { return FixedCountVariant::VertexCount(fTolerances); }
+        void operator <<(const tess::LinearTolerances& t) { fTolerances.accumulate(t); }
+
+        tess::LinearTolerances fTolerances;
+    };
+
+    DrawWriter::DynamicInstances<LinearToleranceProxy> fInstances;
+};
+
+}  // namespace skgpu::graphite
+
+#endif // skgpu_graphite_render_DynamicInstancesPatchAllocator_DEFINED
diff --git a/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp b/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp
index fc03c42caf..79759812fe 100644
--- a/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp
+++ b/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp
@@ -9,6 +9,7 @@
 
 #include "src/gpu/graphite/DrawGeometry.h"
 #include "src/gpu/graphite/DrawWriter.h"
+#include "src/gpu/graphite/render/DynamicInstancesPatchAllocator.h"
 #include "src/gpu/graphite/render/StencilAndCoverDSS.h"
 
 #include "src/gpu/tessellate/AffineMatrix.h"
@@ -21,38 +22,12 @@ namespace {
 
 using namespace skgpu::tess;
 
-// TODO: This can be shared by the other path tessellators if PatchWriter can provide the correct
-// index count based on its traits (curve, wedge, or stroke).
-// Satisfies API requirements for PatchAllocator template to PatchWriter, using
-// DrawWriter::DynamicInstances.
-struct DrawWriterAllocator {
-    DrawWriterAllocator(size_t stride, // required for PatchAllocator
-                        DrawWriter& writer,
-                        BindBufferInfo fixedVertexBuffer,
-                        BindBufferInfo fixedIndexBuffer,
-                        unsigned int reserveCount)
-            : fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) {
-        SkASSERT(writer.instanceStride() == stride);
-        // TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will
-        // appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway?
-        fInstances.reserve(reserveCount);
-    }
-
-    VertexWriter append(const LinearTolerances& tolerances) {
-        // TODO (skbug.com/13056): Converting tolerances into an index count for every instance is
-        // wasteful; it only has to be computed when we flush.
-        return fInstances.append(FixedCountCurves::VertexCount(tolerances), 1);
-    }
-
-    DrawWriter::DynamicInstances fInstances;
-};
-
 // No fan point or stroke params, since this is for filled curves (not strokes or wedges)
 // No explicit curve type, since we assume infinity is supported on GPUs using graphite
 // No color or wide color attribs, since it might always be part of the PaintParams
 // or we'll add a color-only fast path to RenderStep later.
 static constexpr PatchAttribs kAttribs = PatchAttribs::kPaintDepth;
-using Writer = PatchWriter<DrawWriterAllocator,
+using Writer = PatchWriter<DynamicInstancesPatchAllocator<FixedCountCurves>,
                            Required<PatchAttribs::kPaintDepth>,
                            AddTrianglesWhenChopping,
                            DiscardFlatCurves>;
diff --git a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
index 7dfd36c931..690a19cc7f 100644
--- a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
+++ b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
@@ -9,6 +9,7 @@
 
 #include "src/gpu/graphite/DrawGeometry.h"
 #include "src/gpu/graphite/DrawWriter.h"
+#include "src/gpu/graphite/render/DynamicInstancesPatchAllocator.h"
 
 #include "src/gpu/tessellate/AffineMatrix.h"
 #include "src/gpu/tessellate/FixedCountBufferUtils.h"
@@ -21,30 +22,6 @@ namespace {
 
 using namespace skgpu::tess;
 
-// TODO: This is very similar to DrawWriterAllocator in TessellateCurveRenderStep except that the
-// index count is calculated differently. These will be merged once skbug.com/13056 is resolved.
-struct DrawWriterAllocator {
-    DrawWriterAllocator(size_t stride, // required for PatchAllocator
-                        DrawWriter& writer,
-                        BindBufferInfo fixedVertexBuffer,
-                        BindBufferInfo fixedIndexBuffer,
-                        unsigned int reserveCount)
-            : fInstances(writer, fixedVertexBuffer, fixedIndexBuffer) {
-        SkASSERT(writer.instanceStride() == stride);
-        // TODO: Is it worth re-reserving large chunks after this preallocation is used up? Or will
-        // appending 1 at a time be fine since it's coming from a large vertex buffer alloc anyway?
-        fInstances.reserve(reserveCount);
-    }
-
-    VertexWriter append(const LinearTolerances& tolerances) {
-        // TODO (skbug.com/13056): Converting tolerances into an index count for every instance is
-        // wasteful; it only has to be computed when we flush.
-        return fInstances.append(FixedCountWedges::VertexCount(tolerances), 1);
-    }
-
-    DrawWriter::DynamicInstances fInstances;
-};
-
 // Only kFanPoint, no stroke params, since this is for filled wedges.
 // No explicit curve type, since we assume infinity is supported on GPUs using graphite
 // No color or wide color attribs, since it might always be part of the PaintParams
@@ -52,7 +29,7 @@ struct DrawWriterAllocator {
 static constexpr PatchAttribs kAttribs = PatchAttribs::kFanPoint |
                                          PatchAttribs::kPaintDepth;
 
-using Writer = PatchWriter<DrawWriterAllocator,
+using Writer = PatchWriter<DynamicInstancesPatchAllocator<FixedCountWedges>,
                            Required<PatchAttribs::kFanPoint>,
                            Required<PatchAttribs::kPaintDepth>>;
 
diff --git a/src/gpu/tessellate/PatchWriter.h b/src/gpu/tessellate/PatchWriter.h
index d31041ddea..d2cc59d0ae 100644
--- a/src/gpu/tessellate/PatchWriter.h
+++ b/src/gpu/tessellate/PatchWriter.h
@@ -56,8 +56,6 @@ namespace skgpu::tess {
  *
  * In addition to variable traits, PatchWriter's first template argument defines the type used for
  * allocating the GPU instance data. The templated "PatchAllocator" can be any type that provides:
- *    // The stride of each instance in bytes
- *    size_t stride() const;
  *    // A GPU-backed vertex writer for a single instance worth of data. The provided
  *    // LinearTolerances value represents the tolerances for the curve that will be written to the
  *    // returned vertex space.
@@ -303,7 +301,7 @@ public:
             // time the deferred patch was recorded.
             fTolerances.setParametricSegments(fDeferredPatch.fN_p4);
             if (VertexWriter vw = fPatchAllocator.append(fTolerances)) {
-                vw << VertexWriter::Array<char>(fDeferredPatch.fData, fPatchAllocator.stride());
+                vw << VertexWriter::Array<char>(fDeferredPatch.fData, PatchStride(fAttribs));
             }
         }
 
@@ -499,11 +497,11 @@ private:
         if constexpr (kTrackJoinControlPoints) {
             if (fDeferredPatch.fMustDefer) {
                 SkASSERT(!fDeferredPatch.hasPending());
-                SkASSERT(fPatchAllocator.stride() <= kMaxStride);
+                SkASSERT(PatchStride(fAttribs) <= kMaxStride);
                 // Save the computed parametric segment tolerance value so that we can pass that to
                 // the PatchAllocator when flushing the deferred patch.
                 fDeferredPatch.fN_p4 = fTolerances.numParametricSegments_p4();
-                return {fDeferredPatch.fData, fPatchAllocator.stride()};
+                return {fDeferredPatch.fData, PatchStride(fAttribs)};
             }
         }
         return fPatchAllocator.append(fTolerances);