Reland "Extract per-edge quad vertex tesselation code into reusable interface"

This is a reland of fc3784bd6c Original change's description: > Extract per-edge quad vertex tesselation code into reusable interface > > This moves the vertex templates and the edge outset/tessellation code into a new GrPerEdgeAAQuadHelper h/cpp file. The vertex template hierarchy has been expanded to include an optional local coordinate type: void, SkPoint, or SkPoint3. The texture op only uses SkPoint for its local coordinates but the regular rect op will need void and SkPoint3 as well. > > A large part of the added code is providing the tessellation specializations for those new local coordinate types. > > Bug: skia: > Change-Id: Id8cf2a17342f30b299b16be95e341d4991951c38 > Reviewed-on: https://skia-review.googlesource.com/c/164611 > Commit-Queue: Michael Ludwig <michaelludwig@google.com> > Reviewed-by: Brian Osman <brianosman@google.com> > Reviewed-by: Brian Salomon <bsalomon@google.com> Bug: skia: Change-Id: I9cf9fe2e3ccdacc396290b39f839e790a117fa8c Reviewed-on: https://skia-review.googlesource.com/c/165781 Reviewed-by: Brian Osman <brianosman@google.com> Commit-Queue: Michael Ludwig <michaelludwig@google.com>
2018-10-29 11:09:29 -04:00 · 2018-10-29 11:09:29 -04:00 · 460eb5e746
commit 460eb5e746
parent 0ac0eddd88
4 changed files with 537 additions and 376 deletions
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@ -270,6 +270,8 @@ skia_gpu_sources = [
  "$_src/gpu/ops/GrOp.h",
  "$_src/gpu/ops/GrOvalOpFactory.cpp",
  "$_src/gpu/ops/GrOvalOpFactory.h",
+  "$_src/gpu/ops/GrQuadPerEdgeAA.cpp",
+  "$_src/gpu/ops/GrQuadPerEdgeAA.h",
  "$_src/gpu/ops/GrRectOpFactory.h",
  "$_src/gpu/ops/GrRegionOp.cpp",
  "$_src/gpu/ops/GrRegionOp.h",
--- a/src/gpu/ops/GrQuadPerEdgeAA.cpp
+++ b/src/gpu/ops/GrQuadPerEdgeAA.cpp
@ -0,0 +1,369 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrQuadPerEdgeAA.h"
+#include "GrQuad.h"
+#include "SkNx.h"
+
+namespace {
+
+// Computes the four edge equations for a 2D quad, outsets them, and optionally computes a new quad
+// as the intersection points of the outset edges.
+//
+// 'x' and 'y' contain the original points on input; they are only modified (replaced by the outset
+// points) when 'outsetCorners' is true. 'a', 'b', and 'c' are always written with the edge
+// equation coefficients, one edge per lane.
+//
+// 'u', 'v', and 'r' are the local coordinates ('r' is the local position's w component if it
+// exists). They are only dereferenced when 'outsetCorners' is true: 'u' and 'v' when
+// uvrChannelCount > 0 and 'r' only when uvrChannelCount == 3. Channels that are not dereferenced
+// may be passed as nullptr.
+static void compute_quad_edges_and_outset_vertices(GrQuadAAFlags aaFlags, Sk4f* x, Sk4f* y, Sk4f* a,
+                                                   Sk4f* b, Sk4f* c, Sk4f* u, Sk4f* v, Sk4f* r,
+                                                   int uvrChannelCount, bool outsetCorners) {
+    SkASSERT(uvrChannelCount == 0 || uvrChannelCount == 2 || uvrChannelCount == 3);
+
+    static constexpr auto fma = SkNx_fma<4, float>;
+    // These rotate the points/edge values either clockwise or counterclockwise assuming tri strip
+    // order.
+    auto nextCW  = [](const Sk4f& v) { return SkNx_shuffle<2, 0, 3, 1>(v); };
+    auto nextCCW = [](const Sk4f& v) { return SkNx_shuffle<1, 3, 0, 2>(v); };
+
+    // Compute edge equations for the quad.
+    auto xnext = nextCCW(*x);
+    auto ynext = nextCCW(*y);
+    // xdiff and ydiff will comprise the normalized vectors pointing along each quad edge.
+    auto xdiff = xnext - *x;
+    auto ydiff = ynext - *y;
+    // invLengths holds 1/length of each edge (via rsqrt); it is reused below to scale the local
+    // coordinate outsets.
+    auto invLengths = fma(xdiff, xdiff, ydiff * ydiff).rsqrt();
+    xdiff *= invLengths;
+    ydiff *= invLengths;
+
+    // Use above vectors to compute edge equations.
+    *c = fma(xnext, *y,  -ynext * *x) * invLengths;
+    // Make sure the edge equations have their normals facing into the quad in device space.
+    // 'test' evaluates each candidate equation (a = ydiff, b = -xdiff) at the nextCW vertex; if
+    // any lane is negative, all four equations are negated.
+    auto test = fma(ydiff, nextCW(*x), fma(-xdiff, nextCW(*y), *c));
+    if ((test < Sk4f(0)).anyTrue()) {
+        *a = -ydiff;
+        *b = xdiff;
+        *c = -*c;
+    } else {
+        *a = ydiff;
+        *b = -xdiff;
+    }
+    // Outset the edge equations so aa coverage evaluates to zero half a pixel away from the
+    // original quad edge.
+    *c += 0.5f;
+
+    if (aaFlags != GrQuadAAFlags::kAll) {
+        // Only some edges are anti-aliased: build a per-edge 0/1 mask.
+        // This order is the same order the edges appear in xdiff/ydiff and therefore as the
+        // edges in a/b/c.
+        auto mask = Sk4f(GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
+                         GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
+                         GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
+                         GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
+        // Outset edge equations for masked out edges another pixel so that they always evaluate
+        // >= 1.
+        *c += (1.f - mask);
+        if (outsetCorners) {
+            // Do the vertex outset. Each vertex moves 0.5 along an adjacent edge direction only
+            // when the corresponding mask lane is set.
+            mask *= 0.5f;
+            auto maskCW = nextCW(mask);
+            *x += maskCW * -xdiff + mask * nextCW(xdiff);
+            *y += maskCW * -ydiff + mask * nextCW(ydiff);
+            if (uvrChannelCount > 0) {
+                // We want to extend the texture coords by the same proportion as the positions.
+                // udiff/vdiff/rdiff are not normalized, so the masks are scaled by the inverse
+                // device-space edge lengths instead.
+                maskCW *= invLengths;
+                mask *= nextCW(invLengths);
+                Sk4f udiff = nextCCW(*u) - *u;
+                Sk4f vdiff = nextCCW(*v) - *v;
+                *u += maskCW * -udiff + mask * nextCW(udiff);
+                *v += maskCW * -vdiff + mask * nextCW(vdiff);
+                if (uvrChannelCount == 3) {
+                    Sk4f rdiff = nextCCW(*r) - *r;
+                    *r += maskCW * -rdiff + mask * nextCW(rdiff);
+                }
+            }
+        }
+    } else if (outsetCorners) {
+        // All four edges are anti-aliased, so no mask is needed: every vertex moves 0.5 along
+        // both of its adjacent (normalized) edge directions.
+        *x += 0.5f * (-xdiff + nextCW(xdiff));
+        *y += 0.5f * (-ydiff + nextCW(ydiff));
+        if (uvrChannelCount > 0) {
+            // t is the half-pixel outset expressed as a fraction of each device-space edge length.
+            Sk4f t = 0.5f * invLengths;
+            Sk4f udiff = nextCCW(*u) - *u;
+            Sk4f vdiff = nextCCW(*v) - *v;
+            *u += t * -udiff + nextCW(t) * nextCW(udiff);
+            *v += t * -vdiff + nextCW(t) * nextCW(vdiff);
+            if (uvrChannelCount == 3) {
+                Sk4f rdiff = nextCCW(*r) - *r;
+                *r += t * -rdiff + nextCW(t) * nextCW(rdiff);
+            }
+        }
+    }
+}
+
+// Generalizes the above function to extrapolate local coords such that after perspective division
+// of the device coordinate, the original local coordinate value is at the original un-outset
+// device position. r is the local coordinate's w component.
+//
+// 'x', 'y', and 'w' are the homogeneous device coordinates; they are updated in place for every
+// side whose AA flag is set. 'a', 'b', and 'c' receive the edge equations computed from the
+// projected (x/w, y/w) quad. 'u' and 'v' are only dereferenced when uvrChannelCount > 0 and 'r'
+// only when uvrChannelCount == 3.
+static void compute_quad_edges_and_outset_persp_vertices(GrQuadAAFlags aaFlags, Sk4f* x, Sk4f* y,
+                                                         Sk4f* w, Sk4f* a, Sk4f* b, Sk4f* c,
+                                                         Sk4f* u, Sk4f* v, Sk4f* r,
+                                                         int uvrChannelCount) {
+    SkASSERT(uvrChannelCount == 0 || uvrChannelCount == 2 || uvrChannelCount == 3);
+
+    // Project the homogeneous device points to 2D.
+    auto iw = (*w).invert();
+    auto x2d = (*x) * iw;
+    auto y2d = (*y) * iw;
+    // Don't compute outset corners in the normalized space, which means u, v, and r don't need
+    // to be provided here (outset separately below).
+    compute_quad_edges_and_outset_vertices(aaFlags, &x2d, &y2d, a, b, c, nullptr, nullptr, nullptr,
+                                           /* uvr ct */ 0, /* outsetCorners */ false);
+
+    static const float kOutset = 0.5f;
+    // First pass: outset the left and/or right sides.
+    if ((GrQuadAAFlags::kLeft | GrQuadAAFlags::kRight) & aaFlags) {
+        // For each entry in x the equivalent entry in opX is the left/right opposite and so on.
+        Sk4f opX = SkNx_shuffle<2, 3, 0, 1>(*x);
+        Sk4f opW = SkNx_shuffle<2, 3, 0, 1>(*w);
+        Sk4f opY = SkNx_shuffle<2, 3, 0, 1>(*y);
+        // vx/vy holds the device space left-to-right vectors along top and bottom of the quad.
+        Sk2f vx = SkNx_shuffle<2, 3>(x2d) - SkNx_shuffle<0, 1>(x2d);
+        Sk2f vy = SkNx_shuffle<2, 3>(y2d) - SkNx_shuffle<0, 1>(y2d);
+        Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
+        // For each device space corner, devP, label its left/right opposite device space point
+        // opDevPt. The new device space point is opDevPt + s (devPt - opDevPt) where s is
+        // (length(devPt - opDevPt) + 0.5) / length(devPt - opDevPt);
+        Sk4f s = SkNx_shuffle<0, 1, 0, 1>((len + kOutset) / len);
+        // Compute t in homogeneous space from s using similar triangles so that we can produce
+        // homogeneous outset vertices for perspective-correct interpolation.
+        Sk4f sOpW = s * opW;
+        Sk4f t = sOpW / (sOpW + (1.f - s) * (*w));
+        // mask is used to make the t values be 1 when the left/right side is not antialiased.
+        Sk4f mask(GrQuadAAFlags::kLeft & aaFlags  ? 1.f : 0.f,
+                  GrQuadAAFlags::kLeft & aaFlags  ? 1.f : 0.f,
+                  GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f,
+                  GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
+        t = t * mask + (1.f - mask);
+        // Extrapolate each homogeneous point away from its opposite point by t.
+        *x = opX + t * (*x - opX);
+        *y = opY + t * (*y - opY);
+        *w = opW + t * (*w - opW);
+
+        if (uvrChannelCount > 0) {
+            // Apply the same extrapolation factor to the local coordinates.
+            Sk4f opU = SkNx_shuffle<2, 3, 0, 1>(*u);
+            Sk4f opV = SkNx_shuffle<2, 3, 0, 1>(*v);
+            *u = opU + t * (*u - opU);
+            *v = opV + t * (*v - opV);
+            if (uvrChannelCount == 3) {
+                Sk4f opR = SkNx_shuffle<2, 3, 0, 1>(*r);
+                *r = opR + t * (*r - opR);
+            }
+        }
+
+        if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
+            // Update the 2D points for the top/bottom calculation.
+            // The second pass measures edge lengths on the already left/right-outset quad.
+            iw = (*w).invert();
+            x2d = (*x) * iw;
+            y2d = (*y) * iw;
+        }
+    }
+
+    // Second pass: outset the top and/or bottom sides.
+    if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
+        // This operates the same as above but for top/bottom rather than left/right.
+        Sk4f opX = SkNx_shuffle<1, 0, 3, 2>(*x);
+        Sk4f opW = SkNx_shuffle<1, 0, 3, 2>(*w);
+        Sk4f opY = SkNx_shuffle<1, 0, 3, 2>(*y);
+
+        Sk2f vx = SkNx_shuffle<1, 3>(x2d) - SkNx_shuffle<0, 2>(x2d);
+        Sk2f vy = SkNx_shuffle<1, 3>(y2d) - SkNx_shuffle<0, 2>(y2d);
+        Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
+
+        Sk4f s = SkNx_shuffle<0, 0, 1, 1>((len + kOutset) / len);
+
+        Sk4f sOpW = s * opW;
+        Sk4f t = sOpW / (sOpW + (1.f - s) * (*w));
+
+        // As above, force t to 1 for vertices on a side that is not antialiased.
+        Sk4f mask(GrQuadAAFlags::kTop    & aaFlags ? 1.f : 0.f,
+                  GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
+                  GrQuadAAFlags::kTop    & aaFlags ? 1.f : 0.f,
+                  GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f);
+        t = t * mask + (1.f - mask);
+        *x = opX + t * (*x - opX);
+        *y = opY + t * (*y - opY);
+        *w = opW + t * (*w - opW);
+
+        if (uvrChannelCount > 0) {
+            Sk4f opU = SkNx_shuffle<1, 0, 3, 2>(*u);
+            Sk4f opV = SkNx_shuffle<1, 0, 3, 2>(*v);
+            *u = opU + t * (*u - opU);
+            *v = opV + t * (*v - opV);
+            if (uvrChannelCount == 3) {
+                Sk4f opR = SkNx_shuffle<1, 0, 3, 2>(*r);
+                *r = opR + t * (*r - opR);
+            }
+        }
+    }
+}
+
+// Fast path for non-AA quads batched into an AA op. Since they are part of the AA op, the vertices
+// need to have valid edge equations that ensure coverage is set to 1. To get perspective
+// interpolation of the edge distance, the vertex shader outputs d*w and then multiplies by 1/w in
+// the fragment shader. For non-AA edges, the edge equation can be simplified to 0*x/w + y/w + c >=
+// 1, so the vertex shader outputs c*w. The quad is sent as two triangles, so a fragment is the
+// interpolation between 3 of the 4 vertices. If iX are the weights for the 3 involved quad
+// vertices, then the fragment shader's state is:
+//   f_cw = c * (iA*wA + iB*wB + iC*wC) and f_1/w = iA/wA + iB/wB + iC/wC
+//   (where A,B,C are chosen from {1,2,3, 4})
+// When there's no perspective, then f_cw*f_1/w = c and setting c = 1 guarantees a proper non-AA
+// edge. Unfortunately when there is perspective, f_cw*f_1/w != c unless the fragment is at a
+// vertex. We must pick a c such that f_cw*f_1/w >= 1 across the whole primitive.
+// Let n = min(w1,w2,w3,w4) and m = max(w1,w2,w3,w4) and rewrite
+//   f_1/w=(iA*wB*wC + iB*wA*wC + iC*wA*wB) / (wA*wB*wC)
+// Since the iXs are weights for the interior of the primitive, then we have:
+//   n <= (iA*wA + iB*wB + iC*wC) <= m and
+//   n^2 <= (iA*wB*wC + iB*wA*wC + iC*wA*wB) <= m^2 and
+//   n^3 <= wA*wB*wC <= m^3 regardless of the choice of A,B, and C
+// Thus if we set c = m^3/n^3, it guarantees f_cw*f_1/w >= 1 for any perspective.
+static SkPoint3 compute_non_aa_persp_edge_coeffs(const Sk4f& w) {
+    // Smallest and largest w over the quad's four vertices.
+    const float minW = w.min();
+    const float maxW = w.max();
+    const float maxCubed = maxW * maxW * maxW;
+    const float minCubed = minW * minW * minW;
+    // a = b = 0; only the constant term c = max^3 / min^3 is non-zero.
+    return {0.f, 0.f, maxCubed / minCubed};
+}
+
+// When there's guaranteed no perspective, the edge coefficients for non-AA quads are constant:
+// a = 0, b = 0, c = 1.
+static constexpr SkPoint3 kNonAANoPerspEdgeCoeffs = {0, 0, 1};
+
+// Writes one channel's four per-vertex values ('data') into localStorage as a contiguous block
+// beginning at 'offset'. Returns the offset at which the next channel's block starts.
+static int store(const Sk4f& data, float* localStorage, int offset) {
+    constexpr int kValuesPerChannel = 4;
+    float* const block = &localStorage[offset];
+    data.store(block);
+    return offset + kValuesPerChannel;
+}
+
+// Gathers 'dimCt' values for a single quad vertex out of consecutive channel blocks in
+// localStorage. Each channel occupies four floats (one per vertex), so 'offset' should start at the
+// vertex index (0 to 3) plus the offsets of any channels to skip. Returns the offset of the same
+// vertex within the channel that follows the ones read.
+static int load(const float* localStorage, int offset, float* coordOut, int dimCt) {
+    float* dst = coordOut;
+    for (int remaining = dimCt; remaining > 0; --remaining) {
+        *dst++ = localStorage[offset];
+        // Step to the same vertex in the next channel block.
+        offset += 4;
+    }
+    return offset;
+}
+
+} // anonymous namespace
+
+// Writes the four vertices of a quad into 'vertices', computing per-edge AA data when requested.
+//
+//   vertices / vertexSize   Destination buffer holding four vertices of vertexSize bytes each.
+//   localStorage            Scratch space for 4 floats per channel: x, y, [w], [u, v, [r]],
+//                           [a, b, c], in that order.
+//   deviceQuad / pos*       Device-space quad; posDim is 2 (x, y) or 3 (x, y, w).
+//   color / color*          colorSize bytes copied verbatim into every vertex (skipped if 0).
+//   srcQuad / src*          Local coordinates; srcDim is 0 (none), 2 (u, v) or 3 (u, v, r).
+//   domain / domain*        domainSize bytes copied verbatim into every vertex (skipped if 0).
+//   aaFlags / aa*           If aaSize is non-zero, the edge equations are computed and all four
+//                           (a, b, c) triples are written into every vertex at aaOffset.
+void GrQuadPerEdgeAA::TessellateImpl(void* vertices, size_t vertexSize, float* localStorage,
+        const GrPerspQuad& deviceQuad, int posDim, size_t posOffset, size_t posSize,
+        const void* color, size_t colorOffset, size_t colorSize,
+        const GrPerspQuad& srcQuad, int srcDim, size_t srcOffset, size_t srcSize,
+        const void* domain, size_t domainOffset, size_t domainSize,
+        GrQuadAAFlags aaFlags, size_t aaOffset, size_t aaSize) {
+    // Make sure the device and local positions are dimensions that are supported
+    SkASSERT(posDim == 2 || posDim == 3);
+    SkASSERT(srcDim == 0 || srcDim == 2 || srcDim == 3);
+    // Make sure that the position sizes are the proper multiples of sizeof(float) since we copy
+    // floats directly into the block without converting types
+    SkASSERT(posSize == posDim * sizeof(float));
+    SkASSERT(srcSize == srcDim * sizeof(float));
+    // Make sure the component sizes completely fill the vertex
+    SkASSERT(vertexSize == posSize + colorSize + srcSize + domainSize + aaSize);
+
+    // Load position data into Sk4fs (always x, y and maybe w)
+    Sk4f x = deviceQuad.x4f();
+    Sk4f y = deviceQuad.y4f();
+    Sk4f w;
+    if (posDim == 3) {
+        w = deviceQuad.w4f();
+    }
+
+    // Load local position data into Sk4fs (either none, just u,v or all three)
+    Sk4f u, v, r;
+    if (srcDim > 0) {
+        u = srcQuad.x4f();
+        v = srcQuad.y4f();
+
+        if (srcDim == 3) {
+            r = srcQuad.w4f();
+        }
+    }
+
+    Sk4f a, b, c;
+    if (aaSize) {
+        // Must calculate edges and possibly outset the positions
+        if (aaFlags == GrQuadAAFlags::kNone) {
+            // A non-AA quad that got batched into an AA group, so its edges will be the same for
+            // all four vertices and it does not need to be outset
+            SkPoint3 edgeCoeffs;
+            if (posDim == 3) {
+                edgeCoeffs = compute_non_aa_persp_edge_coeffs(w);
+            } else {
+                edgeCoeffs = kNonAANoPerspEdgeCoeffs;
+            }
+
+            // Copy the coefficients into all four equations
+            a = edgeCoeffs.fX;
+            b = edgeCoeffs.fY;
+            c = edgeCoeffs.fZ;
+        } else if (posDim == 2) {
+            // For simplicity, pointers to u, v, and r are always provided, but srcDim
+            // ensures that only loaded Sk4fs are modified in the compute functions.
+            compute_quad_edges_and_outset_vertices(
+                    aaFlags, &x, &y, &a, &b, &c, &u, &v, &r, srcDim, /* outset */ true);
+        } else {
+            compute_quad_edges_and_outset_persp_vertices(
+                    aaFlags, &x, &y, &w, &a, &b, &c, &u, &v, &r, srcDim);
+        }
+    }
+
+    // It is faster to unpack the Sk4fs all at once than access their components out of order.
+    int offset = store(x, localStorage, 0);
+    offset = store(y, localStorage, offset);
+    if (posDim == 3) {
+        offset = store(w, localStorage, offset);
+    }
+    if (srcDim > 0) {
+        offset = store(u, localStorage, offset);
+        offset = store(v, localStorage, offset);
+        if (srcDim == 3) {
+            // The third local channel is r (the local coordinate's w), not the device w: w is
+            // never assigned when posDim == 2, and r carries the outset local value.
+            offset = store(r, localStorage, offset);
+        }
+    }
+    int edgeOffset = offset; // The 4 edges are separate from the 4 vertices
+    if (aaSize) {
+        offset = store(a, localStorage, offset);
+        offset = store(b, localStorage, offset);
+        offset = store(c, localStorage, offset);
+    }
+    // Now rearrange the unpacked buffer into the vertex layout
+    char* vb = reinterpret_cast<char*>(vertices);
+    for (int i = 0; i < 4; ++i) {
+        // Starting the offset at i makes sure that all loads read the data for the i^th vertex
+        offset = i;
+
+        // NOTE: while this code uses explicit offsets to make it independent of the actual
+        // vertex layout, it is a good idea to keep the writes in the same order as the fields
+
+        // save position
+        offset = load(localStorage, offset, reinterpret_cast<float*>(vb + posOffset), posDim);
+        // save color
+        if (colorSize) {
+            memcpy(vb + colorOffset, color, colorSize);
+        }
+        // save local position
+        if (srcDim) {
+            offset = load(localStorage, offset, reinterpret_cast<float*>(vb + srcOffset), srcDim);
+        }
+        // save the domain
+        if (domainSize) {
+            memcpy(vb + domainOffset, domain, domainSize);
+        }
+
+        // save the edges: every vertex receives all four (a, b, c) triples, so this reads from
+        // edgeOffset + j (edge j) rather than from the per-vertex offset.
+        if (aaSize) {
+            float* edgeBuffer = reinterpret_cast<float*>(vb + aaOffset);
+            for (int j = 0; j < 4; j++) {
+                load(localStorage, edgeOffset + j, edgeBuffer, 3);
+                edgeBuffer += 3;
+            }
+        }
+
+        vb += vertexSize;
+    }
+}
--- a/src/gpu/ops/GrQuadPerEdgeAA.h
+++ b/src/gpu/ops/GrQuadPerEdgeAA.h
@ -0,0 +1,100 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrQuadPerEdgeAA_DEFINED
+#define GrQuadPerEdgeAA_DEFINED
+
+#include "GrColor.h"
+#include "GrSamplerState.h"
+#include "GrTypesPriv.h"
+#include "SkPoint.h"
+#include "SkPoint3.h"
+
+class GrPerspQuad;
+
+// Static-only helper that defines the vertex layouts for per-edge anti-aliased quads and
+// tessellates a quad into a buffer of such vertices.
+class GrQuadPerEdgeAA {
+public:
+    // Whether a vertex carries a domain rectangle (an SkRect) after its local position.
+    enum class Domain : bool { kNo = false, kYes = true };
+
+    // The vertex template provides a clean way of specifying the layout and components of a vertex
+    // for a per-edge aa quad. However, because there are so many permutations possible, the struct
+    // is defined this way to take away all layout control from the compiler and make
+    // sure that it matches what we need to send to the GPU.
+    //
+    // It is expected that most code using these vertices will only need to call the templated
+    // Tessellate() function with an appropriately sized vertex buffer and not need to modify or
+    // read the fields of a particular vertex.
+    //
+    // Fields are packed back to back in this order: position (PosDim floats), color (sizeof(C)
+    // bytes), local position (LocalPosDim floats), domain (an SkRect when D is kYes), and edge
+    // equations (four SkPoint3s when AA is kYes). Each k*Offset is the sum of the sizes before it.
+    template <int PosDim, typename C, int LocalPosDim, Domain D, GrAA AA>
+    struct Vertex {
+        using Color = C;
+        static constexpr GrAA kAA = AA;
+        static constexpr Domain kDomain = D;
+        static constexpr size_t kPositionDim = PosDim;
+        static constexpr size_t kLocalPositionDim = LocalPosDim;
+
+        static constexpr size_t kPositionOffset = 0;
+        static constexpr size_t kPositionSize = PosDim * sizeof(float);
+
+        static constexpr size_t kColorOffset = kPositionOffset + kPositionSize;
+        static constexpr size_t kColorSize = sizeof(Color);
+
+        static constexpr size_t kLocalPositionOffset = kColorOffset + kColorSize;
+        static constexpr size_t kLocalPositionSize = LocalPosDim * sizeof(float);
+
+        static constexpr size_t kDomainOffset = kLocalPositionOffset + kLocalPositionSize;
+        static constexpr size_t kDomainSize = D == Domain::kYes ? sizeof(SkRect) : 0;
+
+        static constexpr size_t kAAOffset = kDomainOffset + kDomainSize;
+        static constexpr size_t kAASize = AA == GrAA::kYes ? 4 * sizeof(SkPoint3) : 0;
+
+        static constexpr size_t kVertexSize = kAAOffset + kAASize;
+
+        // Make sure sizeof(Vertex<...>) == kVertexSize
+        char fData[kVertexSize];
+    };
+
+    // Tessellate the given quad specification into the vertices buffer. If the specific vertex
+    // type does not use color, local positions, domain, etc. then the passed in values used for
+    // that field will be ignored.
+    //
+    // V is expected to be an instantiation of Vertex<...>; its constants select which fields are
+    // written and size the scratch buffer handed to TessellateImpl().
+    template<typename V>
+    static void Tessellate(V* vertices, const GrPerspQuad& deviceQuad, typename V::Color color,
+                           const GrPerspQuad& srcQuad, const SkRect& domain, GrQuadAAFlags aa) {
+        static_assert(sizeof(V) == V::kVertexSize, "Incorrect vertex size");
+        static constexpr bool useCoverageAA = V::kAA == GrAA::kYes;
+        // Four floats per channel: position channels, local position channels, and the three
+        // edge-equation channels when coverage AA is used.
+        float localStorage[4 * (V::kPositionDim + V::kLocalPositionDim + (useCoverageAA ? 3 : 0))];
+        TessellateImpl(vertices, V::kVertexSize, localStorage,
+                deviceQuad, V::kPositionDim, V::kPositionOffset, V::kPositionSize,
+                &color, V::kColorOffset, V::kColorSize,
+                srcQuad, V::kLocalPositionDim, V::kLocalPositionOffset, V::kLocalPositionSize,
+                &domain, V::kDomainOffset, V::kDomainSize,
+                aa, V::kAAOffset, V::kAASize);
+    }
+
+private:
+    // Don't let the "namespace" class be instantiated
+    GrQuadPerEdgeAA();
+
+    // Internal implementation that can handle all vertex template variations without being
+    // replicated by the template in order to keep code size down.
+    //
+    // This uses the field sizes to determine if particular data needs to be computed. The arguments
+    // are arranged so that the data and field specification match the field declaration order of
+    // the vertex type (pos, color, localPos, domain, aa).
+    //
+    // localStorage must have a length of at least 4 * (devDimCt + srcDimCt + (aa ? 3 : 0)) floats
+    // and is assumed to be a pointer to a local variable in the wrapping template's stack. This is
+    // done instead of always allocating 36 floats in this function (36 is maximum needed). The
+    // minimum needed for a non-AA 2D quad with no local coordinates is just 8.
+    static void TessellateImpl(void* vertices, size_t vertexSize, float* localStorage,
+            const GrPerspQuad& deviceQuad, int posDim, size_t posOffset, size_t posSize,
+            const void* color, size_t colorOffset, size_t colorSize,
+            const GrPerspQuad& srcQuad, int srcDim, size_t srcOffset, size_t srcSize,
+            const void* domain, size_t domainOffset, size_t domainSize,
+            GrQuadAAFlags aaFlags, size_t aaOffset, size_t aaSize);
+};
+
+#endif // GrQuadPerEdgeAA_DEFINED
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@ -17,6 +17,7 @@
 #include "GrMeshDrawOp.h"
 #include "GrOpFlushState.h"
 #include "GrQuad.h"
+#include "GrQuadPerEdgeAA.h"
 #include "GrResourceProvider.h"
 #include "GrShaderCaps.h"
 #include "GrTexture.h"
@ -37,7 +38,7 @@

 namespace {

-enum class Domain : bool { kNo = false, kYes = true };
+using Domain = GrQuadPerEdgeAA::Domain;

 /**
 * Geometry Processor that draws a texture modulated by a vertex color (though, this is meant to be
@ -46,44 +47,13 @@ enum class Domain : bool { kNo = false, kYes = true };
 */
 class TextureGeometryProcessor : public GrGeometryProcessor {
 public:
-    template <typename Pos> struct VertexCommon {
-        using Position = Pos;
-        Position fPosition;
-        GrColor fColor;
-        SkPoint fTextureCoords;
-    };
-
-    template <typename Pos, Domain D> struct OptionalDomainVertex;
-    template <typename Pos>
-    struct OptionalDomainVertex<Pos, Domain::kNo> : VertexCommon<Pos> {
-        static constexpr Domain kDomain = Domain::kNo;
-    };
-    template <typename Pos>
-    struct OptionalDomainVertex<Pos, Domain::kYes> : VertexCommon<Pos> {
-        static constexpr Domain kDomain = Domain::kYes;
-        SkRect fTextureDomain;
-    };
-
-    template <typename Pos, Domain D, GrAA> struct OptionalAAVertex;
-    template <typename Pos, Domain D>
-    struct OptionalAAVertex<Pos, D, GrAA::kNo> : OptionalDomainVertex<Pos, D> {
-        static constexpr GrAA kAA = GrAA::kNo;
-    };
-    template <typename Pos, Domain D>
-    struct OptionalAAVertex<Pos, D, GrAA::kYes> : OptionalDomainVertex<Pos, D> {
-        static constexpr GrAA kAA = GrAA::kYes;
-        SkPoint3 fEdges[4];
-    };
-
-    template <typename Pos, Domain D, GrAA AA>
-    using Vertex = OptionalAAVertex<Pos, D, AA>;

    static sk_sp<GrGeometryProcessor> Make(GrTextureType textureType, GrPixelConfig textureConfig,
                                           const GrSamplerState::Filter filter,
                                           sk_sp<GrColorSpaceXform> textureColorSpaceXform,
                                           sk_sp<GrColorSpaceXform> paintColorSpaceXform,
-                                           bool coverageAA, bool perspective, Domain domain,
-                                           const GrShaderCaps& caps) {
+                                           bool coverageAA, bool perspective,
+                                           Domain domain, const GrShaderCaps& caps) {
        return sk_sp<TextureGeometryProcessor>(new TextureGeometryProcessor(
                textureType, textureConfig, filter, std::move(textureColorSpaceXform),
                std::move(paintColorSpaceXform), coverageAA, perspective, domain, caps));
@ -265,334 +235,6 @@ private:
    typedef GrGeometryProcessor INHERITED;
 };

-// This computes the four edge equations for a quad, then outsets them and optionally computes a new
-// quad as the intersection points of the outset edges. 'x' and 'y' contain the original points as
-// input and the outset points as output. 'a', 'b', and 'c' are the edge equation coefficients on
-// output. If outsetCorners is true then 'u' and 'v' should hold the texture coordinates on input
-// and will also be outset.
-static void compute_quad_edges_and_outset_vertices(GrQuadAAFlags aaFlags, Sk4f* x, Sk4f* y, Sk4f* a,
-                                                   Sk4f* b, Sk4f* c, bool outsetCorners = false,
-                                                   Sk4f* u = nullptr, Sk4f* v = nullptr) {
-    static constexpr auto fma = SkNx_fma<4, float>;
-    // These rotate the points/edge values either clockwise or counterclockwise assuming tri strip
-    // order.
-    auto nextCW  = [](const Sk4f& v) { return SkNx_shuffle<2, 0, 3, 1>(v); };
-    auto nextCCW = [](const Sk4f& v) { return SkNx_shuffle<1, 3, 0, 2>(v); };
-
-    // Compute edge equations for the quad.
-    auto xnext = nextCCW(*x);
-    auto ynext = nextCCW(*y);
-    // xdiff and ydiff will comprise the normalized vectors pointing along each quad edge.
-    auto xdiff = xnext - *x;
-    auto ydiff = ynext - *y;
-    auto invLengths = fma(xdiff, xdiff, ydiff * ydiff).rsqrt();
-    xdiff *= invLengths;
-    ydiff *= invLengths;
-
-    // Use above vectors to compute edge equations.
-    *c = fma(xnext, *y,  -ynext * *x) * invLengths;
-    // Make sure the edge equations have their normals facing into the quad in device space.
-    auto test = fma(ydiff, nextCW(*x), fma(-xdiff, nextCW(*y), *c));
-    if ((test < Sk4f(0)).anyTrue()) {
-        *a = -ydiff;
-        *b = xdiff;
-        *c = -*c;
-    } else {
-        *a = ydiff;
-        *b = -xdiff;
-    }
-    // Outset the edge equations so aa coverage evaluates to zero half a pixel away from the
-    // original quad edge.
-    *c += 0.5f;
-
-    if (aaFlags != GrQuadAAFlags::kAll) {
-        // This order is the same order the edges appear in xdiff/ydiff and therefore as the
-        // edges in a/b/c.
-        auto mask = Sk4f(GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
-                         GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
-                         GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
-                         GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
-        // Outset edge equations for masked out edges another pixel so that they always evaluate
-        // >= 1.
-        *c += (1.f - mask);
-        if (outsetCorners) {
-            // Do the vertex outset.
-            mask *= 0.5f;
-            auto maskCW = nextCW(mask);
-            *x += maskCW * -xdiff + mask * nextCW(xdiff);
-            *y += maskCW * -ydiff + mask * nextCW(ydiff);
-            // We want to extend the texture coords by the same proportion as the positions.
-            maskCW *= invLengths;
-            mask *= nextCW(invLengths);
-            Sk4f udiff = nextCCW(*u) - *u;
-            Sk4f vdiff = nextCCW(*v) - *v;
-            *u += maskCW * -udiff + mask * nextCW(udiff);
-            *v += maskCW * -vdiff + mask * nextCW(vdiff);
-        }
-    } else if (outsetCorners) {
-        *x += 0.5f * (-xdiff + nextCW(xdiff));
-        *y += 0.5f * (-ydiff + nextCW(ydiff));
-        Sk4f t = 0.5f * invLengths;
-        Sk4f udiff = nextCCW(*u) - *u;
-        Sk4f vdiff = nextCCW(*v) - *v;
-        *u += t * -udiff + nextCW(t) * nextCW(udiff);
-        *v += t * -vdiff + nextCW(t) * nextCW(vdiff);
-    }
-}
-
-namespace {
-// This is a class solely so it can be partially specialized (functions cannot be).
-template <typename V, GrAA AA = V::kAA, typename Position = typename V::Position>
-class VertexAAHandler;
-
-template<typename V> class VertexAAHandler<V, GrAA::kNo, SkPoint> {
-public:
-    static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
-                                            GrQuadAAFlags aaFlags, const SkRect& texRect) {
-        // Should be kNone for non-AA and kAll for MSAA.
-        SkASSERT(aaFlags == GrQuadAAFlags::kNone || aaFlags == GrQuadAAFlags::kAll);
-        SkASSERT(!quad.hasPerspective());
-        SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(V));
-        for (int i = 0; i < 4; ++i) {
-            vertices[i].fPosition = {quad.x(i), quad.y(i)};
-        }
-    }
-};
-
-template<typename V> class VertexAAHandler<V, GrAA::kNo, SkPoint3> {
-public:
-    static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
-                                            GrQuadAAFlags aaFlags, const SkRect& texRect) {
-        // Should be kNone for non-AA and kAll for MSAA.
-        SkASSERT(aaFlags == GrQuadAAFlags::kNone || aaFlags == GrQuadAAFlags::kAll);
-        SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(V));
-        for (int i = 0; i < 4; ++i) {
-            vertices[i].fPosition = quad.point(i);
-        }
-    }
-};
-
-template<typename V> class VertexAAHandler<V, GrAA::kYes, SkPoint> {
-public:
-    static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
-                                            GrQuadAAFlags aaFlags, const SkRect& texRect) {
-        SkASSERT(!quad.hasPerspective());
-        if (aaFlags == GrQuadAAFlags::kNone) {
-            for (int i = 0; i < 4; ++i) {
-                vertices[i].fPosition = {quad.x(i), quad.y(i)};
-                for (int j = 0; j < 4; ++j) {
-                    // This works because the position w components are known to be 1.
-                    vertices[i].fEdges[j] = {0, 0, 1};
-                }
-            }
-            SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(V));
-            return;
-        }
-        auto x = quad.x4f();
-        auto y = quad.y4f();
-        Sk4f a, b, c;
-        Sk4f u{texRect.fLeft, texRect.fLeft, texRect.fRight, texRect.fRight};
-        Sk4f v{texRect.fTop, texRect.fBottom, texRect.fTop, texRect.fBottom};
-        compute_quad_edges_and_outset_vertices(aaFlags, &x, &y, &a, &b, &c, true, &u, &v);
-
-        // Faster to store the Sk4fs all at once rather than element-by-element into vertices.
-        float xs[4], ys[4], as[4], bs[4], cs[4], us[4], vs[4];
-        x.store(xs);
-        y.store(ys);
-        a.store(as);
-        b.store(bs);
-        c.store(cs);
-        u.store(us);
-        v.store(vs);
-        for (int i = 0; i < 4; ++i) {
-            vertices[i].fPosition = {xs[i], ys[i]};
-            vertices[i].fTextureCoords = {us[i], vs[i]};
-            for (int j = 0; j < 4; ++j) {
-                vertices[i].fEdges[j]  = {as[j], bs[j], cs[j]};
-            }
-        }
-    }
-};
-
-template<typename V> class VertexAAHandler<V, GrAA::kYes, SkPoint3> {
-public:
-    static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
-                                            GrQuadAAFlags aaFlags, const SkRect& texRect) {
-        auto x = quad.x4f();
-        auto y = quad.y4f();
-        auto iw = quad.iw4f();
-
-        if ((iw == Sk4f(1)).allTrue() && aaFlags == GrQuadAAFlags::kNone) {
-            for (int i = 0; i < 4; ++i) {
-                vertices[i].fPosition = quad.point(i);
-                for (int j = 0; j < 4; ++j) {
-                    // This works because the position w components are known to be 1.
-                    vertices[i].fEdges[j] = {0, 0, 1};
-                }
-            }
-            SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(V));
-            return;
-        }
-        Sk4f a, b, c;
-        auto x2d = x * iw;
-        auto y2d = y * iw;
-        compute_quad_edges_and_outset_vertices(aaFlags, &x2d, &y2d, &a, &b, &c);
-        auto w = quad.w4f();
-        static const float kOutset = 0.5f;
-        Sk4f u{texRect.fLeft, texRect.fLeft, texRect.fRight, texRect.fRight};
-        Sk4f v{texRect.fTop, texRect.fBottom, texRect.fTop, texRect.fBottom};
-        if ((GrQuadAAFlags::kLeft | GrQuadAAFlags::kRight) & aaFlags) {
-            // For each entry in x the equivalent entry in opX is the left/right opposite and so on.
-            Sk4f opX = SkNx_shuffle<2, 3, 0, 1>(x);
-            Sk4f opW = SkNx_shuffle<2, 3, 0, 1>(w);
-            Sk4f opY = SkNx_shuffle<2, 3, 0, 1>(y);
-            // vx/vy holds the device space left-to-right vectors along top and bottom of the quad.
-            Sk2f vx = SkNx_shuffle<2, 3>(x2d) - SkNx_shuffle<0, 1>(x2d);
-            Sk2f vy = SkNx_shuffle<2, 3>(y2d) - SkNx_shuffle<0, 1>(y2d);
-            Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
-            // For each device space corner, devPt, label its left/right opposite device space point
-            // opDevPt. The new device space point is opDevPt + s (devPt - opDevPt) where s is
-            // (length(devPt - opDevPt) + 0.5) / length(devPt - opDevPt);
-            Sk4f s = SkNx_shuffle<0, 1, 0, 1>((len + kOutset) / len);
-            // Compute t in homogeneous space from s using similar triangles so that we can produce
-            // homogeneous outset vertices for perspective-correct interpolation.
-            Sk4f sOpW = s * opW;
-            Sk4f t = sOpW / (sOpW + (1.f - s) * w);
-            // mask is used to make the t values be 1 when the left/right side is not antialiased.
-            Sk4f mask(GrQuadAAFlags::kLeft & aaFlags  ? 1.f : 0.f,
-                      GrQuadAAFlags::kLeft & aaFlags  ? 1.f : 0.f,
-                      GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f,
-                      GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
-            t = t * mask + (1.f - mask);
-            x = opX + t * (x - opX);
-            y = opY + t * (y - opY);
-            w = opW + t * (w - opW);
-
-            Sk4f opU = SkNx_shuffle<2, 3, 0, 1>(u);
-            Sk4f opV = SkNx_shuffle<2, 3, 0, 1>(v);
-            u = opU + t * (u - opU);
-            v = opV + t * (v - opV);
-            if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
-                // Update the 2D points for the top/bottom calculation.
-                iw = w.invert();
-                x2d = x * iw;
-                y2d = y * iw;
-            }
-        }
-
-        if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
-            // This operates the same as above but for top/bottom rather than left/right.
-            Sk4f opX = SkNx_shuffle<1, 0, 3, 2>(x);
-            Sk4f opW = SkNx_shuffle<1, 0, 3, 2>(w);
-            Sk4f opY = SkNx_shuffle<1, 0, 3, 2>(y);
-
-            Sk2f vx = SkNx_shuffle<1, 3>(x2d) - SkNx_shuffle<0, 2>(x2d);
-            Sk2f vy = SkNx_shuffle<1, 3>(y2d) - SkNx_shuffle<0, 2>(y2d);
-            Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
-
-            Sk4f s = SkNx_shuffle<0, 0, 1, 1>((len + kOutset) / len);
-
-            Sk4f sOpW = s * opW;
-            Sk4f t = sOpW / (sOpW + (1.f - s) * w);
-
-            Sk4f mask(GrQuadAAFlags::kTop    & aaFlags ? 1.f : 0.f,
-                      GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
-                      GrQuadAAFlags::kTop    & aaFlags ? 1.f : 0.f,
-                      GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f);
-            t = t * mask + (1.f - mask);
-            x = opX + t * (x - opX);
-            y = opY + t * (y - opY);
-            w = opW + t * (w - opW);
-
-            Sk4f opU = SkNx_shuffle<1, 0, 3, 2>(u);
-            Sk4f opV = SkNx_shuffle<1, 0, 3, 2>(v);
-            u = opU + t * (u - opU);
-            v = opV + t * (v - opV);
-        }
-        // Faster to store the Sk4fs all at once rather than element-by-element into vertices.
-        float xs[4], ys[4], ws[4], as[4], bs[4], cs[4], us[4], vs[4];
-        x.store(xs);
-        y.store(ys);
-        w.store(ws);
-        a.store(as);
-        b.store(bs);
-        c.store(cs);
-        u.store(us);
-        v.store(vs);
-        for (int i = 0; i < 4; ++i) {
-            vertices[i].fPosition = {xs[i], ys[i], ws[i]};
-            vertices[i].fTextureCoords = {us[i], vs[i]};
-            for (int j = 0; j < 4; ++j) {
-                vertices[i].fEdges[j] = {as[j], bs[j], cs[j]};
-            }
-        }
-    }
-};
-
-template <typename V, Domain D = V::kDomain> struct DomainAssigner;
-
-template <typename V> struct DomainAssigner<V, Domain::kYes> {
-    static void Assign(V* vertices, Domain domain, GrSamplerState::Filter filter,
-                       const SkRect& srcRect, GrSurfaceOrigin origin, float iw, float ih) {
-        static constexpr SkRect kLargeRect = {-2, -2, 2, 2};
-        SkRect domainRect;
-        if (domain == Domain::kYes) {
-            auto ltrb = Sk4f::Load(&srcRect);
-            if (filter == GrSamplerState::Filter::kBilerp) {
-                auto rblt = SkNx_shuffle<2, 3, 0, 1>(ltrb);
-                auto whwh = (rblt - ltrb).abs();
-                auto c = (rblt + ltrb) * 0.5f;
-                static const Sk4f kOffsets = {0.5f, 0.5f, -0.5f, -0.5f};
-                ltrb = (whwh < 1.f).thenElse(c, ltrb + kOffsets);
-            }
-            ltrb *= Sk4f(iw, ih, iw, ih);
-            if (origin == kBottomLeft_GrSurfaceOrigin) {
-                static const Sk4f kMul = {1.f, -1.f, 1.f, -1.f};
-                static const Sk4f kAdd = {0.f, 1.f, 0.f, 1.f};
-                ltrb = SkNx_shuffle<0, 3, 2, 1>(kMul * ltrb + kAdd);
-            }
-            ltrb.store(&domainRect);
-        } else {
-            domainRect = kLargeRect;
-        }
-        for (int i = 0; i < 4; ++i) {
-            vertices[i].fTextureDomain = domainRect;
-        }
-    }
-};
-
-template <typename V> struct DomainAssigner<V, Domain::kNo> {
-    static void Assign(V*, Domain domain, GrSamplerState::Filter, const SkRect&, GrSurfaceOrigin,
-                       float iw, float ih) {
-        SkASSERT(domain == Domain::kNo);
-    }
-};
-
-}  // anonymous namespace
-
-template <typename V>
-static void tessellate_quad(const GrPerspQuad& devQuad, GrQuadAAFlags aaFlags,
-                            const SkRect& srcRect, GrColor color, GrSurfaceOrigin origin,
-                            GrSamplerState::Filter filter, V* vertices, SkScalar iw, SkScalar ih,
-                            Domain domain) {
-    SkRect texRect = {
-            iw * srcRect.fLeft,
-            ih * srcRect.fTop,
-            iw * srcRect.fRight,
-            ih * srcRect.fBottom
-    };
-    if (origin == kBottomLeft_GrSurfaceOrigin) {
-        texRect.fTop = 1.f - texRect.fTop;
-        texRect.fBottom = 1.f - texRect.fBottom;
-    }
-    VertexAAHandler<V>::AssignPositionsAndTexCoords(vertices, devQuad, aaFlags, texRect);
-    vertices[0].fColor = color;
-    vertices[1].fColor = color;
-    vertices[2].fColor = color;
-    vertices[3].fColor = color;
-    DomainAssigner<V>::Assign(vertices, domain, filter, srcRect, origin, iw, ih);
-}
-
 static bool filter_has_effect_for_rect_stays_rect(const GrPerspQuad& quad, const SkRect& srcRect) {
    SkASSERT(quad.quadType() == GrQuadType::kRect_QuadType);
    float ql = quad.x(0);
@ -607,6 +249,52 @@ static bool filter_has_effect_for_rect_stays_rect(const GrPerspQuad& quad, const
           SkScalarFraction(qt) != SkScalarFraction(srcRect.fTop);
 }

+static SkRect compute_domain(Domain domain, GrSamplerState::Filter filter,
+                             GrSurfaceOrigin origin, const SkRect& srcRect, float iw, float ih) {
+    static constexpr SkRect kLargeRect = {-2, -2, 2, 2};
+    if (domain == Domain::kNo) {
+        // Either the quad has no domain constraint and is batched with a domain constrained op
+        // (in which case we want a domain that doesn't restrict normalized tex coords), or the
+        // entire op doesn't use the domain, in which case the returned value is ignored.
+        return kLargeRect;
+    }
+
+    auto ltrb = Sk4f::Load(&srcRect);
+    if (filter == GrSamplerState::Filter::kBilerp) {
+        auto rblt = SkNx_shuffle<2, 3, 0, 1>(ltrb);
+        auto whwh = (rblt - ltrb).abs();
+        auto c = (rblt + ltrb) * 0.5f;
+        static const Sk4f kOffsets = {0.5f, 0.5f, -0.5f, -0.5f};
+        ltrb = (whwh < 1.f).thenElse(c, ltrb + kOffsets);
+    }
+    ltrb *= Sk4f(iw, ih, iw, ih);
+    if (origin == kBottomLeft_GrSurfaceOrigin) {
+        static const Sk4f kMul = {1.f, -1.f, 1.f, -1.f};
+        static const Sk4f kAdd = {0.f, 1.f, 0.f, 1.f};
+        ltrb = SkNx_shuffle<0, 3, 2, 1>(kMul * ltrb + kAdd);
+    }
+
+    SkRect domainRect;
+    ltrb.store(&domainRect);
+    return domainRect;
+}
+
+static GrPerspQuad compute_src_quad(GrSurfaceOrigin origin, const SkRect& srcRect,
+                                    float iw, float ih) {
+    // Convert the pixel-space src rectangle into normalized texture coordinates
+    SkRect texRect = {
+        iw * srcRect.fLeft,
+        ih * srcRect.fTop,
+        iw * srcRect.fRight,
+        ih * srcRect.fBottom
+    };
+    if (origin == kBottomLeft_GrSurfaceOrigin) {
+        texRect.fTop = 1.f - texRect.fTop;
+        texRect.fBottom = 1.f - texRect.fBottom;
+    }
+    return GrPerspQuad(texRect, SkMatrix::I());
+}
+
 /**
 * Op that implements GrTextureOp::Make. It draws textured quads. Each quad can modulate against
 * the texture by color. The blend with the destination is always src-over. The edges are non-AA.
@ -808,11 +496,11 @@ private:
        fDomain = static_cast<unsigned>(false);
    }

-    template <typename Pos, Domain D, GrAA AA>
+    template <int PosDim, Domain D, GrAA AA>
    void tess(void* v, const GrGeometryProcessor* gp, const GrTextureProxy* proxy, int start,
              int cnt) const {
        TRACE_EVENT0("skia", TRACE_FUNC);
-        using Vertex = TextureGeometryProcessor::Vertex<Pos, D, AA>;
+        using Vertex = GrQuadPerEdgeAA::Vertex<PosDim, GrColor, 2, D, AA>;
        SkASSERT(gp->debugOnly_vertexStride() == sizeof(Vertex));
        auto vertices = static_cast<Vertex*>(v);
        auto origin = proxy->origin();
@ -822,8 +510,10 @@ private:

        for (int i = start; i < start + cnt; ++i) {
            const auto q = fQuads[i];
-            tessellate_quad<Vertex>(q.quad(), q.aaFlags(), q.srcRect(), q.color(), origin,
-                                    this->filter(), vertices, iw, ih, q.domain());
+            GrPerspQuad srcQuad = compute_src_quad(origin, q.srcRect(), iw, ih);
+            SkRect domain = compute_domain(q.domain(), this->filter(), origin, q.srcRect(), iw, ih);
+            GrQuadPerEdgeAA::Tessellate<Vertex>(
+                    vertices, q.quad(), q.color(), srcQuad, domain, q.aaFlags());
            vertices += 4;
        }
    }
@ -886,24 +576,24 @@ private:
        }
        const auto* pipeline =
                target->allocPipeline(args, GrProcessorSet::MakeEmptySet(), std::move(clip));
-        using TessFn = decltype(&TextureOp::tess<SkPoint, Domain::kNo, GrAA::kNo>);
+        using TessFn = decltype(&TextureOp::tess<2, Domain::kNo, GrAA::kNo>);
 #define TESS_FN_AND_VERTEX_SIZE(Point, Domain, AA)                          \
    {                                                                       \
        &TextureOp::tess<Point, Domain, AA>,                                \
-                sizeof(TextureGeometryProcessor::Vertex<Point, Domain, AA>) \
+                sizeof(GrQuadPerEdgeAA::Vertex<Point, GrColor, 2, Domain, AA>) \
    }
        static constexpr struct {
            TessFn fTessFn;
            size_t fVertexSize;
        } kTessFnsAndVertexSizes[] = {
-                TESS_FN_AND_VERTEX_SIZE(SkPoint,  Domain::kNo,  GrAA::kNo),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint,  Domain::kNo,  GrAA::kYes),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint,  Domain::kYes, GrAA::kNo),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint,  Domain::kYes, GrAA::kYes),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kNo,  GrAA::kNo),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kNo,  GrAA::kYes),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kYes, GrAA::kNo),
-                TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kYes, GrAA::kYes),
+                TESS_FN_AND_VERTEX_SIZE(2, Domain::kNo,  GrAA::kNo),
+                TESS_FN_AND_VERTEX_SIZE(2, Domain::kNo,  GrAA::kYes),
+                TESS_FN_AND_VERTEX_SIZE(2, Domain::kYes, GrAA::kNo),
+                TESS_FN_AND_VERTEX_SIZE(2, Domain::kYes, GrAA::kYes),
+                TESS_FN_AND_VERTEX_SIZE(3, Domain::kNo,  GrAA::kNo),
+                TESS_FN_AND_VERTEX_SIZE(3, Domain::kNo,  GrAA::kYes),
+                TESS_FN_AND_VERTEX_SIZE(3, Domain::kYes, GrAA::kNo),
+                TESS_FN_AND_VERTEX_SIZE(3, Domain::kYes, GrAA::kYes),
        };
 #undef TESS_FN_AND_VERTEX_SIZE
        int tessFnIdx = 0;