Reland "Extract per-edge quad vertex tessellation code into reusable interface"

This is a reland of fc3784bd6c

Original change's description:
> Extract per-edge quad vertex tessellation code into reusable interface
> 
> This moves the vertex templates and the edge outset/tessellation code into a new GrPerEdgeAAQuadHelper h/cpp file. The vertex template hierarchy has been expanded to include an optional local coordinate type: void, SkPoint, or SkPoint3. The texture op only uses SkPoint for its local coordinates but the regular rect op will need void and SkPoint3 as well.
> 
> A large part of the added code is providing the tessellation specializations for those new local coordinate types.
> 
> Bug: skia:
> Change-Id: Id8cf2a17342f30b299b16be95e341d4991951c38
> Reviewed-on: https://skia-review.googlesource.com/c/164611
> Commit-Queue: Michael Ludwig <michaelludwig@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>

Bug: skia:
Change-Id: I9cf9fe2e3ccdacc396290b39f839e790a117fa8c
Reviewed-on: https://skia-review.googlesource.com/c/165781
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
This commit is contained in:
Michael Ludwig 2018-10-29 11:09:29 -04:00 committed by Skia Commit-Bot
parent 0ac0eddd88
commit 460eb5e746
4 changed files with 537 additions and 376 deletions

View File

@ -270,6 +270,8 @@ skia_gpu_sources = [
"$_src/gpu/ops/GrOp.h",
"$_src/gpu/ops/GrOvalOpFactory.cpp",
"$_src/gpu/ops/GrOvalOpFactory.h",
"$_src/gpu/ops/GrQuadPerEdgeAA.cpp",
"$_src/gpu/ops/GrQuadPerEdgeAA.h",
"$_src/gpu/ops/GrRectOpFactory.h",
"$_src/gpu/ops/GrRegionOp.cpp",
"$_src/gpu/ops/GrRegionOp.h",

View File

@ -0,0 +1,369 @@
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "GrQuadPerEdgeAA.h"
#include "GrQuad.h"
#include "SkNx.h"
namespace {
// This computes the four edge equations for a quad, then outsets them and optionally computes a new
// quad as the intersection points of the outset edges. 'x' and 'y' contain the original points as
// input and the outset points as output. 'a', 'b', and 'c' are the edge equation coefficients on
// output. The values in x, y, u, v, and r are possibly updated if outsetting is needed.
// r is the local position's w component if it exists.
//
// Parameters:
//   aaFlags         - which edges are anti-aliased. Edges whose flag is not set get their 'c'
//                     pushed out by an additional 1 and contribute nothing to the corner outset.
//   x, y            - quad points (tri strip order is assumed by the shuffles below); they are only
//                     written when outsetCorners is true.
//   a, b, c         - outputs, always written; c already includes the 0.5 outset.
//   u, v, r         - local coordinates; only dereferenced when outsetCorners is true and
//                     uvrChannelCount > 0 (r only when uvrChannelCount == 3), so callers that pass
//                     uvrChannelCount == 0 with outsetCorners == false may pass nullptr.
//   uvrChannelCount - number of local coordinate channels in use: 0, 2 (u, v) or 3 (u, v, r).
//   outsetCorners   - when false only the edge equations are produced.
static void compute_quad_edges_and_outset_vertices(GrQuadAAFlags aaFlags, Sk4f* x, Sk4f* y, Sk4f* a,
Sk4f* b, Sk4f* c, Sk4f* u, Sk4f* v, Sk4f* r,
int uvrChannelCount, bool outsetCorners) {
SkASSERT(uvrChannelCount == 0 || uvrChannelCount == 2 || uvrChannelCount == 3);
static constexpr auto fma = SkNx_fma<4, float>;
// These rotate the points/edge values either clockwise or counterclockwise assuming tri strip
// order.
auto nextCW = [](const Sk4f& v) { return SkNx_shuffle<2, 0, 3, 1>(v); };
auto nextCCW = [](const Sk4f& v) { return SkNx_shuffle<1, 3, 0, 2>(v); };
// Compute edge equations for the quad.
auto xnext = nextCCW(*x);
auto ynext = nextCCW(*y);
// xdiff and ydiff will comprise the normalized vectors pointing along each quad edge.
auto xdiff = xnext - *x;
auto ydiff = ynext - *y;
// NOTE(review): a zero-length edge makes the argument of rsqrt() zero; presumably callers never
// pass degenerate quads -- confirm.
auto invLengths = fma(xdiff, xdiff, ydiff * ydiff).rsqrt();
xdiff *= invLengths;
ydiff *= invLengths;
// Use above vectors to compute edge equations.
*c = fma(xnext, *y, -ynext * *x) * invLengths;
// Make sure the edge equations have their normals facing into the quad in device space.
// Each equation is evaluated at the point preceding the edge; if any lane is negative, all four
// equations are negated together.
auto test = fma(ydiff, nextCW(*x), fma(-xdiff, nextCW(*y), *c));
if ((test < Sk4f(0)).anyTrue()) {
*a = -ydiff;
*b = xdiff;
*c = -*c;
} else {
*a = ydiff;
*b = -xdiff;
}
// Outset the edge equations so aa coverage evaluates to zero half a pixel away from the
// original quad edge.
*c += 0.5f;
if (aaFlags != GrQuadAAFlags::kAll) {
// This order is the same order the edges appear in xdiff/ydiff and therefore as the
// edges in a/b/c.
auto mask = Sk4f(GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
// Outset edge equations for masked out edges another pixel so that they always evaluate
// >= 1.
*c += (1.f - mask);
if (outsetCorners) {
// Do the vertex outset.
// After this scale, mask holds 0.5 for AA edges and 0 for non-AA edges, i.e. the distance
// each corner is pushed along the two edges that meet at it.
mask *= 0.5f;
auto maskCW = nextCW(mask);
*x += maskCW * -xdiff + mask * nextCW(xdiff);
*y += maskCW * -ydiff + mask * nextCW(ydiff);
if (uvrChannelCount > 0) {
// We want to extend the texture coords by the same proportion as the positions.
// Multiplying by the inverse edge lengths turns the device-space distance into a
// fraction of the (unnormalized) local coordinate edge vectors below.
maskCW *= invLengths;
mask *= nextCW(invLengths);
Sk4f udiff = nextCCW(*u) - *u;
Sk4f vdiff = nextCCW(*v) - *v;
*u += maskCW * -udiff + mask * nextCW(udiff);
*v += maskCW * -vdiff + mask * nextCW(vdiff);
if (uvrChannelCount == 3) {
Sk4f rdiff = nextCCW(*r) - *r;
*r += maskCW * -rdiff + mask * nextCW(rdiff);
}
}
}
} else if (outsetCorners) {
// All four edges are AA: same outset as above with every mask lane equal to 0.5.
*x += 0.5f * (-xdiff + nextCW(xdiff));
*y += 0.5f * (-ydiff + nextCW(ydiff));
if (uvrChannelCount > 0) {
Sk4f t = 0.5f * invLengths;
Sk4f udiff = nextCCW(*u) - *u;
Sk4f vdiff = nextCCW(*v) - *v;
*u += t * -udiff + nextCW(t) * nextCW(udiff);
*v += t * -vdiff + nextCW(t) * nextCW(vdiff);
if (uvrChannelCount == 3) {
Sk4f rdiff = nextCCW(*r) - *r;
*r += t * -rdiff + nextCW(t) * nextCW(rdiff);
}
}
}
}
// Generalizes the above function to extrapolate local coords such that after perspective division
// of the device coordinate, the original local coordinate value is at the original un-outset
// device position. r is the local coordinate's w component.
//
// Parameters:
//   aaFlags         - which edges are anti-aliased; left/right and top/bottom are handled in two
//                     separate passes, each skipped when neither of its two flags is set.
//   x, y, w         - homogeneous device positions; updated in place for the AA sides.
//   a, b, c         - outputs; edge equations computed from the projected (x/w, y/w) points.
//   u, v, r         - local coordinates; only dereferenced when uvrChannelCount > 0 (r only when
//                     uvrChannelCount == 3).
//   uvrChannelCount - number of local coordinate channels in use: 0, 2 (u, v) or 3 (u, v, r).
static void compute_quad_edges_and_outset_persp_vertices(GrQuadAAFlags aaFlags, Sk4f* x, Sk4f* y,
Sk4f* w, Sk4f* a, Sk4f* b, Sk4f* c,
Sk4f* u, Sk4f* v, Sk4f* r,
int uvrChannelCount) {
SkASSERT(uvrChannelCount == 0 || uvrChannelCount == 2 || uvrChannelCount == 3);
auto iw = (*w).invert();
auto x2d = (*x) * iw;
auto y2d = (*y) * iw;
// Don't compute outset corners in the normalized space, which means u, v, and r don't need
// to be provided here (outset separately below).
// x2d/y2d are local copies, so *x and *y are not modified by this call.
compute_quad_edges_and_outset_vertices(aaFlags, &x2d, &y2d, a, b, c, nullptr, nullptr, nullptr,
/* uvr ct */ 0, /* outsetCorners */ false);
static const float kOutset = 0.5f;
if ((GrQuadAAFlags::kLeft | GrQuadAAFlags::kRight) & aaFlags) {
// For each entry in x the equivalent entry in opX is the left/right opposite and so on.
Sk4f opX = SkNx_shuffle<2, 3, 0, 1>(*x);
Sk4f opW = SkNx_shuffle<2, 3, 0, 1>(*w);
Sk4f opY = SkNx_shuffle<2, 3, 0, 1>(*y);
// vx/vy holds the device space left-to-right vectors along top and bottom of the quad.
Sk2f vx = SkNx_shuffle<2, 3>(x2d) - SkNx_shuffle<0, 1>(x2d);
Sk2f vy = SkNx_shuffle<2, 3>(y2d) - SkNx_shuffle<0, 1>(y2d);
Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
// For each device space corner, devP, label its left/right opposite device space point
// opDevPt. The new device space point is opDevPt + s (devPt - opDevPt) where s is
// (length(devPt - opDevPt) + 0.5) / length(devPt - opDevPt);
Sk4f s = SkNx_shuffle<0, 1, 0, 1>((len + kOutset) / len);
// Compute t in homogeneous space from s using similar triangles so that we can produce
// homogeneous outset vertices for perspective-correct interpolation.
Sk4f sOpW = s * opW;
Sk4f t = sOpW / (sOpW + (1.f - s) * (*w));
// mask is used to make the t values be 1 when the left/right side is not antialiased.
Sk4f mask(GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
t = t * mask + (1.f - mask);
// With t == 1 the expressions below reproduce the original value, leaving non-AA sides alone.
*x = opX + t * (*x - opX);
*y = opY + t * (*y - opY);
*w = opW + t * (*w - opW);
if (uvrChannelCount > 0) {
// Local coordinates are extrapolated with the same homogeneous parameter t.
Sk4f opU = SkNx_shuffle<2, 3, 0, 1>(*u);
Sk4f opV = SkNx_shuffle<2, 3, 0, 1>(*v);
*u = opU + t * (*u - opU);
*v = opV + t * (*v - opV);
if (uvrChannelCount == 3) {
Sk4f opR = SkNx_shuffle<2, 3, 0, 1>(*r);
*r = opR + t * (*r - opR);
}
}
if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
// Update the 2D points for the top/bottom calculation.
iw = (*w).invert();
x2d = (*x) * iw;
y2d = (*y) * iw;
}
}
if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
// This operates the same as above but for top/bottom rather than left/right.
Sk4f opX = SkNx_shuffle<1, 0, 3, 2>(*x);
Sk4f opW = SkNx_shuffle<1, 0, 3, 2>(*w);
Sk4f opY = SkNx_shuffle<1, 0, 3, 2>(*y);
Sk2f vx = SkNx_shuffle<1, 3>(x2d) - SkNx_shuffle<0, 2>(x2d);
Sk2f vy = SkNx_shuffle<1, 3>(y2d) - SkNx_shuffle<0, 2>(y2d);
Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
Sk4f s = SkNx_shuffle<0, 0, 1, 1>((len + kOutset) / len);
Sk4f sOpW = s * opW;
Sk4f t = sOpW / (sOpW + (1.f - s) * (*w));
Sk4f mask(GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f);
t = t * mask + (1.f - mask);
*x = opX + t * (*x - opX);
*y = opY + t * (*y - opY);
*w = opW + t * (*w - opW);
if (uvrChannelCount > 0) {
Sk4f opU = SkNx_shuffle<1, 0, 3, 2>(*u);
Sk4f opV = SkNx_shuffle<1, 0, 3, 2>(*v);
*u = opU + t * (*u - opU);
*v = opV + t * (*v - opV);
if (uvrChannelCount == 3) {
Sk4f opR = SkNx_shuffle<1, 0, 3, 2>(*r);
*r = opR + t * (*r - opR);
}
}
}
}
// Fast path for non-AA quads batched into an AA op. Since they are part of the AA op, the vertices
// need to have valid edge equations that ensure coverage is set to 1. To get perspective
// interpolation of the edge distance, the vertex shader outputs d*w and then multiplies by 1/w in
// the fragment shader. For non-AA edges, the edge equation can be simplified to 0*x/w + y/w + c >=
// 1, so the vertex shader outputs c*w. The quad is sent as two triangles, so a fragment is the
// interpolation between 3 of the 4 vertices. If iX are the weights for the 3 involved quad
// vertices, then the fragment shader's state is:
// f_cw = c * (iA*wA + iB*wB + iC*wC) and f_1/w = iA/wA + iB/wB + iC/wC
// (where A,B,C are chosen from {1,2,3, 4})
// When there's no perspective, then f_cw*f_1/w = c and setting c = 1 guarantees a proper non-AA
// edge. Unfortunately when there is perspective, f_cw*f_1/w != c unless the fragment is at a
// vertex. We must pick a c such that f_cw*f_1/w >= 1 across the whole primitive.
// Let n = min(w1,w2,w3,w4) and m = max(w1,w2,w3,w4) and rewrite
// f_1/w=(iA*wB*wC + iB*wA*wC + iC*wA*wB) / (wA*wB*wC)
// Since the iXs are weights for the interior of the primitive, then we have:
// n <= (iA*wA + iB*wB + iC*wC) <= m and
// n^2 <= (iA*wB*wC + iB*wA*wC + iC*wA*wB) <= m^2 and
// n^3 <= wA*wB*wC <= m^3 regardless of the choice of A,B, and C
// Thus if we set c = m^3/n^3, it guarantees f_cw*f_1/w >= 1 for any perspective.
static SkPoint3 compute_non_aa_persp_edge_coeffs(const Sk4f& w) {
    // The returned equation has a = b = 0 and c = max(w)^3 / min(w)^3, computed from the extreme
    // w values of the four vertices.
    const float smallestW = w.min();
    const float largestW = w.max();
    const float largestCubed = largestW * largestW * largestW;
    const float smallestCubed = smallestW * smallestW * smallestW;
    return {0.f, 0.f, largestCubed / smallestCubed};
}
// When there is guaranteed to be no perspective, the edge coefficients for non-AA quads are constant
static constexpr SkPoint3 kNonAANoPerspEdgeCoeffs = {0, 0, 1};
// Writes the four per-vertex values of one channel ('data') as a contiguous block of four floats
// starting at localStorage[offset]. Returns the offset at which the next block should be written.
static int store(const Sk4f& data, float* localStorage, int offset) {
    float* const block = &localStorage[offset];
    data.store(block);
    return 4 + offset;
}
// Gathers dimCt values for a single quad vertex out of consecutive channel blocks. Each channel
// occupies four floats in localStorage, so starting 'offset' at the vertex index (0 to 3, plus the
// offsets of any channels to skip) and stepping by four visits that vertex's value in each channel.
// Returns the offset of the same vertex in the channel following the last one read.
static int load(const float* localStorage, int offset, float* coordOut, int dimCt) {
    int cursor = offset;
    int channel = 0;
    while (channel < dimCt) {
        coordOut[channel] = localStorage[cursor];
        cursor += 4;
        ++channel;
    }
    return cursor;
}
} // anonymous namespace
// Writes the four vertices of a quad into 'vertices' using an explicit, caller-described layout.
//
//   vertices, vertexSize   - destination buffer and the stride between consecutive vertices.
//   localStorage           - scratch space holding one block of 4 floats per channel; it must have
//                            room for 4 * (posDim + srcDim + (aaSize ? 3 : 0)) floats.
//   deviceQuad, posDim     - device positions and how many components (2 or 3) are written.
//   color                  - colorSize bytes copied verbatim into every vertex (if colorSize != 0).
//   srcQuad, srcDim        - local coordinates and how many components (0, 2 or 3) are written.
//   domain                 - domainSize bytes copied verbatim into every vertex (if domainSize != 0).
//   aaFlags, aaSize        - when aaSize != 0 the four edge equations (3 floats each) are written to
//                            every vertex, and positions/local coords are outset as needed.
//   *Offset                - byte offset of each field from the start of a vertex.
void GrQuadPerEdgeAA::TessellateImpl(void* vertices, size_t vertexSize, float* localStorage,
        const GrPerspQuad& deviceQuad, int posDim, size_t posOffset, size_t posSize,
        const void* color, size_t colorOffset, size_t colorSize,
        const GrPerspQuad& srcQuad, int srcDim, size_t srcOffset, size_t srcSize,
        const void* domain, size_t domainOffset, size_t domainSize,
        GrQuadAAFlags aaFlags, size_t aaOffset, size_t aaSize) {
    // Make sure the device and local positions are dimensions that are supported
    SkASSERT(posDim == 2 || posDim == 3);
    SkASSERT(srcDim == 0 || srcDim == 2 || srcDim == 3);
    // Make sure that the position sizes are the proper multiples of sizeof(float) since we copy
    // floats directly into the block without converting types
    SkASSERT(posSize == posDim * sizeof(float));
    SkASSERT(srcSize == srcDim * sizeof(float));
    // Make sure the component sizes completely fill the vertex
    SkASSERT(vertexSize == posSize + colorSize + srcSize + domainSize + aaSize);

    // Load position data into Sk4fs (always x, y and maybe w)
    Sk4f x = deviceQuad.x4f();
    Sk4f y = deviceQuad.y4f();
    Sk4f w;
    if (posDim == 3) {
        w = deviceQuad.w4f();
    }

    // Load local position data into Sk4fs (either none, just u,v or all three)
    Sk4f u, v, r;
    if (srcDim > 0) {
        u = srcQuad.x4f();
        v = srcQuad.y4f();
        if (srcDim == 3) {
            r = srcQuad.w4f();
        }
    }

    Sk4f a, b, c;
    if (aaSize) {
        // Must calculate edges and possibly outset the positions
        if (aaFlags == GrQuadAAFlags::kNone) {
            // A non-AA quad that got batched into an AA group, so its edges will be the same for
            // all four vertices and it does not need to be outset
            SkPoint3 edgeCoeffs;
            if (posDim == 3) {
                edgeCoeffs = compute_non_aa_persp_edge_coeffs(w);
            } else {
                edgeCoeffs = kNonAANoPerspEdgeCoeffs;
            }
            // Copy the coefficients into all four equations
            a = edgeCoeffs.fX;
            b = edgeCoeffs.fY;
            c = edgeCoeffs.fZ;
        } else if (posDim == 2) {
            // For simplicity, pointers to u, v, and r are always provided, but srcDim
            // ensures that only loaded Sk4fs are modified in the compute functions.
            compute_quad_edges_and_outset_vertices(
                    aaFlags, &x, &y, &a, &b, &c, &u, &v, &r, srcDim, /* outset */ true);
        } else {
            compute_quad_edges_and_outset_persp_vertices(
                    aaFlags, &x, &y, &w, &a, &b, &c, &u, &v, &r, srcDim);
        }
    }

    // It is faster to unpack the Sk4fs all at once than access their components out of order.
    int offset = store(x, localStorage, 0);
    offset = store(y, localStorage, offset);
    if (posDim == 3) {
        offset = store(w, localStorage, offset);
    }
    if (srcDim > 0) {
        offset = store(u, localStorage, offset);
        offset = store(v, localStorage, offset);
        if (srcDim == 3) {
            // The third local channel is the local position's w (r), not the device w. Storing
            // 'w' here would emit the wrong value, and an uninitialized one when posDim == 2.
            offset = store(r, localStorage, offset);
        }
    }
    int edgeOffset = offset; // The 4 edges are separate from the 4 vertices
    if (aaSize) {
        offset = store(a, localStorage, offset);
        offset = store(b, localStorage, offset);
        offset = store(c, localStorage, offset);
    }

    // Now rearrange the unpacked buffer into the vertex layout
    char* vb = reinterpret_cast<char*>(vertices);
    for (int i = 0; i < 4; ++i) {
        // Starting the offset at i makes sure that all loads read the data for the i^th vertex
        offset = i;
        // NOTE: while this code uses explicit offsets to make it independent of the actual
        // vertex layout, it is a good idea to keep the writes in the same order as the fields

        // save position
        offset = load(localStorage, offset, reinterpret_cast<float*>(vb + posOffset), posDim);
        // save color
        if (colorSize) {
            memcpy(vb + colorOffset, color, colorSize);
        }
        // save local position
        if (srcDim) {
            offset = load(localStorage, offset, reinterpret_cast<float*>(vb + srcOffset), srcDim);
        }
        // save the domain
        if (domainSize) {
            memcpy(vb + domainOffset, domain, domainSize);
        }
        // save the edges
        if (aaSize) {
            // Every vertex carries all four edge equations, each written as (a, b, c).
            float* edgeBuffer = reinterpret_cast<float*>(vb + aaOffset);
            for (int j = 0; j < 4; j++) {
                load(localStorage, edgeOffset + j, edgeBuffer, 3);
                edgeBuffer += 3;
            }
        }
        vb += vertexSize;
    }
}

View File

@ -0,0 +1,100 @@
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef GrQuadPerEdgeAA_DEFINED
#define GrQuadPerEdgeAA_DEFINED
#include "GrColor.h"
#include "GrSamplerState.h"
#include "GrTypesPriv.h"
#include "SkPoint.h"
#include "SkPoint3.h"
class GrPerspQuad;
// Static-only helper (never instantiated) that defines the vertex layout used for quads with
// per-edge anti-aliasing and fills vertex buffers of that layout from a quad description.
class GrQuadPerEdgeAA {
public:
// Whether a vertex carries a domain rectangle (sizeof(SkRect) bytes) or not.
enum class Domain : bool { kNo = false, kYes = true };
// The vertex template provides a clean way of specifying the layout and components of a vertex
// for a per-edge aa quad. However, because there are so many permutations possible, the struct
// is defined this way to take away all layout control from the compiler and make
// sure that it matches what we need to send to the GPU.
//
// It is expected that most code using these vertices will only need to call the templated
// Tessellate() function with an appropriately sized vertex buffer and not need to modify or
// read the fields of a particular vertex.
//
// Template parameters:
//   PosDim      - number of floats in the device position.
//   C           - color type; sizeof(C) bytes are reserved for it.
//   LocalPosDim - number of floats in the local position (0 reserves no space).
//   D           - whether a domain rectangle is stored.
//   AA          - whether the four edge equations (4 * sizeof(SkPoint3) bytes) are stored.
template <int PosDim, typename C, int LocalPosDim, Domain D, GrAA AA>
struct Vertex {
using Color = C;
static constexpr GrAA kAA = AA;
static constexpr Domain kDomain = D;
static constexpr size_t kPositionDim = PosDim;
static constexpr size_t kLocalPositionDim = LocalPosDim;
// Fields are packed back to back in this order: position, color, local position, domain,
// edge equations. Each offset is the previous field's offset plus its size.
static constexpr size_t kPositionOffset = 0;
static constexpr size_t kPositionSize = PosDim * sizeof(float);
static constexpr size_t kColorOffset = kPositionOffset + kPositionSize;
static constexpr size_t kColorSize = sizeof(Color);
static constexpr size_t kLocalPositionOffset = kColorOffset + kColorSize;
static constexpr size_t kLocalPositionSize = LocalPosDim * sizeof(float);
static constexpr size_t kDomainOffset = kLocalPositionOffset + kLocalPositionSize;
static constexpr size_t kDomainSize = D == Domain::kYes ? sizeof(SkRect) : 0;
static constexpr size_t kAAOffset = kDomainOffset + kDomainSize;
static constexpr size_t kAASize = AA == GrAA::kYes ? 4 * sizeof(SkPoint3) : 0;
static constexpr size_t kVertexSize = kAAOffset + kAASize;
// Make sure sizeof(Vertex<...>) == kVertexSize
char fData[kVertexSize];
};
// Tessellate the given quad specification into the vertices buffer. If the specific vertex
// type does not use color, local positions, domain, etc. then the passed in values used for
// that field will be ignored.
//
// 'vertices' is written as four consecutive V's. The scratch array below is sized for exactly
// the channels V uses and is handed to TessellateImpl.
template<typename V>
static void Tessellate(V* vertices, const GrPerspQuad& deviceQuad, typename V::Color color,
const GrPerspQuad& srcQuad, const SkRect& domain, GrQuadAAFlags aa) {
static_assert(sizeof(V) == V::kVertexSize, "Incorrect vertex size");
static constexpr bool useCoverageAA = V::kAA == GrAA::kYes;
float localStorage[4 * (V::kPositionDim + V::kLocalPositionDim + (useCoverageAA ? 3 : 0))];
TessellateImpl(vertices, V::kVertexSize, localStorage,
deviceQuad, V::kPositionDim, V::kPositionOffset, V::kPositionSize,
&color, V::kColorOffset, V::kColorSize,
srcQuad, V::kLocalPositionDim, V::kLocalPositionOffset, V::kLocalPositionSize,
&domain, V::kDomainOffset, V::kDomainSize,
aa, V::kAAOffset, V::kAASize);
}
private:
// Don't let the "namespace" class be instantiated
GrQuadPerEdgeAA();
// Internal implementation that can handle all vertex template variations without being
// replicated by the template in order to keep code size down.
//
// This uses the field sizes to determine if particular data needs to be computed. The arguments
// are arranged so that the data and field specification match the field declaration order of
// the vertex type (pos, color, localPos, domain, aa).
//
// localStorage must have a length >= 4 * (devDimCt + srcDimCt + (aa ? 3 : 0)) and is assumed
// to be a pointer to a local variable in the wrapping template's stack. This is done instead of
// always allocating 36 floats in this function (36 is maximum needed). The minimum needed for a
// non-AA 2D quad with no local coordinates is just 8.
static void TessellateImpl(void* vertices, size_t vertexSize, float* localStorage,
const GrPerspQuad& deviceQuad, int posDim, size_t posOffset, size_t posSize,
const void* color, size_t colorOffset, size_t colorSize,
const GrPerspQuad& srcQuad, int srcDim, size_t srcOffset, size_t srcSize,
const void* domain, size_t domainOffset, size_t domainSize,
GrQuadAAFlags aaFlags, size_t aaOffset, size_t aaSize);
};
#endif // GrQuadPerEdgeAA_DEFINED

View File

@ -17,6 +17,7 @@
#include "GrMeshDrawOp.h"
#include "GrOpFlushState.h"
#include "GrQuad.h"
#include "GrQuadPerEdgeAA.h"
#include "GrResourceProvider.h"
#include "GrShaderCaps.h"
#include "GrTexture.h"
@ -37,7 +38,7 @@
namespace {
enum class Domain : bool { kNo = false, kYes = true };
using Domain = GrQuadPerEdgeAA::Domain;
/**
* Geometry Processor that draws a texture modulated by a vertex color (though, this is meant to be
@ -46,44 +47,13 @@ enum class Domain : bool { kNo = false, kYes = true };
*/
class TextureGeometryProcessor : public GrGeometryProcessor {
public:
template <typename Pos> struct VertexCommon {
using Position = Pos;
Position fPosition;
GrColor fColor;
SkPoint fTextureCoords;
};
template <typename Pos, Domain D> struct OptionalDomainVertex;
template <typename Pos>
struct OptionalDomainVertex<Pos, Domain::kNo> : VertexCommon<Pos> {
static constexpr Domain kDomain = Domain::kNo;
};
template <typename Pos>
struct OptionalDomainVertex<Pos, Domain::kYes> : VertexCommon<Pos> {
static constexpr Domain kDomain = Domain::kYes;
SkRect fTextureDomain;
};
template <typename Pos, Domain D, GrAA> struct OptionalAAVertex;
template <typename Pos, Domain D>
struct OptionalAAVertex<Pos, D, GrAA::kNo> : OptionalDomainVertex<Pos, D> {
static constexpr GrAA kAA = GrAA::kNo;
};
template <typename Pos, Domain D>
struct OptionalAAVertex<Pos, D, GrAA::kYes> : OptionalDomainVertex<Pos, D> {
static constexpr GrAA kAA = GrAA::kYes;
SkPoint3 fEdges[4];
};
template <typename Pos, Domain D, GrAA AA>
using Vertex = OptionalAAVertex<Pos, D, AA>;
static sk_sp<GrGeometryProcessor> Make(GrTextureType textureType, GrPixelConfig textureConfig,
const GrSamplerState::Filter filter,
sk_sp<GrColorSpaceXform> textureColorSpaceXform,
sk_sp<GrColorSpaceXform> paintColorSpaceXform,
bool coverageAA, bool perspective, Domain domain,
const GrShaderCaps& caps) {
bool coverageAA, bool perspective,
Domain domain, const GrShaderCaps& caps) {
return sk_sp<TextureGeometryProcessor>(new TextureGeometryProcessor(
textureType, textureConfig, filter, std::move(textureColorSpaceXform),
std::move(paintColorSpaceXform), coverageAA, perspective, domain, caps));
@ -265,334 +235,6 @@ private:
typedef GrGeometryProcessor INHERITED;
};
// This computes the four edge equations for a quad, then outsets them and optionally computes a new
// quad as the intersection points of the outset edges. 'x' and 'y' contain the original points as
// input and the outset points as output. 'a', 'b', and 'c' are the edge equation coefficients on
// output. If outsetCorners is true then 'u' and 'v' should hold the texture coordinates on input
// and will also be outset.
//
// Note that 'u' and 'v' default to nullptr but are dereferenced unconditionally whenever
// outsetCorners is true, so they must be non-null in that case. When outsetCorners is false only
// 'a', 'b' and 'c' are written.
static void compute_quad_edges_and_outset_vertices(GrQuadAAFlags aaFlags, Sk4f* x, Sk4f* y, Sk4f* a,
Sk4f* b, Sk4f* c, bool outsetCorners = false,
Sk4f* u = nullptr, Sk4f* v = nullptr) {
static constexpr auto fma = SkNx_fma<4, float>;
// These rotate the points/edge values either clockwise or counterclockwise assuming tri strip
// order.
auto nextCW = [](const Sk4f& v) { return SkNx_shuffle<2, 0, 3, 1>(v); };
auto nextCCW = [](const Sk4f& v) { return SkNx_shuffle<1, 3, 0, 2>(v); };
// Compute edge equations for the quad.
auto xnext = nextCCW(*x);
auto ynext = nextCCW(*y);
// xdiff and ydiff will comprise the normalized vectors pointing along each quad edge.
auto xdiff = xnext - *x;
auto ydiff = ynext - *y;
auto invLengths = fma(xdiff, xdiff, ydiff * ydiff).rsqrt();
xdiff *= invLengths;
ydiff *= invLengths;
// Use above vectors to compute edge equations.
*c = fma(xnext, *y, -ynext * *x) * invLengths;
// Make sure the edge equations have their normals facing into the quad in device space.
// If any lane of the test is negative, all four equations are negated together.
auto test = fma(ydiff, nextCW(*x), fma(-xdiff, nextCW(*y), *c));
if ((test < Sk4f(0)).anyTrue()) {
*a = -ydiff;
*b = xdiff;
*c = -*c;
} else {
*a = ydiff;
*b = -xdiff;
}
// Outset the edge equations so aa coverage evaluates to zero half a pixel away from the
// original quad edge.
*c += 0.5f;
if (aaFlags != GrQuadAAFlags::kAll) {
// This order is the same order the edges appear in xdiff/ydiff and therefore as the
// edges in a/b/c.
auto mask = Sk4f(GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
// Outset edge equations for masked out edges another pixel so that they always evaluate
// >= 1.
*c += (1.f - mask);
if (outsetCorners) {
// Do the vertex outset.
// After this scale, mask holds 0.5 for AA edges and 0 for non-AA edges.
mask *= 0.5f;
auto maskCW = nextCW(mask);
*x += maskCW * -xdiff + mask * nextCW(xdiff);
*y += maskCW * -ydiff + mask * nextCW(ydiff);
// We want to extend the texture coords by the same proportion as the positions.
maskCW *= invLengths;
mask *= nextCW(invLengths);
Sk4f udiff = nextCCW(*u) - *u;
Sk4f vdiff = nextCCW(*v) - *v;
*u += maskCW * -udiff + mask * nextCW(udiff);
*v += maskCW * -vdiff + mask * nextCW(vdiff);
}
} else if (outsetCorners) {
// All four edges are AA: same outset as above with every mask lane equal to 0.5.
*x += 0.5f * (-xdiff + nextCW(xdiff));
*y += 0.5f * (-ydiff + nextCW(ydiff));
Sk4f t = 0.5f * invLengths;
Sk4f udiff = nextCCW(*u) - *u;
Sk4f vdiff = nextCCW(*v) - *v;
*u += t * -udiff + nextCW(t) * nextCW(udiff);
*v += t * -vdiff + nextCW(t) * nextCW(vdiff);
}
}
namespace {
// This is a class solely so it can be partially specialized (functions cannot be).
template <typename V, GrAA AA = V::kAA, typename Position = typename V::Position>
class VertexAAHandler;
template<typename V> class VertexAAHandler<V, GrAA::kNo, SkPoint> {
public:
    // Non-AA vertices with 2D positions: copies each quad corner's x/y into the vertex position
    // and fills the texture coordinates of the four vertices from texRect.
    static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
                                            GrQuadAAFlags aaFlags, const SkRect& texRect) {
        // Should be kNone for non-AA and kAll for MSAA.
        SkASSERT(aaFlags == GrQuadAAFlags::kNone || aaFlags == GrQuadAAFlags::kAll);
        SkASSERT(!quad.hasPerspective());
        int corner = 0;
        while (corner < 4) {
            V& vertex = vertices[corner];
            vertex.fPosition = {quad.x(corner), quad.y(corner)};
            ++corner;
        }
        SkPointPriv::SetRectTriStrip(&vertices->fTextureCoords, texRect, sizeof(V));
    }
};
template<typename V> class VertexAAHandler<V, GrAA::kNo, SkPoint3> {
public:
    // Non-AA vertices with 3D positions: copies each quad corner point into the vertex position
    // and fills the texture coordinates of the four vertices from texRect.
    static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
                                            GrQuadAAFlags aaFlags, const SkRect& texRect) {
        // Should be kNone for non-AA and kAll for MSAA.
        SkASSERT(aaFlags == GrQuadAAFlags::kNone || aaFlags == GrQuadAAFlags::kAll);
        int corner = 0;
        while (corner < 4) {
            V& vertex = vertices[corner];
            vertex.fPosition = quad.point(corner);
            ++corner;
        }
        SkPointPriv::SetRectTriStrip(&vertices->fTextureCoords, texRect, sizeof(V));
    }
};
template<typename V> class VertexAAHandler<V, GrAA::kYes, SkPoint> {
public:
static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
GrQuadAAFlags aaFlags, const SkRect& texRect) {
SkASSERT(!quad.hasPerspective());
if (aaFlags == GrQuadAAFlags::kNone) {
for (int i = 0; i < 4; ++i) {
vertices[i].fPosition = {quad.x(i), quad.y(i)};
for (int j = 0; j < 4; ++j) {
// This works because the position w components are known to be 1.
vertices[i].fEdges[j] = {0, 0, 1};
}
}
SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(V));
return;
}
auto x = quad.x4f();
auto y = quad.y4f();
Sk4f a, b, c;
Sk4f u{texRect.fLeft, texRect.fLeft, texRect.fRight, texRect.fRight};
Sk4f v{texRect.fTop, texRect.fBottom, texRect.fTop, texRect.fBottom};
compute_quad_edges_and_outset_vertices(aaFlags, &x, &y, &a, &b, &c, true, &u, &v);
// Faster to store the Sk4fs all at once rather than element-by-element into vertices.
float xs[4], ys[4], as[4], bs[4], cs[4], us[4], vs[4];
x.store(xs);
y.store(ys);
a.store(as);
b.store(bs);
c.store(cs);
u.store(us);
v.store(vs);
for (int i = 0; i < 4; ++i) {
vertices[i].fPosition = {xs[i], ys[i]};
vertices[i].fTextureCoords = {us[i], vs[i]};
for (int j = 0; j < 4; ++j) {
vertices[i].fEdges[j] = {as[j], bs[j], cs[j]};
}
}
}
};
// AA vertices with 3D (homogeneous) positions. Edge equations are computed on the projected 2D
// points, while positions and texture coordinates are outset in homogeneous space, first for the
// left/right sides and then for the top/bottom sides.
template<typename V> class VertexAAHandler<V, GrAA::kYes, SkPoint3> {
public:
static void AssignPositionsAndTexCoords(V* vertices, const GrPerspQuad& quad,
GrQuadAAFlags aaFlags, const SkRect& texRect) {
auto x = quad.x4f();
auto y = quad.y4f();
auto iw = quad.iw4f();
// Fast path: taken only when every 1/w is exactly 1 and no edge is anti-aliased.
if ((iw == Sk4f(1)).allTrue() && aaFlags == GrQuadAAFlags::kNone) {
for (int i = 0; i < 4; ++i) {
vertices[i].fPosition = quad.point(i);
for (int j = 0; j < 4; ++j) {
// This works because the position w components are known to be 1.
vertices[i].fEdges[j] = {0, 0, 1};
}
}
SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(V));
return;
}
Sk4f a, b, c;
auto x2d = x * iw;
auto y2d = y * iw;
// Only the edge equations are computed here (outsetCorners defaults to false); the corners
// are outset in homogeneous space below.
compute_quad_edges_and_outset_vertices(aaFlags, &x2d, &y2d, &a, &b, &c);
auto w = quad.w4f();
static const float kOutset = 0.5f;
Sk4f u{texRect.fLeft, texRect.fLeft, texRect.fRight, texRect.fRight};
Sk4f v{texRect.fTop, texRect.fBottom, texRect.fTop, texRect.fBottom};
if ((GrQuadAAFlags::kLeft | GrQuadAAFlags::kRight) & aaFlags) {
// For each entry in x the equivalent entry in opX is the left/right opposite and so on.
Sk4f opX = SkNx_shuffle<2, 3, 0, 1>(x);
Sk4f opW = SkNx_shuffle<2, 3, 0, 1>(w);
Sk4f opY = SkNx_shuffle<2, 3, 0, 1>(y);
// vx/vy holds the device space left-to-right vectors along top and bottom of the quad.
Sk2f vx = SkNx_shuffle<2, 3>(x2d) - SkNx_shuffle<0, 1>(x2d);
Sk2f vy = SkNx_shuffle<2, 3>(y2d) - SkNx_shuffle<0, 1>(y2d);
Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
// For each device space corner, devP, label its left/right opposite device space point
// opDevPt. The new device space point is opDevPt + s (devPt - opDevPt) where s is
// (length(devPt - opDevPt) + 0.5) / length(devPt - opDevPt);
Sk4f s = SkNx_shuffle<0, 1, 0, 1>((len + kOutset) / len);
// Compute t in homogeneous space from s using similar triangles so that we can produce
// homogeneous outset vertices for perspective-correct interpolation.
Sk4f sOpW = s * opW;
Sk4f t = sOpW / (sOpW + (1.f - s) * w);
// mask is used to make the t values be 1 when the left/right side is not antialiased.
Sk4f mask(GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kLeft & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kRight & aaFlags ? 1.f : 0.f);
t = t * mask + (1.f - mask);
// With t == 1 the expressions below reproduce the original value.
x = opX + t * (x - opX);
y = opY + t * (y - opY);
w = opW + t * (w - opW);
Sk4f opU = SkNx_shuffle<2, 3, 0, 1>(u);
Sk4f opV = SkNx_shuffle<2, 3, 0, 1>(v);
u = opU + t * (u - opU);
v = opV + t * (v - opV);
if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
// Update the 2D points for the top/bottom calculation.
iw = w.invert();
x2d = x * iw;
y2d = y * iw;
}
}
if ((GrQuadAAFlags::kTop | GrQuadAAFlags::kBottom) & aaFlags) {
// This operates the same as above but for top/bottom rather than left/right.
Sk4f opX = SkNx_shuffle<1, 0, 3, 2>(x);
Sk4f opW = SkNx_shuffle<1, 0, 3, 2>(w);
Sk4f opY = SkNx_shuffle<1, 0, 3, 2>(y);
Sk2f vx = SkNx_shuffle<1, 3>(x2d) - SkNx_shuffle<0, 2>(x2d);
Sk2f vy = SkNx_shuffle<1, 3>(y2d) - SkNx_shuffle<0, 2>(y2d);
Sk2f len = SkNx_fma(vx, vx, vy * vy).sqrt();
Sk4f s = SkNx_shuffle<0, 0, 1, 1>((len + kOutset) / len);
Sk4f sOpW = s * opW;
Sk4f t = sOpW / (sOpW + (1.f - s) * w);
Sk4f mask(GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kTop & aaFlags ? 1.f : 0.f,
GrQuadAAFlags::kBottom & aaFlags ? 1.f : 0.f);
t = t * mask + (1.f - mask);
x = opX + t * (x - opX);
y = opY + t * (y - opY);
w = opW + t * (w - opW);
Sk4f opU = SkNx_shuffle<1, 0, 3, 2>(u);
Sk4f opV = SkNx_shuffle<1, 0, 3, 2>(v);
u = opU + t * (u - opU);
v = opV + t * (v - opV);
}
// Faster to store the Sk4fs all at once rather than element-by-element into vertices.
float xs[4], ys[4], ws[4], as[4], bs[4], cs[4], us[4], vs[4];
x.store(xs);
y.store(ys);
w.store(ws);
a.store(as);
b.store(bs);
c.store(cs);
u.store(us);
v.store(vs);
for (int i = 0; i < 4; ++i) {
vertices[i].fPosition = {xs[i], ys[i], ws[i]};
vertices[i].fTextureCoords = {us[i], vs[i]};
// Every vertex receives all four edge equations.
for (int j = 0; j < 4; ++j) {
vertices[i].fEdges[j] = {as[j], bs[j], cs[j]};
}
}
}
};
// Writes the texture domain rectangle into the four vertices of a quad. Only declared here; the
// implementation is chosen by specializing on the vertex type's kDomain value.
template <typename V, Domain D = V::kDomain> struct DomainAssigner;

// Specialization for vertex types that carry an fTextureDomain attribute.
template <typename V> struct DomainAssigner<V, Domain::kYes> {
    // Computes the domain for one quad and stores it in vertices[0..3].fTextureDomain.
    //   domain  - whether this particular quad is domain constrained.
    //   filter  - sampler filter; kBilerp adjusts the domain edges (see below).
    //   srcRect - source rectangle, loaded as (left, top, right, bottom).
    //   origin  - a bottom-left origin flips the domain vertically.
    //   iw, ih  - per-axis scale applied to the rectangle (presumably 1/width and 1/height of
    //             the texture; confirm against the caller).
    static void Assign(V* vertices, Domain domain, GrSamplerState::Filter filter,
                       const SkRect& srcRect, GrSurfaceOrigin origin, float iw, float ih) {
        // Fallback used when this quad has no domain constraint.
        static constexpr SkRect kUnconstrained = {-2, -2, 2, 2};

        SkRect domainRect = kUnconstrained;
        const bool constrained = (domain == Domain::kYes);
        if (constrained) {
            auto edges = Sk4f::Load(&srcRect);
            if (GrSamplerState::Filter::kBilerp == filter) {
                // Move each edge half a unit inward. On an axis where the rect is narrower
                // than one unit, collapse both edges onto the center instead.
                auto swapped = SkNx_shuffle<2, 3, 0, 1>(edges);
                auto extent = (swapped - edges).abs();
                auto center = (swapped + edges) * 0.5f;
                static const Sk4f kInset = {0.5f, 0.5f, -0.5f, -0.5f};
                edges = (extent < 1.f).thenElse(center, edges + kInset);
            }
            edges *= Sk4f(iw, ih, iw, ih);
            if (kBottomLeft_GrSurfaceOrigin == origin) {
                // y -> 1 - y, then swap top and bottom so the rect stays sorted.
                static const Sk4f kScale = {1.f, -1.f, 1.f, -1.f};
                static const Sk4f kBias = {0.f, 1.f, 0.f, 1.f};
                edges = SkNx_shuffle<0, 3, 2, 1>(kScale * edges + kBias);
            }
            edges.store(&domainRect);
        }

        for (V* vertex = vertices; vertex != vertices + 4; ++vertex) {
            vertex->fTextureDomain = domainRect;
        }
    }
};
// Specialization for vertex types selected with Domain::kNo: nothing is written to the vertices.
// The only work is asserting that the quad did not request a domain.
template <typename V> struct DomainAssigner<V, Domain::kNo> {
    static void Assign(V* /*vertices*/, Domain domain, GrSamplerState::Filter /*filter*/,
                       const SkRect& /*srcRect*/, GrSurfaceOrigin /*origin*/, float /*iw*/,
                       float /*ih*/) {
        SkASSERT(Domain::kNo == domain);
    }
};
} // anonymous namespace
template <typename V>
static void tessellate_quad(const GrPerspQuad& devQuad, GrQuadAAFlags aaFlags,
const SkRect& srcRect, GrColor color, GrSurfaceOrigin origin,
GrSamplerState::Filter filter, V* vertices, SkScalar iw, SkScalar ih,
Domain domain) {
SkRect texRect = {
iw * srcRect.fLeft,
ih * srcRect.fTop,
iw * srcRect.fRight,
ih * srcRect.fBottom
};
if (origin == kBottomLeft_GrSurfaceOrigin) {
texRect.fTop = 1.f - texRect.fTop;
texRect.fBottom = 1.f - texRect.fBottom;
}
VertexAAHandler<V>::AssignPositionsAndTexCoords(vertices, devQuad, aaFlags, texRect);
vertices[0].fColor = color;
vertices[1].fColor = color;
vertices[2].fColor = color;
vertices[3].fColor = color;
DomainAssigner<V>::Assign(vertices, domain, filter, srcRect, origin, iw, ih);
}
static bool filter_has_effect_for_rect_stays_rect(const GrPerspQuad& quad, const SkRect& srcRect) {
SkASSERT(quad.quadType() == GrQuadType::kRect_QuadType);
float ql = quad.x(0);
@ -607,6 +249,52 @@ static bool filter_has_effect_for_rect_stays_rect(const GrPerspQuad& quad, const
SkScalarFraction(qt) != SkScalarFraction(srcRect.fTop);
}
// Returns the texture domain rectangle for a single quad.
//   domain  - whether the quad is domain constrained.
//   filter  - sampler filter; kBilerp adjusts the domain edges (see below).
//   origin  - a bottom-left origin flips the result vertically.
//   srcRect - source rectangle, loaded as (left, top, right, bottom).
//   iw, ih  - per-axis scale applied to the rectangle (presumably 1/width and 1/height of the
//             texture; confirm against the caller).
static SkRect compute_domain(Domain domain, GrSamplerState::Filter filter,
                             GrSurfaceOrigin origin, const SkRect& srcRect, float iw, float ih) {
    if (domain == Domain::kNo) {
        // Two cases reach here. Either this quad is unconstrained but batched with a
        // constrained op, so the domain must not restrict normalized tex coords; or the op
        // does not use the domain at all and the returned value is ignored.
        static constexpr SkRect kUnconstrained = {-2, -2, 2, 2};
        return kUnconstrained;
    }

    auto edges = Sk4f::Load(&srcRect);

    if (GrSamplerState::Filter::kBilerp == filter) {
        // Move each edge half a unit inward. On an axis where the rect is narrower than one
        // unit, collapse both edges onto the center instead.
        auto swapped = SkNx_shuffle<2, 3, 0, 1>(edges);
        auto extent = (swapped - edges).abs();
        auto center = (swapped + edges) * 0.5f;
        static const Sk4f kInset = {0.5f, 0.5f, -0.5f, -0.5f};
        edges = (extent < 1.f).thenElse(center, edges + kInset);
    }

    edges *= Sk4f(iw, ih, iw, ih);

    if (kBottomLeft_GrSurfaceOrigin == origin) {
        // y -> 1 - y, then swap top and bottom so the rect stays sorted.
        static const Sk4f kScale = {1.f, -1.f, 1.f, -1.f};
        static const Sk4f kBias = {0.f, 1.f, 0.f, 1.f};
        edges = SkNx_shuffle<0, 3, 2, 1>(kScale * edges + kBias);
    }

    SkRect result;
    edges.store(&result);
    return result;
}
// Builds the local-coordinate quad for a draw: the pixel-space srcRect is scaled by (iw, ih)
// into normalized texture coordinates, and for a bottom-left origin each y value becomes 1 - y.
// The resulting rect is wrapped in a GrPerspQuad using the identity matrix.
static GrPerspQuad compute_src_quad(GrSurfaceOrigin origin, const SkRect& srcRect,
                                    float iw, float ih) {
    const bool flipY = (kBottomLeft_GrSurfaceOrigin == origin);
    const float scaledTop = ih * srcRect.fTop;
    const float scaledBottom = ih * srcRect.fBottom;

    // Note: top and bottom are flipped individually and not swapped.
    SkRect texRect = {
        iw * srcRect.fLeft,
        flipY ? 1.f - scaledTop : scaledTop,
        iw * srcRect.fRight,
        flipY ? 1.f - scaledBottom : scaledBottom
    };
    return GrPerspQuad(texRect, SkMatrix::I());
}
/**
* Op that implements GrTextureOp::Make. It draws textured quads. Each quad can modulate against
* the texture by color. The blend with the destination is always src-over. The edges are non-AA.
@ -808,11 +496,11 @@ private:
fDomain = static_cast<unsigned>(false);
}
template <typename Pos, Domain D, GrAA AA>
template <int PosDim, Domain D, GrAA AA>
void tess(void* v, const GrGeometryProcessor* gp, const GrTextureProxy* proxy, int start,
int cnt) const {
TRACE_EVENT0("skia", TRACE_FUNC);
using Vertex = TextureGeometryProcessor::Vertex<Pos, D, AA>;
using Vertex = GrQuadPerEdgeAA::Vertex<PosDim, GrColor, 2, D, AA>;
SkASSERT(gp->debugOnly_vertexStride() == sizeof(Vertex));
auto vertices = static_cast<Vertex*>(v);
auto origin = proxy->origin();
@ -822,8 +510,10 @@ private:
for (int i = start; i < start + cnt; ++i) {
const auto q = fQuads[i];
tessellate_quad<Vertex>(q.quad(), q.aaFlags(), q.srcRect(), q.color(), origin,
this->filter(), vertices, iw, ih, q.domain());
GrPerspQuad srcQuad = compute_src_quad(origin, q.srcRect(), iw, ih);
SkRect domain = compute_domain(q.domain(), this->filter(), origin, q.srcRect(), iw, ih);
GrQuadPerEdgeAA::Tessellate<Vertex>(
vertices, q.quad(), q.color(), srcQuad, domain, q.aaFlags());
vertices += 4;
}
}
@ -886,24 +576,24 @@ private:
}
const auto* pipeline =
target->allocPipeline(args, GrProcessorSet::MakeEmptySet(), std::move(clip));
using TessFn = decltype(&TextureOp::tess<SkPoint, Domain::kNo, GrAA::kNo>);
using TessFn = decltype(&TextureOp::tess<2, Domain::kNo, GrAA::kNo>);
#define TESS_FN_AND_VERTEX_SIZE(Point, Domain, AA) \
{ \
&TextureOp::tess<Point, Domain, AA>, \
sizeof(TextureGeometryProcessor::Vertex<Point, Domain, AA>) \
sizeof(GrQuadPerEdgeAA::Vertex<Point, GrColor, 2, Domain, AA>) \
}
static constexpr struct {
TessFn fTessFn;
size_t fVertexSize;
} kTessFnsAndVertexSizes[] = {
TESS_FN_AND_VERTEX_SIZE(SkPoint, Domain::kNo, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(SkPoint, Domain::kNo, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(SkPoint, Domain::kYes, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(SkPoint, Domain::kYes, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kNo, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kNo, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kYes, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(SkPoint3, Domain::kYes, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(2, Domain::kNo, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(2, Domain::kNo, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(2, Domain::kYes, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(2, Domain::kYes, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(3, Domain::kNo, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(3, Domain::kNo, GrAA::kYes),
TESS_FN_AND_VERTEX_SIZE(3, Domain::kYes, GrAA::kNo),
TESS_FN_AND_VERTEX_SIZE(3, Domain::kYes, GrAA::kYes),
};
#undef TESS_FN_AND_VERTEX_SIZE
int tessFnIdx = 0;