Remove recursion from PathWedgeTessellator chopping

Rather than recursively running Wang's formula and chopping, we run
Wang's once, divide by the maximum number of tessellation segments,
then chop in a flat loop. The next step will be to share this code
with other tessellators.

Bug: skia:12524
Change-Id: I6c5156f56885aee9f90b3be611fbef229740612d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/465362
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
This commit is contained in:
Chris Dalton 2021-10-29 12:22:13 -06:00 committed by SkCQ
parent ccb459d57b
commit 784b7b7ab5
5 changed files with 263 additions and 116 deletions

View File

@ -68,6 +68,26 @@ static SkPath make_conic_path() {
return path;
}
// Builds a benchmark path made of kNumCubicsInChalkboard quadratics. Every quad reuses the same
// control-point shape, scaled by a random factor whose power-of-two exponent cycles through
// [0, maxPow2). 'maxPow2' is used as a modulus, so it must be nonzero.
SK_MAYBE_UNUSED static SkPath make_quad_path(int maxPow2) {
    SkRandom rng;
    SkPath quads;
    int curveIdx = 0;
    while (curveIdx < kNumCubicsInChalkboard) {
        // scale = random * 2^(curveIdx % maxPow2) / 1000
        const int exponent = curveIdx % maxPow2;
        const float scale = std::ldexp(rng.nextF(), exponent) / 1e3f;
        quads.quadTo(111.625f * scale, 308.188f * scale, 764.62f * scale, -435.688f * scale);
        ++curveIdx;
    }
    return quads;
}
// Builds a benchmark path made of kNumCubicsInChalkboard line segments. Every segment targets the
// same direction, scaled by a random factor whose power-of-two exponent cycles through
// [0, maxPow2). 'maxPow2' is used as a modulus, so it must be nonzero.
SK_MAYBE_UNUSED static SkPath make_line_path(int maxPow2) {
    SkRandom rng;
    SkPath lines;
    int segmentIdx = 0;
    while (segmentIdx < kNumCubicsInChalkboard) {
        // scale = random * 2^(segmentIdx % maxPow2) / 1000
        const int exponent = segmentIdx % maxPow2;
        const float scale = std::ldexp(rng.nextF(), exponent) / 1e3f;
        lines.lineTo(764.62f * scale, -435.688f * scale);
        ++segmentIdx;
    }
    return lines;
}
// This serves as a base class for benchmarking individual methods on PathTessellateOp.
class PathTessellateBenchmark : public Benchmark {
public:

View File

@ -46,6 +46,8 @@ public:
~GrVertexChunkBuilder();
// Returns the stride value (fStride) this builder was configured with.
size_t stride() const { return fStride; }
// Appends 'count' contiguous vertices. These vertices are not guaranteed to be contiguous with
// previous or future calls to appendVertices.
SK_ALWAYS_INLINE skgpu::VertexWriter appendVertices(int count) {

View File

@ -21,6 +21,25 @@
namespace skgpu {
// Tag type for streaming a line into a VertexWriter as a 4-point cubic.
struct LineToCubic {
// Both endpoints of the line packed into one vector: p0 in .lo, p1 in .hi.
float4 fP0P1;
};
// Streams a line as four cubic control points: p0, two interior points placed one third of the way
// in from each endpoint, and p1.
static VertexWriter& operator<<(VertexWriter& vertexWriter, const LineToCubic& line) {
    float4 endpoints = line.fP0P1;
    // (p1 - p0, p0 - p1): for each endpoint, the vector toward the opposite endpoint.
    float4 towardOtherEnd = endpoints.zwxy() - endpoints;
    float4 interiorPts = towardOtherEnd * (1/3.f) + endpoints;
    vertexWriter << endpoints.lo;
    vertexWriter << interiorPts;
    vertexWriter << endpoints.hi;
    return vertexWriter;
}
// Tag type for streaming a quadratic into a VertexWriter as a 4-point cubic.
struct QuadToCubic {
// The quadratic's three control points, in order.
float2 fP0, fP1, fP2;
};
// Streams a quadratic as four cubic control points. The two interior cubic points are each
// endpoint moved two thirds of the way toward the quadratic's middle control point.
static VertexWriter& operator<<(VertexWriter& vertexWriter, const QuadToCubic& quadratic) {
    float2 first = quadratic.fP0;
    float2 ctrl = quadratic.fP1;
    float2 last = quadratic.fP2;
    float4 interiorPts = mix(float4(first, last), ctrl.xyxy(), 2/3.f);
    vertexWriter << first;
    vertexWriter << interiorPts;
    vertexWriter << last;
    return vertexWriter;
}
namespace {
// Parses out each contour in a path and tracks the midpoint. Example usage:
@ -112,9 +131,6 @@ private:
int fMidpointWeight;
};
} // namespace
// Writes out wedge patches, chopping as necessary so none require more segments than are supported
// by the hardware.
class WedgeWriter {
@ -123,10 +139,12 @@ public:
GrVertexChunkArray* vertexChunkArray,
size_t patchStride,
int initialPatchAllocCount,
int maxSegments)
int maxSegments,
const GrShaderCaps& shaderCaps)
: fChunker(target, vertexChunkArray, patchStride, initialPatchAllocCount)
, fMaxSegments_pow2(maxSegments * maxSegments)
, fMaxSegments_pow4(fMaxSegments_pow2 * fMaxSegments_pow2) {
, fMaxSegments_pow4(fMaxSegments_pow2 * fMaxSegments_pow2)
, fGPUInfinitySupport(shaderCaps.infinitySupport()) {
}
void setMatrices(const SkMatrix& shaderMatrix,
@ -137,122 +155,203 @@ public:
fPathXform = pathMatrix;
}
// Read-only access to the path transform most recently stored in fPathXform.
const PathXform& pathXform() const { return fPathXform; }
// Records the fan midpoint for subsequently written patches. The point is mapped through
// fPathXform at the time of the call and stored in fMidpoint.
// NOTE(review): presumably the matrices must be set before calling this -- confirm with callers.
void setMidpoint(SkPoint midpoint) { fMidpoint = fPathXform.mapPoint(midpoint); }
SK_ALWAYS_INLINE void writeFlatWedge(const GrShaderCaps& shaderCaps,
SkPoint p0,
SkPoint p1,
SkPoint midpoint) {
if (VertexWriter vertexWriter = fChunker.appendVertex()) {
fPathXform.mapLineToCubic(&vertexWriter, p0, p1);
vertexWriter << midpoint
<< VertexWriter::If(!shaderCaps.infinitySupport(),
GrTessellationShader::kCubicCurveType);
void lineTo(const SkPoint p[2]) {
CubicPatch(this) << LineToCubic{fPathXform.map2Points(p)};
}
// Writes the quadratic p[0..2] as one or more cubic patches.
//
// Wang's formula (to the 4th power) is evaluated once on the unmapped points together with
// fTotalVectorXform. If the result fits within fMaxSegments_pow4 the quad is written as a single
// patch; otherwise it is chopped in a flat loop into ceil(root4(n4 / fMaxSegments_pow4)) patches.
// The control points actually written are the ones mapped through fPathXform.
void quadTo(const SkPoint p[3]) {
auto [p0, p1] = fPathXform.map2Points(p);
auto p2 = fPathXform.map1Point(p+2);
float n4 = wangs_formula::quadratic_pow4(kTessellationPrecision, p, fTotalVectorXform);
if (n4 <= fMaxSegments_pow4) {
// This quad already fits into "maxSegments" tessellation segments.
CubicPatch(this) << QuadToCubic{p0, p1, p2};
// Track the largest segment count (pow4) seen so far.
fNumFixedSegments_pow4 = std::max(n4, fNumFixedSegments_pow4);
} else {
// Chop until each quad requires "maxSegments" tessellation segments or fewer.
int numPatches = SkScalarCeilToInt(wangs_formula::root4(n4/fMaxSegments_pow4));
// Each iteration emits two patches from the front of the remaining curve and keeps the rest in
// (p0, p1, p2) for the next pass, so numPatches drops by 2.
for (; numPatches >= 3; numPatches -= 2) {
// Chop into 3 quads.
// T.lo is the first chop parameter (1/numPatches), T.hi the second (2/numPatches); both are
// evaluated at once in the lo/hi halves of each float4 below.
float4 T = float4(1,1,2,2) / numPatches;
float4 ab = mix(p0.xyxy(), p1.xyxy(), T);
float4 bc = mix(p1.xyxy(), p2.xyxy(), T);
float4 abc = mix(ab, bc, T);
// p1 & p2 of the cubic representation of the middle quad.
float4 middle = mix(ab, bc, mix(T, T.zwxy(), 2/3.f));
CubicPatch(this) << QuadToCubic{p0, ab.lo, abc.lo}; // Write 1st quad.
CubicPatch(this) << abc.lo << middle << abc.hi; // Write 2nd quad.
std::tie(p0, p1) = {abc.hi, bc.hi}; // Save 3rd quad.
}
// After the loop, either 2 or 1 patches remain to be written.
if (numPatches == 2) {
// Chop into 2 quads.
float2 ab = (p0 + p1) * .5f;
float2 bc = (p1 + p2) * .5f;
float2 abc = (ab + bc) * .5f;
CubicPatch(this) << QuadToCubic{p0, ab, abc}; // Write 1st quad.
CubicPatch(this) << QuadToCubic{abc, bc, p2}; // Write 2nd quad.
} else {
SkASSERT(numPatches == 1);
CubicPatch(this) << QuadToCubic{p0, p1, p2}; // Write single quad.
}
// A chopped curve is recorded as needing the full maximum segment count.
fNumFixedSegments_pow4 = fMaxSegments_pow4;
}
}
SK_ALWAYS_INLINE void writeQuadraticWedge(const GrShaderCaps& shaderCaps,
const SkPoint p[3],
SkPoint midpoint) {
float numSegments_pow4 = wangs_formula::quadratic_pow4(kTessellationPrecision,
p,
fTotalVectorXform);
if (numSegments_pow4 > fMaxSegments_pow4) {
this->chopAndWriteQuadraticWedges(shaderCaps, p, midpoint);
return;
void conicTo(const SkPoint p[3], float w) {
float n2 = wangs_formula::conic_pow2(kTessellationPrecision, p, w, fTotalVectorXform);
if (n2 <= fMaxSegments_pow2) {
// This conic already fits into "maxSegments" tessellation segments.
ConicPatch(this) << fPathXform.map2Points(p) << fPathXform.map1Point(p+2) << w;
fNumFixedSegments_pow4 = std::max(n2*n2, fNumFixedSegments_pow4);
} else {
// Load the conic in homogeneous (unprojected) space.
float4 p0 = float4(fPathXform.map1Point(p), 1, 1);
float4 p1 = float4(fPathXform.map1Point(p+1), 1, 1) * w;
float4 p2 = float4(fPathXform.map1Point(p+2), 1, 1);
// Chop until each conic requires "maxSegments" tessellation segments or fewer.
int numPatches = SkScalarCeilToInt(sqrtf(n2/fMaxSegments_pow2));
for (; numPatches >= 2; --numPatches) {
// Chop in homogeneous space.
float T = 1.f/numPatches;
float4 ab = mix(p0, p1, T);
float4 bc = mix(p1, p2, T);
float4 abc = mix(ab, bc, T);
// Project and write the 1st conic.
ConicPatch(this) << (p0.xy() / p0.w())
<< (ab.xy() / ab.w())
<< (abc.xy() / abc.w())
<< (ab.w() / sqrtf(p0.w() * abc.w()));
std::tie(p0, p1) = {abc, bc}; // Save the 2nd conic (in homogeneous space).
}
// Project and write the remaining conic.
SkASSERT(numPatches == 1);
ConicPatch(this) << (p0.xy() / p0.w())
<< (p1.xy() / p1.w())
<< p2.xy() // p2.w == 1
<< (p1.w() / sqrtf(p0.w()));
fNumFixedSegments_pow4 = fMaxSegments_pow4;
}
if (VertexWriter vertexWriter = fChunker.appendVertex()) {
fPathXform.mapQuadToCubic(&vertexWriter, p);
vertexWriter << midpoint
<< VertexWriter::If(!shaderCaps.infinitySupport(),
GrTessellationShader::kCubicCurveType);
}
fNumFixedSegments_pow4 = std::max(numSegments_pow4, fNumFixedSegments_pow4);
}
SK_ALWAYS_INLINE void writeConicWedge(const GrShaderCaps& shaderCaps,
const SkPoint p[3],
float w,
SkPoint midpoint) {
float numSegments_pow2 = wangs_formula::conic_pow2(kTessellationPrecision,
p,
w,
fTotalVectorXform);
if (numSegments_pow2 > fMaxSegments_pow2) {
this->chopAndWriteConicWedges(shaderCaps, {p, w}, midpoint);
return;
}
if (VertexWriter vertexWriter = fChunker.appendVertex()) {
fPathXform.mapConicToPatch(&vertexWriter, p, w);
vertexWriter << midpoint
<< VertexWriter::If(!shaderCaps.infinitySupport(),
GrTessellationShader::kConicCurveType);
}
fNumFixedSegments_pow4 = std::max(numSegments_pow2 * numSegments_pow2,
fNumFixedSegments_pow4);
}
void cubicTo(const SkPoint p[4]) {
auto [p0, p1] = fPathXform.map2Points(p);
auto [p2, p3] = fPathXform.map2Points(p+2);
float n4 = wangs_formula::cubic_pow4(kTessellationPrecision, p, fTotalVectorXform);
if (n4 <= fMaxSegments_pow4) {
// This cubic already fits into "maxSegments" tessellation segments.
CubicPatch(this) << p0 << p1 << p2 << p3;
fNumFixedSegments_pow4 = std::max(n4, fNumFixedSegments_pow4);
} else {
// Chop until each cubic requires "maxSegments" tessellation segments or fewer.
int numPatches = SkScalarCeilToInt(wangs_formula::root4(n4/fMaxSegments_pow4));
for (; numPatches >= 3; numPatches -= 2) {
// Chop into 3 cubics.
float4 T = float4(1,1,2,2) / numPatches;
float4 ab = mix(p0.xyxy(), p1.xyxy(), T);
float4 bc = mix(p1.xyxy(), p2.xyxy(), T);
float4 cd = mix(p2.xyxy(), p3.xyxy(), T);
float4 abc = mix(ab, bc, T);
float4 bcd = mix(bc, cd, T);
float4 abcd = mix(abc, bcd, T);
float4 middle = mix(abc, bcd, T.zwxy()); // p1 & p2 of the middle cubic.
SK_ALWAYS_INLINE void writeCubicWedge(const GrShaderCaps& shaderCaps,
const SkPoint p[4],
SkPoint midpoint) {
float numSegments_pow4 = wangs_formula::cubic_pow4(kTessellationPrecision,
p,
fTotalVectorXform);
if (numSegments_pow4 > fMaxSegments_pow4) {
this->chopAndWriteCubicWedges(shaderCaps, p, midpoint);
return;
CubicPatch(this) << p0 << ab.lo << abc.lo << abcd.lo; // Write 1st cubic.
CubicPatch(this) << abcd.lo << middle << abcd.hi; // Write 2nd cubic.
std::tie(p0, p1, p2) = {abcd.hi, bcd.hi, cd.hi}; // Save 3rd cubic.
}
if (numPatches == 2) {
// Chop into 2 cubics.
float2 ab = (p0 + p1) * .5f;
float2 bc = (p1 + p2) * .5f;
float2 cd = (p2 + p3) * .5f;
float2 abc = (ab + bc) * .5f;
float2 bcd = (bc + cd) * .5f;
float2 abcd = (abc + bcd) * .5f;
CubicPatch(this) << p0 << ab << abc << abcd; // Write 1st cubic.
CubicPatch(this) << abcd << bcd << cd << p3; // Write 2nd cubic.
} else {
SkASSERT(numPatches == 1);
CubicPatch(this) << p0 << p1 << p2 << p3; // Write single cubic.
}
fNumFixedSegments_pow4 = fMaxSegments_pow4;
}
if (VertexWriter vertexWriter = fChunker.appendVertex()) {
fPathXform.map4Points(&vertexWriter, p);
vertexWriter << midpoint
<< VertexWriter::If(!shaderCaps.infinitySupport(),
GrTessellationShader::kCubicCurveType);
}
fNumFixedSegments_pow4 = std::max(numSegments_pow4, fNumFixedSegments_pow4);
}
// Returns the largest segment count (raised to the 4th power) recorded so far by the
// curve-writing methods.
//
// The running maximum is tracked as a float. It is rounded UP to the next integer here: a plain
// float->int conversion truncates toward zero (e.g. 16.5 -> 16), which would under-report the
// number of segments the recorded curves need.
int numFixedSegments_pow4() const {
    return static_cast<int>(ceilf(fNumFixedSegments_pow4));
}
private:
void chopAndWriteQuadraticWedges(const GrShaderCaps& shaderCaps,
const SkPoint p[3],
SkPoint midpoint) {
SkPoint chops[5];
SkChopQuadAtHalf(p, chops);
this->writeQuadraticWedge(shaderCaps, chops, midpoint);
this->writeQuadraticWedge(shaderCaps, chops + 2, midpoint);
}
// Local shorthand that forwards straight to VertexWriter::If, so the patch helpers below can write
// a conditional value without spelling out the VertexWriter:: qualifier.
template <typename T>
static VertexWriter::Conditional<T> If(bool c, const T& v) {
    return VertexWriter::If(c, v);
}
void chopAndWriteConicWedges(const GrShaderCaps& shaderCaps,
const SkConic& conic,
SkPoint midpoint) {
SkConic chops[2];
if (!conic.chopAt(.5, chops)) {
return;
// RAII. Appends a patch during construction and writes the remaining data for a cubic during
// destruction. The caller outputs p0,p1,p2,p3 (8 floats):
//
// CubicPatch(this) << p0 << p1 << p2 << p3;
//
struct CubicPatch {
CubicPatch(WedgeWriter* _this) : fThis(_this), fVertexWriter(fThis->appendPatch()) {}
~CubicPatch() {
fVertexWriter << fThis->fMidpoint
<< If(!fThis->fGPUInfinitySupport, GrTessellationShader::kCubicCurveType);
}
this->writeConicWedge(shaderCaps, chops[0].fPts, chops[0].fW, midpoint);
this->writeConicWedge(shaderCaps, chops[1].fPts, chops[1].fW, midpoint);
}
operator VertexWriter&() { return fVertexWriter; }
WedgeWriter* fThis;
VertexWriter fVertexWriter;
};
void chopAndWriteCubicWedges(const GrShaderCaps& shaderCaps,
const SkPoint p[4],
SkPoint midpoint) {
SkPoint chops[7];
SkChopCubicAtHalf(p, chops);
this->writeCubicWedge(shaderCaps, chops, midpoint);
this->writeCubicWedge(shaderCaps, chops + 3, midpoint);
// RAII. Appends a patch during construction and writes the remaining data for a conic during
// destruction. The caller outputs p0,p1,p2,w (7 floats):
//
// ConicPatch(this) << p0 << p1 << p2 << w;
//
// Scoped helper for emitting one conic patch. Construction appends a patch via
// WedgeWriter::appendPatch(); the caller then streams p0, p1, p2 and w through the VertexWriter&
// conversion; destruction appends the trailing data (the infinity marker, the midpoint and, when
// the GPU lacks infinity support, the explicit conic curve type).
//
// Because the destructor writes to the patch, copying an instance would emit the trailer twice.
// Copy construction and assignment are therefore deleted, and the constructor is explicit so a
// WedgeWriter* never converts to a patch by accident.
struct ConicPatch {
    explicit ConicPatch(WedgeWriter* _this)
            : fThis(_this), fVertexWriter(fThis->appendPatch()) {}
    ConicPatch(const ConicPatch&) = delete;
    ConicPatch& operator=(const ConicPatch&) = delete;
    ~ConicPatch() {
        fVertexWriter << VertexWriter::kIEEE_32_infinity  // p3.y=Inf indicates a conic.
                      << fThis->fMidpoint
                      << If(!fThis->fGPUInfinitySupport, GrTessellationShader::kConicCurveType);
    }
    // Lets "ConicPatch(this) << ..." stream directly into the underlying writer.
    operator VertexWriter&() { return fVertexWriter; }
    // fThis must be declared before fVertexWriter: the writer's initializer dereferences it.
    WedgeWriter* fThis;
    VertexWriter fVertexWriter;
};
// Returns a writer positioned at storage for one new patch. It never returns a null writer: if
// the chunker cannot provide a vertex, the writer is pointed at a scratch buffer instead, so
// callers can write unconditionally.
VertexWriter appendPatch() {
    VertexWriter patchWriter = fChunker.appendVertex();
    if (patchWriter) {
        return patchWriter;
    }
    // Failed to allocate GPU storage for the patch. Fall back to a throwaway location, sized on
    // first use to the chunker's stride, so the callsites don't have to do null checks.
    if (!fFallbackPatchStorage) {
        fFallbackPatchStorage.reset(fChunker.stride());
    }
    patchWriter = fFallbackPatchStorage.data();
    return patchWriter;
}
GrVertexChunkBuilder fChunker;
wangs_formula::VectorXform fTotalVectorXform;
PathXform fPathXform;
const float fMaxSegments_pow2;
const float fMaxSegments_pow4;
const bool fGPUInfinitySupport;
wangs_formula::VectorXform fTotalVectorXform;
PathXform fPathXform;
SkPoint fMidpoint;
// For when fChunker fails to allocate a patch in GPU memory.
SkAutoTMalloc<char> fFallbackPatchStorage;
// If using fixed count, this is the max number of curve segments we need to draw per instance.
float fNumFixedSegments_pow4 = 1;
};
} // namespace
PathTessellator* PathWedgeTessellator::Make(SkArenaAlloc* arena,
const SkMatrix& viewMatrix,
const SkPMColor4f& color,
@ -302,15 +401,15 @@ void PathWedgeTessellator::prepare(GrMeshDrawTarget* target,
} else {
maxSegments = GrPathTessellationShader::kMaxFixedCountSegments;
}
WedgeWriter wedgeWriter(target, &fVertexChunkArray, patchStride, wedgeAllocCount, maxSegments);
WedgeWriter wedgeWriter(target, &fVertexChunkArray, patchStride, wedgeAllocCount, maxSegments,
shaderCaps);
for (auto [pathMatrix, path] : pathDrawList) {
wedgeWriter.setMatrices(fShader->viewMatrix(), pathMatrix);
MidpointContourParser parser(path);
while (parser.parseNextContour()) {
SkPoint midpoint = wedgeWriter.pathXform().mapPoint(parser.currentMidpoint());
wedgeWriter.setMidpoint(parser.currentMidpoint());
SkPoint lastPoint = {0, 0};
SkPoint startPoint = {0, 0};
SkPoint lastPoint = startPoint;
for (auto [verb, pts, w] : parser.currentContour()) {
switch (verb) {
case SkPathVerb::kMove:
@ -319,25 +418,26 @@ void PathWedgeTessellator::prepare(GrMeshDrawTarget* target,
case SkPathVerb::kClose:
break; // Ignore. We can assume an implicit close at the end.
case SkPathVerb::kLine:
wedgeWriter.writeFlatWedge(shaderCaps, pts[0], pts[1], midpoint);
wedgeWriter.lineTo(pts);
lastPoint = pts[1];
break;
case SkPathVerb::kQuad:
wedgeWriter.writeQuadraticWedge(shaderCaps, pts, midpoint);
wedgeWriter.quadTo(pts);
lastPoint = pts[2];
break;
case SkPathVerb::kConic:
wedgeWriter.writeConicWedge(shaderCaps, pts, *w, midpoint);
wedgeWriter.conicTo(pts, *w);
lastPoint = pts[2];
break;
case SkPathVerb::kCubic:
wedgeWriter.writeCubicWedge(shaderCaps, pts, midpoint);
wedgeWriter.cubicTo(pts);
lastPoint = pts[3];
break;
}
}
if (lastPoint != startPoint) {
wedgeWriter.writeFlatWedge(shaderCaps, lastPoint, startPoint, midpoint);
SkPoint pts[2] = {lastPoint, startPoint};
wedgeWriter.lineTo(pts);
}
}
}

View File

@ -38,23 +38,30 @@ public:
return fScale.lo * p + (fSkew.lo * skvx::shuffle<1,0>(p) + fTrans.lo);
}
// Transforms two points packed as (x0, y0, x1, y1): each coordinate is scaled, then the skew term
// (applied to the swapped x/y of the same point) and the translation are added.
SK_ALWAYS_INLINE float4 map2Points(float4 p0p1) const {
    float4 skewPlusTranslate = fSkew * p0p1.yxwz() + fTrans;
    return fScale * p0p1 + skewPlusTranslate;
}
// Loads a single SkPoint from 'pt' and returns it transformed, as a float2.
SK_ALWAYS_INLINE float2 map1Point(const SkPoint pt[1]) const {
    float2 loaded = float2::Load(pt);
    return this->mapPoint(loaded);
}
// Loads two consecutive SkPoints from 'pts' and returns them transformed, packed in one float4.
SK_ALWAYS_INLINE float4 map2Points(const SkPoint pts[2]) const {
    float4 loaded = float4::Load(pts);
    return this->map2Points(loaded);
}
// SkPoint convenience overload: reinterprets the point as a float2, transforms it with the float2
// overload, and reinterprets the result back to SkPoint.
SK_ALWAYS_INLINE SkPoint mapPoint(SkPoint p) const {
    float2 asVector = skvx::bit_pun<float2>(p);
    float2 mapped = this->mapPoint(asVector);
    return skvx::bit_pun<SkPoint>(mapped);
}
// Loads two consecutive SkPoints from 'pts', transforms them, and streams the result to 'writer'.
SK_ALWAYS_INLINE void map2Points(VertexWriter* writer, const SkPoint pts[2]) const {
    float4 packed = float4::Load(pts);
    // Swap x and y within each point for the skew term.
    float4 swapped = skvx::shuffle<1,0,3,2>(packed);
    *writer << (fScale * packed + (fSkew * swapped + fTrans));
}
SK_ALWAYS_INLINE void map3Points(VertexWriter* writer, const SkPoint pts[3]) const {
*writer << this->mapPoint(pts[0]);
this->map2Points(writer, pts + 1);
*writer << this->map2Points(pts);
*writer << this->map1Point(pts + 2);
}
SK_ALWAYS_INLINE void map4Points(VertexWriter* writer, const SkPoint pts[4]) const {
this->map2Points(writer, pts);
this->map2Points(writer, pts + 2);
*writer << this->map2Points(pts);
*writer << this->map2Points(pts + 2);
}
// Emits a degenerate, 4-point transformed cubic bezier equal to a line.

View File

@ -35,18 +35,36 @@ template<int N> using uvec = skvx::Vec<N, uint32_t>;
using uint2 = uvec<2>;
using uint4 = uvec<4>;
SK_MAYBE_UNUSED SK_ALWAYS_INLINE float dot(float2 a, float2 b) {
#define AI SK_MAYBE_UNUSED SK_ALWAYS_INLINE
// 2D dot product: a.x*b.x + a.y*b.y.
AI float dot(float2 a, float2 b) {
    float2 products = a*b;
    return products.x() + products.y();
}
SK_MAYBE_UNUSED SK_ALWAYS_INLINE float cross(float2 a, float2 b) {
// 2D cross product: a.x*b.y - a.y*b.x.
AI float cross(float2 a, float2 b) {
    float2 crossTerms = a * b.yx();
    return crossTerms[0] - crossTerms[1];
}
SK_MAYBE_UNUSED constexpr SK_ALWAYS_INLINE float pow2(float x) { return x*x; }
SK_MAYBE_UNUSED constexpr SK_ALWAYS_INLINE float pow4(float x) { return pow2(x*x); }
// This does not return b when t==1, but it otherwise seems to get better precision than
// "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
// The responsibility falls on the caller to check that t != 1 before calling.
// Per-lane linear interpolation from 'a' toward 'b', computed as a + (b - a)*T.
// Every lane of T must lie in [0, 1); this is asserted.
template<int N>
AI vec<N> mix(vec<N> a, vec<N> b, vec<N> T) {
    SkASSERT(all((0 <= T) & (T < 1)));
    vec<N> delta = b - a;
    return delta*T + a;
}
// Scalar-T convenience overload: broadcasts T to every lane and defers to the vector overload.
template<int N>
AI vec<N> mix(vec<N> a, vec<N> b, float T) {
    vec<N> broadcastT(T);
    return mix(a, b, broadcastT);
}
// x squared.
AI constexpr float pow2(float x) { return x*x; }
// x to the 4th power, computed as the square of x*x.
AI constexpr float pow4(float x) { return pow2(x*x); }
#undef AI
// Don't tessellate paths that might have an individual curve that requires more than 1024 segments.
// (See wangs_formula::worst_case_cubic). If this is the case, call "PreChopPathCurves" first.