Implement a fixed count stroke tessellator

This new tessellator renders strokes as fixed-count triangle strip
instances. Any extra triangles not needed by the instance are emitted
as degenerate triangles. Since it draws in order, this tessellator
allows us to batch dynamic colors even when hw tessellation is not
supported.

Bug: skia:10419
Change-Id: If03a8b76319471ae4d4580dda019b69204d9197b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/398416
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
This commit is contained in:
Chris Dalton 2021-04-20 00:45:50 -06:00 committed by Skia Commit-Bot
parent 57ab06c14e
commit 82007f568d
12 changed files with 572 additions and 124 deletions

View File

@ -13,6 +13,7 @@
#include "src/gpu/mock/GrMockOpTarget.h"
#include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
#include "src/gpu/tessellate/GrPathTessellator.h"
#include "src/gpu/tessellate/GrStrokeFixedCountTessellator.h"
#include "src/gpu/tessellate/GrStrokeHardwareTessellator.h"
#include "src/gpu/tessellate/GrStrokeIndirectTessellator.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
@ -203,6 +204,24 @@ DEF_PATH_TESS_BENCH(middle_out_triangulation,
}
using PathStrokeList = GrStrokeTessellator::PathStrokeList;
using MakeTessellatorFn = std::unique_ptr<GrStrokeTessellator>(*)(ShaderFlags, const SkMatrix&,
PathStrokeList*,
const GrShaderCaps&);
// MakeTessellatorFn-compatible factory that constructs a GrStrokeHardwareTessellator and hands it
// back through the GrStrokeTessellator base pointer.
static std::unique_ptr<GrStrokeTessellator> make_hw_tessellator(ShaderFlags shaderFlags,
                                                                const SkMatrix& viewMatrix,
                                                                PathStrokeList* pathStrokeList,
                                                                const GrShaderCaps& shaderCaps) {
    std::unique_ptr<GrStrokeTessellator> hwTessellator =
            std::make_unique<GrStrokeHardwareTessellator>(shaderFlags, viewMatrix, pathStrokeList,
                                                          shaderCaps);
    return hwTessellator;
}
// MakeTessellatorFn-compatible factory that constructs a GrStrokeFixedCountTessellator.
// "shaderCaps" is not forwarded to the constructor; it is accepted only so this function has the
// same signature as the other tessellator factories.
static std::unique_ptr<GrStrokeTessellator> make_fixed_count_tessellator(
        ShaderFlags shaderFlags, const SkMatrix& viewMatrix, PathStrokeList* pathStrokeList,
        const GrShaderCaps& shaderCaps) {
    std::unique_ptr<GrStrokeTessellator> fixedCountTessellator =
            std::make_unique<GrStrokeFixedCountTessellator>(shaderFlags, viewMatrix,
                                                            pathStrokeList);
    return fixedCountTessellator;
}
using MakePathStrokesFn = std::vector<PathStrokeList>(*)();
static std::vector<PathStrokeList> make_simple_cubic_path() {
@ -270,14 +289,15 @@ static std::vector<PathStrokeList> make_motionmark_paths() {
return pathStrokes;
}
class GrStrokeHardwareTessellator::TestingOnly_Benchmark : public Benchmark {
class TessPrepareBench : public Benchmark {
public:
TestingOnly_Benchmark(MakePathStrokesFn MakePathStrokesFn, ShaderFlags shaderFlags,
float matrixScale, const char* suffix)
: fMakePathStrokesFn(MakePathStrokesFn)
TessPrepareBench(MakePathStrokesFn makePathStrokesFn, MakeTessellatorFn makeTessellatorFn,
ShaderFlags shaderFlags, float matrixScale, const char* suffix)
: fMakePathStrokesFn(makePathStrokesFn)
, fMakeTessellatorFn(makeTessellatorFn)
, fShaderFlags(shaderFlags)
, fMatrixScale(matrixScale) {
fName.printf("tessellate_GrStrokeHardwareTessellator_prepare%s", suffix);
fName.printf("tessellate_%s", suffix);
}
private:
@ -298,43 +318,58 @@ private:
}
fTotalVerbCount += fPathStrokes[i].fPath.countVerbs();
}
fTessellator = fMakeTessellatorFn(fShaderFlags, SkMatrix::Scale(fMatrixScale, fMatrixScale),
fPathStrokes.data(), *fTarget->caps().shaderCaps());
}
void onDraw(int loops, SkCanvas*) final {
SkMatrix matrix = SkMatrix::Scale(fMatrixScale, fMatrixScale);
for (int i = 0; i < loops; ++i) {
GrStrokeHardwareTessellator tessellator(fShaderFlags, matrix, fPathStrokes.data(),
*fTarget->caps().shaderCaps());
tessellator.prepare(fTarget.get(), fTotalVerbCount);
fTessellator->prepare(fTarget.get(), fTotalVerbCount);
fTarget->resetAllocator();
}
}
SkString fName;
MakePathStrokesFn fMakePathStrokesFn;
MakeTessellatorFn fMakeTessellatorFn;
const ShaderFlags fShaderFlags;
float fMatrixScale;
std::unique_ptr<GrMockOpTarget> fTarget;
std::vector<PathStrokeList> fPathStrokes;
std::unique_ptr<GrStrokeTessellator> fTessellator;
SkArenaAlloc fPersistentArena{1024};
int fTotalVerbCount = 0;
};
DEF_BENCH(
return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_simple_cubic_path,
ShaderFlags::kNone, 1, "");
DEF_BENCH(return new TessPrepareBench(
make_simple_cubic_path, make_hw_tessellator, ShaderFlags::kNone, 1,
"GrStrokeHardwareTessellator");
)
DEF_BENCH(
return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_simple_cubic_path,
ShaderFlags::kNone, 5,
"_one_chop");
DEF_BENCH(return new TessPrepareBench(
make_simple_cubic_path, make_hw_tessellator, ShaderFlags::kNone, 5,
"GrStrokeHardwareTessellator_one_chop");
)
DEF_BENCH(
return new GrStrokeHardwareTessellator::TestingOnly_Benchmark(make_motionmark_paths,
ShaderFlags::kDynamicStroke, 1,
"_motionmark");
DEF_BENCH(return new TessPrepareBench(
make_motionmark_paths, make_hw_tessellator, ShaderFlags::kDynamicStroke, 1,
"GrStrokeHardwareTessellator_motionmark");
)
DEF_BENCH(return new TessPrepareBench(
make_simple_cubic_path, make_fixed_count_tessellator, ShaderFlags::kNone, 1,
"GrStrokeFixedCountTessellator");
)
DEF_BENCH(return new TessPrepareBench(
make_simple_cubic_path, make_fixed_count_tessellator, ShaderFlags::kNone, 5,
"GrStrokeFixedCountTessellator_one_chop");
)
DEF_BENCH(return new TessPrepareBench(
make_motionmark_paths, make_fixed_count_tessellator, ShaderFlags::kDynamicStroke, 1,
"GrStrokeFixedCountTessellator_motionmark");
)
class GrStrokeIndirectTessellator::Benchmark : public ::Benchmark {

View File

@ -475,6 +475,8 @@ skia_gpu_sources = [
"$_src/gpu/tessellate/GrPathTessellator.h",
"$_src/gpu/tessellate/GrStencilPathShader.cpp",
"$_src/gpu/tessellate/GrStencilPathShader.h",
"$_src/gpu/tessellate/GrStrokeFixedCountTessellator.cpp",
"$_src/gpu/tessellate/GrStrokeFixedCountTessellator.h",
"$_src/gpu/tessellate/GrStrokeHardwareTessellator.cpp",
"$_src/gpu/tessellate/GrStrokeHardwareTessellator.h",
"$_src/gpu/tessellate/GrStrokeIndirectTessellator.cpp",

View File

@ -19,8 +19,8 @@
// we will end up writing.
struct GrVertexChunk {
sk_sp<const GrBuffer> fBuffer;
int fVertexCount = 0;
int fBaseVertex;
int fCount = 0;
int fBase; // baseVertex or baseInstance, depending on the use case.
};
// Represents an array of GrVertexChunks.
@ -45,7 +45,7 @@ public:
~GrVertexChunkBuilder() {
if (!fChunks->empty()) {
fTarget->putBackVertices(fCurrChunkVertexCapacity - fCurrChunkVertexCount, fStride);
fChunks->back().fVertexCount = fCurrChunkVertexCount;
fChunks->back().fCount = fCurrChunkVertexCount;
}
}
@ -68,7 +68,7 @@ private:
bool allocChunk(int minCount) {
if (!fChunks->empty()) {
// No need to put back vertices; the buffer is full.
fChunks->back().fVertexCount = fCurrChunkVertexCount;
fChunks->back().fCount = fCurrChunkVertexCount;
}
fCurrChunkVertexCount = 0;
GrVertexChunk* chunk = &fChunks->push_back();
@ -76,7 +76,7 @@ private:
fMinVerticesPerChunk * minCount,
fMinVerticesPerChunk * minCount,
&chunk->fBuffer,
&chunk->fBaseVertex,
&chunk->fBase,
&fCurrChunkVertexCapacity)};
if (!fCurrChunkVertexWriter || !chunk->fBuffer || fCurrChunkVertexCapacity < minCount) {
SkDebugf("WARNING: Failed to allocate vertex buffer for GrVertexChunk.\n");

View File

@ -0,0 +1,371 @@
/*
* Copyright 2021 Google LLC.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/gpu/tessellate/GrStrokeFixedCountTessellator.h"
#include "src/core/SkGeometry.h"
#include "src/gpu/geometry/GrPathUtils.h"
#include "src/gpu/tessellate/GrStrokeIterator.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
namespace {
constexpr static float kMaxParametricSegments_pow4 = 48*48*48*48; // 48^4
// Writes out strokes to the given instance chunk array, chopping if necessary so that all instances
// require 48 parametric segments or less. (We don't consider radial segments here. The tessellator
// will just add enough additional segments to handle a worst-case 180 degree stroke.)
//
// Each instance written by writeStroke() is four curve points followed by the previous control
// point (used for the join), followed by whichever dynamic attribs the shader flags enable.
class InstanceWriter {
public:
using ShaderFlags = GrStrokeTessellator::ShaderFlags;
// "matrixMaxScale" is only used to derive the parametric precision. The remaining arguments are
// forwarded to the GrVertexChunkBuilder that allocates instance storage from "target" and
// records it in "patchChunks".
InstanceWriter(ShaderFlags shaderFlags, GrMeshDrawOp::Target* target, float matrixMaxScale,
GrVertexChunkArray* patchChunks, size_t instanceStride, int minInstancesPerChunk)
: fShaderFlags(shaderFlags)
, fChunkBuilder(target, patchChunks, instanceStride, minInstancesPerChunk)
, fParametricPrecision(GrStrokeTolerances::CalcParametricPrecision(matrixMaxScale)) {
}
// The precision value that is passed to Wang's formula for every curve we write.
float parametricPrecision() const { return fParametricPrecision; }
// maxParametricSegments^4, or the number of parametric segments, raised to the 4th power,
// that are required by the single instance we've written that requires the most segments.
// Starts at 1 and is never lowered.
float maxParametricSegments_pow4() const { return fMaxParametricSegments_pow4; }
// Updates the dynamic stroke state that we will write out with each instance.
// Must not be called while a contour's first stroke is still deferred (see writeStroke()).
void updateDynamicStroke(const SkStrokeRec& stroke) {
SkASSERT(!fHasDeferredFirstStroke);
SkASSERT(fShaderFlags & ShaderFlags::kDynamicStroke);
fDynamicStroke.set(stroke);
}
// Updates the dynamic color state that we will write out with each instance.
// Must not be called while a contour's first stroke is still deferred (see writeStroke()).
void updateDynamicColor(const SkPMColor4f& color) {
SkASSERT(!fHasDeferredFirstStroke);
SkASSERT(fShaderFlags & ShaderFlags::kDynamicColor);
bool wideColor = fShaderFlags & ShaderFlags::kWideColor;
// Without kWideColor the color has to be representable in bytes.
SkASSERT(wideColor || color.fitsInBytes());
fDynamicColor.set(color, wideColor);
}
// Writes a line as a cubic whose control points coincide with its endpoints. Lines never update
// fMaxParametricSegments_pow4.
void lineTo(SkPoint start, SkPoint end) {
SkPoint cubic[] = {start, start, end, end};
SkPoint endControlPoint = start;
this->writeStroke(cubic, endControlPoint);
}
// Writes a quadratic. If Wang's formula asks for more than the maximum number of parametric
// segments, the quadratic is chopped at T=1/2 and each half is written recursively.
void quadraticTo(const SkPoint p[3]) {
float numParametricSegments_pow4 = GrWangsFormula::quadratic_pow4(fParametricPrecision, p);
if (numParametricSegments_pow4 > kMaxParametricSegments_pow4) {
SkPoint chops[5];
SkChopQuadAtHalf(p, chops);
// The two halves share the point at chops[2].
this->quadraticTo(chops);
this->quadraticTo(chops + 2);
return;
}
SkPoint cubic[4];
GrPathUtils::convertQuadToCubic(p, cubic);
SkPoint endControlPoint = cubic[2];
this->writeStroke(cubic, endControlPoint);
fMaxParametricSegments_pow4 = std::max(numParametricSegments_pow4,
fMaxParametricSegments_pow4);
}
// Writes a conic with weight "w", chopping at T=1/2 and recursing when it needs too many
// parametric segments.
void conicTo(const SkPoint p[3], float w) {
// conic_pow2 is given 1/precision here, unlike the quadratic and cubic formulas above and below
// which are given the precision directly. Squaring its result yields the pow4 form.
float numParametricSegments_pow2 = GrWangsFormula::conic_pow2(1/fParametricPrecision, p, w);
float numParametricSegments_pow4 = numParametricSegments_pow2 * numParametricSegments_pow2;
if (numParametricSegments_pow4 > kMaxParametricSegments_pow4) {
SkConic chops[2];
if (SkConic(p, w).chopAt(.5f, chops)) {
this->conicTo(chops[0].fPts, chops[0].fW);
this->conicTo(chops[1].fPts, chops[1].fW);
return;
}
// NOTE(review): if chopAt() fails we fall through and write the conic unchopped, and the
// max below can then exceed kMaxParametricSegments_pow4 -- confirm this is intended.
}
SkPoint conic[4];
GrPathShader::WriteConicPatch(p, w, conic);
SkPoint endControlPoint = conic[1];
this->writeStroke(conic, endControlPoint);
fMaxParametricSegments_pow4 = std::max(numParametricSegments_pow4,
fMaxParametricSegments_pow4);
}
// Writes a cubic, chopping at T=1/2 and recursing when it needs too many parametric segments.
// The name indicates the caller has already chopped the cubic into convex sections that rotate
// no more than 180 degrees (see the findCubicConvex180Chops() call in prepare()).
void cubicConvex180To(const SkPoint p[4]) {
float numParametricSegments_pow4 = GrWangsFormula::cubic_pow4(fParametricPrecision, p);
if (numParametricSegments_pow4 > kMaxParametricSegments_pow4) {
SkPoint chops[7];
SkChopCubicAtHalf(p, chops);
// The two halves share the point at chops[3].
this->cubicConvex180To(chops);
this->cubicConvex180To(chops + 3);
return;
}
// Use the last point that differs from p[3] so the control point for the next join is not
// coincident with the endpoint. Falls back to p[0] when p[1], p[2] and p[3] are all equal.
SkPoint endControlPoint = (p[3] != p[2]) ? p[2] : (p[2] != p[1]) ? p[1] : p[0];
this->writeStroke(p, endControlPoint);
fMaxParametricSegments_pow4 = std::max(numParametricSegments_pow4,
fMaxParametricSegments_pow4);
}
// Called when we encounter the verb "kMoveWithinContour". Moves invalidate the previous control
// point. The stroke iterator tells us the new value to use for the previous control point.
void setLastControlPoint(SkPoint newLastControlPoint) {
fLastControlPoint = newLastControlPoint;
fHasLastControlPoint = true;
}
// Draws a circle whose diameter is equal to the stroke width. We emit circles at cusp points,
// round caps, and empty strokes that are specified to be drawn as circles.
void writeCircle(SkPoint location) {
if (GrVertexWriter writer = fChunkBuilder.appendVertex()) {
// The shader interprets an empty stroke + empty join as a special case that denotes a
// circle, or 180-degree point stroke.
// 5 == the four curve points plus the join control point that writeStroke() emits.
writer.fill(location, 5);
this->writeDynamicAttribs(&writer);
}
}
// Ends the current contour: flushes the deferred first stroke, if any, and forgets the last
// control point so the next contour starts by deferring again.
void finishContour() {
if (fHasDeferredFirstStroke) {
// We deferred the first stroke because we didn't know the previous control point to use
// for its join. We write it out now.
SkASSERT(fHasLastControlPoint);
// The SkPoint() passed as the end control point is a placeholder; fHasLastControlPoint is
// cleared immediately below, so the value stored from it is not treated as valid.
this->writeStroke(fDeferredFirstStroke, SkPoint());
fHasDeferredFirstStroke = false;
}
fHasLastControlPoint = false;
}
private:
// Emits one instance for the curve "p" using fLastControlPoint for its join, or defers it if no
// previous control point is known yet. Either way, "endControlPoint" becomes the previous control
// point for whatever is written next.
void writeStroke(const SkPoint p[4], SkPoint endControlPoint) {
if (!fHasLastControlPoint) {
// We don't know the previous control point yet to use for the join. Defer writing out
// this stroke until the end.
memcpy(fDeferredFirstStroke, p, sizeof(fDeferredFirstStroke));
fHasDeferredFirstStroke = true;
fHasLastControlPoint = true;
} else if (GrVertexWriter writer = fChunkBuilder.appendVertex()) {
// A writer that converts to false means no instance storage was available; the stroke is
// dropped in that case.
writer.writeArray(p, 4);
writer.write(fLastControlPoint);
this->writeDynamicAttribs(&writer);
}
fLastControlPoint = endControlPoint;
}
// Appends the per-instance dynamic stroke and/or color values, in that order, when the
// corresponding shader flags are set.
void writeDynamicAttribs(GrVertexWriter* writer) {
if (fShaderFlags & ShaderFlags::kDynamicStroke) {
writer->write(fDynamicStroke);
}
if (fShaderFlags & ShaderFlags::kDynamicColor) {
writer->write(fDynamicColor);
}
}
const ShaderFlags fShaderFlags;
GrVertexChunkBuilder fChunkBuilder;
const float fParametricPrecision;
float fMaxParametricSegments_pow4 = 1;
// We can't write out the first stroke until we know the previous control point for its join.
// Only meaningful while fHasDeferredFirstStroke is true.
SkPoint fDeferredFirstStroke[4];
// Only meaningful while fHasLastControlPoint is true.
SkPoint fLastControlPoint; // Used to configure the joins in the instance data.
bool fHasDeferredFirstStroke = false;
bool fHasLastControlPoint = false;
// Values for the current dynamic state (if any) that will get written out with each instance.
GrStrokeTessellateShader::DynamicStroke fDynamicStroke;
GrVertexColor fDynamicColor;
};
// Computes an upper bound on the number of edges a join of type "joinType" can require.
static int worst_case_edges_in_join(SkPaint::Join joinType, float numRadialSegmentsPerRadian) {
    const int fixedEdges = GrStrokeTessellateShader::NumFixedEdgesInJoin(joinType);
    if (joinType != SkPaint::kRound_Join) {
        // Every non-round join uses only its fixed edge count.
        return fixedEdges;
    }
    // Round joins additionally need radial edges, which we must count ourselves. Budget for the
    // worst case: a 180 degree join (SK_ScalarPI radians).
    const int worstCaseRadialSegments =
            SkScalarCeilToInt(numRadialSegmentsPerRadian * SK_ScalarPI);
    const int extraRadialEdges = std::max(worstCaseRadialSegments - 1, 0);
    return fixedEdges + extraRadialEdges;
}
} // namespace
// Writes one instance per (possibly chopped) stroke segment for every path in fPathStrokeList, then
// computes the fixed number of edges that every instance will be drawn with.
//
// "target" supplies the instance buffer storage. "totalCombinedVerbCnt" is only used to size the
// initial instance allocation.
//
// On return, fInstanceChunks holds the written instances, the shader has been told the fixed edge
// count, and fFixedVertexCount holds two vertices per edge.
//
// NOTE(review): chunks are appended to fInstanceChunks and nothing here clears it, so calling
// prepare() more than once on the same tessellator looks like it would accumulate chunks --
// confirm callers only invoke it once per tessellator.
void GrStrokeFixedCountTessellator::prepare(GrMeshDrawOp::Target* target,
int totalCombinedVerbCnt) {
// Running maximums over every stroke we write. These determine the fixed edge count at the end.
int maxEdgesInJoin = 0;
float maxRadialSegmentsPerRadian = 0;
std::array<float, 2> matrixMinMaxScales;
if (!fShader.viewMatrix().getMinMaxScales(matrixMinMaxScales.data())) {
// The scales could not be computed for this matrix; fall back to a scale of 1.
matrixMinMaxScales.fill(1);
}
// Over-allocate enough patches for each stroke to chop once, and for 8 extra caps. Since we
// have to chop at inflections, points of 180 degree rotation, and anywhere a stroke requires
// too many parametric segments, many strokes will end up getting chopped.
int strokePreallocCount = totalCombinedVerbCnt * 2;
int capPreallocCount = 8;
int minInstancesPerChunk = strokePreallocCount + capPreallocCount;
InstanceWriter instanceWriter(fShaderFlags, target, matrixMinMaxScales[1], &fInstanceChunks,
fShader.instanceStride(), minInstancesPerChunk);
if (!(fShaderFlags & ShaderFlags::kDynamicStroke)) {
// Strokes are static. Calculate tolerances once.
const SkStrokeRec& stroke = fPathStrokeList->fStroke;
float localStrokeWidth = GrStrokeTolerances::GetLocalStrokeWidth(matrixMinMaxScales.data(),
stroke.getWidth());
float numRadialSegmentsPerRadian = GrStrokeTolerances::CalcNumRadialSegmentsPerRadian(
instanceWriter.parametricPrecision(), localStrokeWidth);
maxEdgesInJoin = worst_case_edges_in_join(stroke.getJoin(), numRadialSegmentsPerRadian);
maxRadialSegmentsPerRadian = numRadialSegmentsPerRadian;
}
// Fast SIMD queue that buffers up values for "numRadialSegmentsPerRadian". Only used when we
// have dynamic stroke.
GrStrokeToleranceBuffer toleranceBuffer(instanceWriter.parametricPrecision());
for (PathStrokeList* pathStroke = fPathStrokeList; pathStroke; pathStroke = pathStroke->fNext) {
const SkStrokeRec& stroke = pathStroke->fStroke;
if (fShaderFlags & ShaderFlags::kDynamicStroke) {
// Strokes are dynamic. Calculate tolerances every time.
float numRadialSegmentsPerRadian =
toleranceBuffer.fetchRadialSegmentsPerRadian(pathStroke);
maxEdgesInJoin = std::max(
worst_case_edges_in_join(stroke.getJoin(), numRadialSegmentsPerRadian),
maxEdgesInJoin);
maxRadialSegmentsPerRadian = std::max(numRadialSegmentsPerRadian,
maxRadialSegmentsPerRadian);
instanceWriter.updateDynamicStroke(stroke);
}
if (fShaderFlags & ShaderFlags::kDynamicColor) {
instanceWriter.updateDynamicColor(pathStroke->fColor);
}
GrStrokeIterator strokeIter(pathStroke->fPath, &pathStroke->fStroke, &fShader.viewMatrix());
while (strokeIter.next()) {
const SkPoint* p = strokeIter.pts();
switch (strokeIter.verb()) {
using Verb = GrStrokeIterator::Verb;
// Declared ahead of the case labels, without an initializer; only the kCubic case uses it.
int numChops;
case Verb::kContourFinished:
instanceWriter.finishContour();
break;
case Verb::kCircle:
// Round cap or else an empty stroke that is specified to be drawn as a circle.
instanceWriter.writeCircle(p[0]);
// A circle also resets the previous control point to its own location.
[[fallthrough]];
case Verb::kMoveWithinContour:
instanceWriter.setLastControlPoint(p[0]);
break;
case Verb::kLine:
instanceWriter.lineTo(p[0], p[1]);
break;
case Verb::kQuad:
// The conic cusp test is applied to the quad's three control points as well.
if (GrPathUtils::conicHasCusp(p)) {
// The cusp is always at the midtangent.
SkPoint cusp = SkEvalQuadAt(p, SkFindQuadMidTangent(p));
instanceWriter.writeCircle(cusp);
// A quad can only have a cusp if it's flat with a 180-degree turnaround.
instanceWriter.lineTo(p[0], cusp);
instanceWriter.lineTo(cusp, p[2]);
} else {
instanceWriter.quadraticTo(p);
}
break;
case Verb::kConic:
if (GrPathUtils::conicHasCusp(p)) {
// The cusp is always at the midtangent.
SkConic conic(p, strokeIter.w());
SkPoint cusp = conic.evalAt(conic.findMidTangent());
instanceWriter.writeCircle(cusp);
// A conic can only have a cusp if it's flat with a 180-degree turnaround.
instanceWriter.lineTo(p[0], cusp);
instanceWriter.lineTo(cusp, p[2]);
} else {
instanceWriter.conicTo(p, strokeIter.w());
}
break;
case Verb::kCubic:
// Up to two chops produce three cubics that share endpoints: 3*3 + 1 = 10 points.
SkPoint chops[10];
float T[2];
bool areCusps;
numChops = GrPathUtils::findCubicConvex180Chops(p, T, &areCusps);
if (numChops == 0) {
instanceWriter.cubicConvex180To(p);
} else if (numChops == 1) {
SkChopCubicAt(p, chops, T[0]);
if (areCusps) {
instanceWriter.writeCircle(chops[3]);
// In a perfect world, these 3 points would be equal after chopping
// on a cusp.
chops[2] = chops[4] = chops[3];
}
instanceWriter.cubicConvex180To(chops);
instanceWriter.cubicConvex180To(chops + 3);
} else {
SkASSERT(numChops == 2);
SkChopCubicAt(p, chops, T[0], T[1]);
if (areCusps) {
instanceWriter.writeCircle(chops[3]);
instanceWriter.writeCircle(chops[6]);
// Two cusps are only possible if it's a flat line with two 180-degree
// turnarounds.
instanceWriter.lineTo(chops[0], chops[3]);
instanceWriter.lineTo(chops[3], chops[6]);
instanceWriter.lineTo(chops[6], chops[9]);
} else {
instanceWriter.cubicConvex180To(chops);
instanceWriter.cubicConvex180To(chops + 3);
instanceWriter.cubicConvex180To(chops + 6);
}
}
break;
}
}
}
// The maximum rotation we can have in a stroke is 180 degrees (SK_ScalarPI radians).
int maxRadialSegmentsInStroke =
std::max(SkScalarCeilToInt(maxRadialSegmentsPerRadian * SK_ScalarPI), 1);
// The writer tracks the segment count raised to the 4th power; two square roots undo that.
int maxParametricSegmentsInStroke = SkScalarCeilToInt(sqrtf(sqrtf(
instanceWriter.maxParametricSegments_pow4())));
SkASSERT(maxParametricSegmentsInStroke >= 1); // maxParametricSegments_pow4 is always >= 1.
// Now calculate the maximum number of edges we will need in the stroke portion of the instance.
// The first and last edges in a stroke are shared by both the parametric and radial sets of
// edges, so the total number of edges is:
//
// numCombinedEdges = numParametricEdges + numRadialEdges - 2
//
// It's also important to differentiate between the number of edges and segments in a strip:
//
// numSegments = numEdges - 1
//
// So the total number of combined edges in the stroke is:
//
// numEdgesInStroke = numParametricSegments + 1 + numRadialSegments + 1 - 2
// = numParametricSegments + numRadialSegments
//
int maxEdgesInStroke = maxRadialSegmentsInStroke + maxParametricSegmentsInStroke;
// Each triangle strip has two sections: It starts with a join then transitions to a stroke. The
// number of edges in an instance is the sum of edges from the join and stroke sections both.
// NOTE: The final join edge and the first stroke edge are co-located, however we still need to
// emit both because the join's edge is half-width and the stroke's is full-width.
int fixedEdgeCount = maxEdgesInJoin + maxEdgesInStroke;
fShader.setFixedCountNumTotalEdges(fixedEdgeCount);
// Two vertices per edge of the triangle strip.
fFixedVertexCount = fixedEdgeCount * 2;
}
// Issues one instanced draw per instance chunk. Every instance is drawn with the same
// fFixedVertexCount vertices. Does nothing when there are no chunks or no vertices to draw.
void GrStrokeFixedCountTessellator::draw(GrOpFlushState* flushState) const {
    const bool hasInstances = !fInstanceChunks.empty();
    const bool hasVertices = fFixedVertexCount > 0;
    if (!(hasInstances && hasVertices)) {
        return;
    }
    for (const auto& chunk : fInstanceChunks) {
        // The chunk's buffer is bound in the instance-buffer slot (second argument); fCount and
        // fBase are therefore an instance count and a base instance.
        flushState->bindBuffers(nullptr, chunk.fBuffer, nullptr);
        flushState->drawInstanced(chunk.fCount, chunk.fBase, fFixedVertexCount, 0);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright 2021 Google LLC.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef GrStrokeFixedCountTessellator_DEFINED
#define GrStrokeFixedCountTessellator_DEFINED
#include "src/gpu/GrVertexChunkArray.h"
#include "src/gpu/tessellate/GrStrokeTessellator.h"
// Stroke tessellator that draws every stroke as an instance of a triangle strip with one fixed
// vertex count. Instances that need fewer triangles than the strip provides emit the leftover
// triangles as degenerate.
class GrStrokeFixedCountTessellator : public GrStrokeTessellator {
public:
    // Forwards everything to the base class, selecting the shader's kFixedCount mode.
    GrStrokeFixedCountTessellator(ShaderFlags shaderFlags,
                                  const SkMatrix& viewMatrix,
                                  PathStrokeList* pathStrokeList)
            : GrStrokeTessellator(GrStrokeTessellateShader::Mode::kFixedCount,
                                  shaderFlags,
                                  viewMatrix,
                                  pathStrokeList) {}

    void prepare(GrMeshDrawOp::Target*, int totalCombinedVerbCnt) override;
    void draw(GrOpFlushState*) const override;

private:
    // Chunks of instance data consumed by draw().
    GrVertexChunkArray fInstanceChunks;

    // Number of vertices drawn for every instance; draw() is a no-op while this is <= 0.
    int fFixedVertexCount = 0;
};
#endif

View File

@ -856,8 +856,8 @@ void GrStrokeHardwareTessellator::prepare(GrMeshDrawOp::Target* target, int tota
}
void GrStrokeHardwareTessellator::draw(GrOpFlushState* flushState) const {
for (const auto& chunk : fPatchChunks) {
flushState->bindBuffers(nullptr, nullptr, chunk.fBuffer);
flushState->draw(chunk.fVertexCount, chunk.fBaseVertex);
for (const auto& vertexChunk : fPatchChunks) {
flushState->bindBuffers(nullptr, nullptr, vertexChunk.fBuffer);
flushState->draw(vertexChunk.fCount, vertexChunk.fBase);
}
}

View File

@ -18,8 +18,8 @@ class GrStrokeHardwareTessellator : public GrStrokeTessellator {
public:
GrStrokeHardwareTessellator(ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
PathStrokeList* pathStrokeList, const GrShaderCaps&)
: GrStrokeTessellator(GrStrokeTessellateShader::Mode::kTessellation, shaderFlags,
viewMatrix, pathStrokeList) {
: GrStrokeTessellator(GrStrokeTessellateShader::Mode::kHardwareTessellation,
shaderFlags, viewMatrix, pathStrokeList) {
}
void prepare(GrMeshDrawOp::Target*, int totalCombinedVerbCnt) override;
@ -27,10 +27,6 @@ public:
private:
GrVertexChunkArray fPatchChunks;
public:
// This class is used to benchmark prepareBuffers().
class TestingOnly_Benchmark;
};
#endif

View File

@ -442,7 +442,7 @@ GrStrokeIndirectTessellator::GrStrokeIndirectTessellator(ShaderFlags shaderFlags
PathStrokeList* pathStrokeList,
int totalCombinedVerbCnt,
SkArenaAlloc* alloc)
: GrStrokeTessellator(GrStrokeTessellateShader::Mode::kIndirect, shaderFlags,
: GrStrokeTessellator(GrStrokeTessellateShader::Mode::kLog2Indirect, shaderFlags,
viewMatrix, pathStrokeList) {
// The maximum potential number of values we will need in fResolveLevels is:
//
@ -472,7 +472,7 @@ GrStrokeIndirectTessellator::GrStrokeIndirectTessellator(ShaderFlags shaderFlags
lastStrokeWidth = stroke.getWidth();
}
fMaxNumExtraEdgesInJoin = std::max(fMaxNumExtraEdgesInJoin,
GrStrokeTessellateShader::NumExtraEdgesInIndirectJoin(stroke.getJoin()));
GrStrokeTessellateShader::NumFixedEdgesInJoin(stroke.getJoin()));
// Iterate through each verb in the stroke, counting its resolveLevel(s).
GrStrokeIterator iter(pathStroke->fPath, &stroke, &viewMatrix);
while (iter.next()) {

View File

@ -11,6 +11,7 @@
#include "src/gpu/GrRecordingContextPriv.h"
#include "src/gpu/tessellate/GrFillPathShader.h"
#include "src/gpu/tessellate/GrStencilPathShader.h"
#include "src/gpu/tessellate/GrStrokeFixedCountTessellator.h"
#include "src/gpu/tessellate/GrStrokeHardwareTessellator.h"
#include "src/gpu/tessellate/GrStrokeIndirectTessellator.h"
@ -111,15 +112,6 @@ GrOp::CombineResult GrStrokeTessellateOp::onCombineIfPossible(GrOp* grOp, SkAren
}
}
// The indirect tessellator can't combine overlapping, mismatched colors because the log2
// binning draws things out of order. But we can still chain them together and generate a single
// long list of indirect draws.
if ((combinedFlags & ShaderFlags::kDynamicColor) &&
!this->canUseHardwareTessellation(caps) &&
this->bounds().intersects(op->bounds())) {
return CombineResult::kMayChain;
}
fShaderFlags = combinedFlags;
// Concat the op's PathStrokeList. Since the head element is allocated inside the op, we need to
@ -164,41 +156,22 @@ void GrStrokeTessellateOp::prePrepareTessellator(GrPathShader::ProgramArgs&& arg
const GrCaps& caps = *args.fCaps;
SkArenaAlloc* arena = args.fArena;
// Only use hardware tessellation if we need dynamic color or if the path has a somewhat large
// number of verbs. Otherwise we seem to be better off using indirect draws.
if (this->canUseHardwareTessellation(caps) &&
((fShaderFlags & ShaderFlags::kDynamicColor) || fTotalCombinedVerbCnt > 50)) {
SkASSERT(!this->nextInChain()); // We never chain when hw tessellation is an option.
if (fTotalCombinedVerbCnt > 50 && this->canUseHardwareTessellation(caps)) {
// Only use hardware tessellation if we're drawing a somewhat large number of verbs.
// Otherwise we seem to be better off using instanced draws.
fTessellator = arena->make<GrStrokeHardwareTessellator>(fShaderFlags, fViewMatrix,
&fPathStrokeList,
*caps.shaderCaps());
} else if (fTotalCombinedVerbCnt > 50 && !(fShaderFlags & ShaderFlags::kDynamicColor)) {
// Only use the log2 indirect tessellator if we're drawing a somewhat large number of verbs
// and the stroke doesn't use dynamic color. (The log2 indirect tessellator can't support
// dynamic color without a z-buffer, due to how it reorders strokes.)
fTessellator = arena->make<GrStrokeIndirectTessellator>(fShaderFlags, fViewMatrix,
&fPathStrokeList,
fTotalCombinedVerbCnt, arena);
} else {
if (this->nextInChain()) {
// We are a chained list of indirect stroke ops. The only reason we would have chained
// is if everything was a match except color.
fShaderFlags |= ShaderFlags::kDynamicColor;
// Collect any other shader flags in the chain.
const SkStrokeRec& headStroke = this->headStroke();
for (GrStrokeTessellateOp* op = this->nextInChain(); op; op = op->nextInChain()) {
fShaderFlags |= op->fShaderFlags;
if (!(fShaderFlags & ShaderFlags::kDynamicStroke) &&
!DynamicStroke::StrokesHaveEqualDynamicState(headStroke, op->headStroke())) {
fShaderFlags |= ShaderFlags::kDynamicStroke;
}
}
}
auto* headTessellator = arena->make<GrStrokeIndirectTessellator>(
fShaderFlags, fViewMatrix, &fPathStrokeList, fTotalCombinedVerbCnt, arena);
// Make a tessellator for every chained op after us. These will all append to the head
// tessellator's shared indirect-draw list during prepare().
for (GrStrokeTessellateOp* op = this->nextInChain(); op; op = op->nextInChain()) {
SkASSERT(fViewMatrix == op->fViewMatrix);
auto* chainedTessellator = arena->make<GrStrokeIndirectTessellator>(
fShaderFlags, fViewMatrix, &op->fPathStrokeList, op->fTotalCombinedVerbCnt,
arena);
headTessellator->addToChain(chainedTessellator);
}
fTessellator = headTessellator;
fTessellator = arena->make<GrStrokeFixedCountTessellator>(fShaderFlags, fViewMatrix,
&fPathStrokeList);
}
auto* pipeline = GrFillPathShader::MakeFillPassPipeline(args, fAAType, std::move(clip),

View File

@ -28,9 +28,6 @@ private:
SkStrokeRec& headStroke() { return fPathStrokeList.fStroke; }
SkPMColor4f& headColor() { return fPathStrokeList.fColor; }
GrStrokeTessellateOp* nextInChain() const {
return static_cast<GrStrokeTessellateOp*>(this->GrDrawOp::nextInChain());
}
// Returns whether it is a good tradeoff to use the dynamic states flagged in the given
// bitfield. Dynamic states improve batching, but if they aren't already enabled, they come at

View File

@ -466,7 +466,7 @@ SkString GrStrokeTessellateShader::getTessControlShaderGLSL(
const char* versionAndExtensionDecls,
const GrGLSLUniformHandler& uniformHandler,
const GrShaderCaps& shaderCaps) const {
SkASSERT(fMode == Mode::kTessellation);
SkASSERT(fMode == Mode::kHardwareTessellation);
auto impl = static_cast<const GrStrokeTessellateShader::TessellationImpl*>(glslGeomProc);
SkString code(versionAndExtensionDecls);
@ -826,7 +826,7 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
const char* versionAndExtensionDecls,
const GrGLSLUniformHandler& uniformHandler,
const GrShaderCaps& shaderCaps) const {
SkASSERT(fMode == Mode::kTessellation);
SkASSERT(fMode == Mode::kHardwareTessellation);
auto impl = static_cast<const GrStrokeTessellateShader::TessellationImpl*>(glslGeomProc);
SkString code(versionAndExtensionDecls);
@ -995,7 +995,7 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
return code;
}
class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
class GrStrokeTessellateShader::InstancedImpl : public GrGLSLGeometryProcessor {
void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
const auto& shader = args.fGeomProc.cast<GrStrokeTessellateShader>();
SkPaint::Join joinType = shader.fStroke.getJoin();
@ -1043,6 +1043,18 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
float JOIN_TYPE = dynamicStrokeAttr.y;)", parametricPrecisionName);
}
if (shader.fMode == Mode::kLog2Indirect) {
args.fVertBuilder->codeAppend(R"(
float NUM_TOTAL_EDGES = abs(argsAttr.z);)");
} else {
SkASSERT(shader.fMode == Mode::kFixedCount);
const char* edgeCountName;
fEdgeCountUniform = args.fUniformHandler->addUniform(
nullptr, kVertex_GrShaderFlag, kFloat_GrSLType, "edgeCount", &edgeCountName);
args.fVertBuilder->codeAppendf(R"(
float NUM_TOTAL_EDGES = %s;)", edgeCountName);
}
// View matrix uniforms.
if (!shader.viewMatrix().isIdentity()) {
const char* translateName, *affineMatrixName;
@ -1075,9 +1087,8 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
P = AFFINE_MATRIX * P;
lastControlPoint = AFFINE_MATRIX * lastControlPoint;)");
}
args.fVertBuilder->codeAppend(R"(
float numTotalEdges = abs(argsAttr.z);
args.fVertBuilder->codeAppend(R"(
// Find how many parametric segments this stroke requires.
float numParametricSegments = min(wangs_formula(P, w, PARAMETRIC_PRECISION),
float(1 << MAX_PARAMETRIC_SEGMENTS_LOG2));
@ -1096,6 +1107,7 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
tan1 = float2(-1,0);
})");
// Potential optimization: (shader.hasDynamicStroke() && shader.hasRoundJoins())?
if (shader.fStroke.getJoin() == SkPaint::kRound_Join || shader.hasDynamicStroke()) {
args.fVertBuilder->codeAppend(R"(
// Determine how many edges to give to the round join. We emit the first and final edges
@ -1107,13 +1119,17 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
// +2 because we emit the beginning and ending edges twice (see above comment).
float numEdgesInJoin = numRadialSegmentsInJoin + 2;
// The stroke section needs at least two edges. Don't assign more to the join than
// "numTotalEdges - 2".
numEdgesInJoin = min(numEdgesInJoin, numTotalEdges - 2);
// Negative argsAttr.z means the join is a chop, and chop joins get exactly one segment.
if (argsAttr.z < 0) {
// +2 because we emit the beginning and ending edges twice (see above comment).
numEdgesInJoin = 1 + 2;
})");
// "NUM_TOTAL_EDGES - 2".
numEdgesInJoin = min(numEdgesInJoin, NUM_TOTAL_EDGES - 2);)");
if (shader.fMode == Mode::kLog2Indirect) {
args.fVertBuilder->codeAppend(R"(
// Negative argsAttr.z means the join is an internal chop or circle, and both of
// those have empty joins. All we need is a bevel join.
if (argsAttr.z < 0) {
// +2 because we emit the beginning and ending edges twice (see above comment).
numEdgesInJoin = 1 + 2;
})");
}
if (shader.hasDynamicStroke()) {
args.fVertBuilder->codeAppend(R"(
if (JOIN_TYPE >= 0 /*Is the join not a round type?*/) {
@ -1124,7 +1140,7 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
}
} else {
args.fVertBuilder->codeAppendf(R"(
float numEdgesInJoin = %i;)", NumExtraEdgesInIndirectJoin(joinType));
float numEdgesInJoin = %i;)", NumFixedEdgesInJoin(joinType));
}
args.fVertBuilder->codeAppend(R"(
@ -1183,7 +1199,7 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
combinedEdgeID = max(combinedEdgeID, 0);
} else {
// We belong to the stroke.
float maxCombinedSegments = numTotalEdges - numEdgesInJoin - 1;
float maxCombinedSegments = NUM_TOTAL_EDGES - numEdgesInJoin - 1;
numRadialSegments = max(ceil(abs(rotation) * NUM_RADIAL_SEGMENTS_PER_RADIAN), 1);
numRadialSegments = min(numRadialSegments, maxCombinedSegments);
numParametricSegments = min(numParametricSegments,
@ -1284,6 +1300,11 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
GrStrokeTolerances::CalcParametricPrecision(maxScale));
}
if (shader.fMode == Mode::kFixedCount) {
SkASSERT(shader.fFixedCountNumTotalEdges != 0);
pdman.set1f(fEdgeCountUniform, (float)shader.fFixedCountNumTotalEdges);
}
// Set up the view matrix, if any.
const SkMatrix& m = shader.viewMatrix();
if (!m.isIdentity()) {
@ -1300,17 +1321,20 @@ class GrStrokeTessellateShader::IndirectImpl : public GrGLSLGeometryProcessor {
GrGLSLUniformHandler::UniformHandle fTessControlArgsUniform;
GrGLSLUniformHandler::UniformHandle fTranslateUniform;
GrGLSLUniformHandler::UniformHandle fAffineMatrixUniform;
GrGLSLUniformHandler::UniformHandle fEdgeCountUniform;
GrGLSLUniformHandler::UniformHandle fColorUniform;
};
void GrStrokeTessellateShader::getGLSLProcessorKey(const GrShaderCaps&,
GrProcessorKeyBuilder* b) const {
bool keyNeedsJoin = (fMode == Mode::kIndirect) && !(fShaderFlags & ShaderFlags::kDynamicStroke);
bool keyNeedsJoin = (fMode != Mode::kHardwareTessellation) &&
!(fShaderFlags & ShaderFlags::kDynamicStroke);
SkASSERT((int)fMode >> 2 == 0);
SkASSERT(fStroke.getJoin() >> 2 == 0);
// Attribs get worked into the key automatically during GrGeometryProcessor::getAttributeKey().
// When color is in a uniform, it's always wide. kWideColor doesn't need to be considered here.
uint32_t key = (uint32_t)(fShaderFlags & ~ShaderFlags::kWideColor);
key = (key << 1) | (uint32_t)fMode;
key = (key << 2) | (uint32_t)fMode;
key = (key << 2) | ((keyNeedsJoin) ? fStroke.getJoin() : 0);
key = (key << 1) | (uint32_t)fStroke.isHairlineStyle();
key = (key << 1) | (uint32_t)this->viewMatrix().isIdentity();
@ -1318,6 +1342,6 @@ void GrStrokeTessellateShader::getGLSLProcessorKey(const GrShaderCaps&,
}
GrGLSLGeometryProcessor* GrStrokeTessellateShader::createGLSLInstance(const GrShaderCaps&) const {
return (fMode == Mode::kTessellation) ? (GrGLSLGeometryProcessor*)new TessellationImpl
: new IndirectImpl;
return (fMode == Mode::kHardwareTessellation) ? (GrGLSLGeometryProcessor*) new TessellationImpl
: new InstancedImpl;
}

View File

@ -27,9 +27,10 @@ class GrGLSLUniformHandler;
class GrStrokeTessellateShader : public GrPathShader {
public:
// Which rendering strategy are we using: hardware tessellation, log2 indirect draws, or
// fixed-count instanced draws?
enum class Mode : bool {
kTessellation,
kIndirect
enum class Mode {
kHardwareTessellation,
kLog2Indirect,
kFixedCount
};
enum class ShaderFlags {
@ -42,24 +43,26 @@ public:
GR_DECL_BITFIELD_CLASS_OPS_FRIENDS(ShaderFlags);
// When using indirect draws, we expect a fixed number of additional edges to be appended onto
// each instance in order to implement its preceding join. Specifically, each join emits:
// Returns the fixed number of edges that are always emitted with the given join type. If the
// join is round, the caller needs to account for the additional radial edges on their own.
// Specifically, each join always emits:
//
// * Two colocated edges at the beginning (a double-sided edge to seam with the preceding
// stroke and a single-sided edge to seam with the join).
// * Two colocated edges at the beginning (a full-width edge to seam with the preceding stroke
// and a half-width edge to begin the join).
//
// * An extra edge in the middle for miter joins, or else a variable number for round joins
// (counted in the resolveLevel).
// * An extra edge in the middle for miter joins, or else a variable number of radial edges
// for round joins (the caller is responsible for counting radial edges from round joins).
//
// * A single sided edge at the end of the join that is colocated with the first (double
// sided) edge of the stroke
// * A half-width edge at the end of the join that will be colocated with the first
// (full-width) edge of the stroke.
//
constexpr static int NumExtraEdgesInIndirectJoin(SkPaint::Join joinType) {
constexpr static int NumFixedEdgesInJoin(SkPaint::Join joinType) {
switch (joinType) {
case SkPaint::kMiter_Join:
return 4;
case SkPaint::kRound_Join:
// The inner edges for round joins are counted in the stroke's resolveLevel.
// The caller is responsible for counting the variable number of middle, radial
// segments on round joins.
[[fallthrough]];
case SkPaint::kBevel_Join:
return 3;
@ -100,14 +103,14 @@ public:
GrStrokeTessellateShader(Mode mode, ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
const SkStrokeRec& stroke, SkPMColor4f color)
: GrPathShader(kTessellate_GrStrokeTessellateShader_ClassID, viewMatrix,
(mode == Mode::kTessellation) ?
(mode == Mode::kHardwareTessellation) ?
GrPrimitiveType::kPatches : GrPrimitiveType::kTriangleStrip,
(mode == Mode::kTessellation) ? 1 : 0)
(mode == Mode::kHardwareTessellation) ? 1 : 0)
, fMode(mode)
, fShaderFlags(shaderFlags)
, fStroke(stroke)
, fColor(color) {
if (fMode == Mode::kTessellation) {
if (fMode == Mode::kHardwareTessellation) {
// A join calculates its starting angle using prevCtrlPtAttr.
fAttribs.emplace_back("prevCtrlPtAttr", kFloat2_GrVertexAttribType, kFloat2_GrSLType);
// pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's a conic
@ -131,15 +134,19 @@ public:
// 180-degree point stroke.
fAttribs.emplace_back("pts01Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
fAttribs.emplace_back("pts23Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
// "lastControlPoint" and "numTotalEdges" are both packed into argsAttr.
//
// A join calculates its starting angle using "argsAttr.xy=lastControlPoint".
//
// "abs(argsAttr.z=numTotalEdges)" tells the shader the literal number of edges in the
// triangle strip being rendered (i.e., it should be vertexCount/2). If numTotalEdges is
// negative and the join type is "kRound", it also instructs the shader to only allocate
// one segment to the preceding round join.
fAttribs.emplace_back("argsAttr", kFloat3_GrVertexAttribType, kFloat3_GrSLType);
if (fMode == Mode::kLog2Indirect) {
// argsAttr.xy contains the lastControlPoint for setting up the join.
//
// "argsAttr.z=numTotalEdges" tells the shader the literal number of edges in the
// triangle strip being rendered (i.e., it should be vertexCount/2). If
// numTotalEdges is negative and the join type is "kRound", it also instructs the
// shader to only allocate one segment to the preceding round join.
fAttribs.emplace_back("argsAttr", kFloat3_GrVertexAttribType, kFloat3_GrSLType);
} else {
SkASSERT(fMode == Mode::kFixedCount);
// argsAttr contains the lastControlPoint for setting up the join.
fAttribs.emplace_back("argsAttr", kFloat2_GrVertexAttribType, kFloat2_GrSLType);
}
}
if (fShaderFlags & ShaderFlags::kDynamicStroke) {
fAttribs.emplace_back("dynamicStrokeAttr", kFloat2_GrVertexAttribType,
@ -152,7 +159,7 @@ public:
: kUByte4_norm_GrVertexAttribType,
kHalf4_GrSLType);
}
if (fMode == Mode::kTessellation) {
if (fMode == Mode::kHardwareTessellation) {
this->setVertexAttributes(fAttribs.data(), fAttribs.count());
} else {
this->setInstanceAttributes(fAttribs.data(), fAttribs.count());
@ -164,6 +171,13 @@ public:
bool hasDynamicStroke() const { return fShaderFlags & ShaderFlags::kDynamicStroke; }
bool hasDynamicColor() const { return fShaderFlags & ShaderFlags::kDynamicColor; }
// Used by GrStrokeFixedCountTessellator to record how many total edges are in the triangle
// strip. Only meaningful in Mode::kFixedCount (asserted below); the stored value is the one
// later uploaded to the shader as its edge-count uniform.
void setFixedCountNumTotalEdges(int value) {
    SkASSERT(Mode::kFixedCount == fMode);
    // The count is kept as a float, so spell out the int-to-float conversion.
    fFixedCountNumTotalEdges = static_cast<float>(value);
}
private:
const char* name() const override { return "GrStrokeTessellateShader"; }
void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const override;
@ -186,8 +200,12 @@ private:
constexpr static int kMaxAttribCount = 5;
SkSTArray<kMaxAttribCount, Attribute> fAttribs;
// This is a uniform value used when fMode is kFixedCount that tells the shader how many total
// edges are in the triangle strip.
float fFixedCountNumTotalEdges = 0;
class TessellationImpl;
class IndirectImpl;
class InstancedImpl;
};
GR_MAKE_BITFIELD_CLASS_OPS(GrStrokeTessellateShader::ShaderFlags);