Add a tessellation mode that uses indirect draws

This mode is oftentimes faster than tessellation, and other times it
serves as a polyfill when tessellation just isn't supported.

Change-Id: I7b3d57fd0194c6869bfe28ee53ff0ff2e43df479
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/291036
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
Chris Dalton 2020-05-21 15:50:48 -06:00 committed by Skia Commit-Bot
parent 38e33df982
commit 02d7cf79be
10 changed files with 722 additions and 227 deletions

View File

@ -10,6 +10,8 @@
#include "src/core/SkPathPriv.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/GrOpFlushState.h"
#include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
#include "src/gpu/tessellate/GrResolveLevelCounter.h"
#include "src/gpu/tessellate/GrTessellatePathOp.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
#include "tools/ToolUtils.h"
@ -79,7 +81,7 @@ public:
#undef UNIMPL
private:
SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 5];
SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 8];
GrDrawIndexedIndirectCommand fStaticDrawIndexedIndirectData[32];
SkSTArenaAlloc<1024 * 1024> fAllocator;
};
@ -95,10 +97,15 @@ public:
const char* onGetName() override { return fName.c_str(); }
bool isSuitableFor(Backend backend) final { return backend == kNonRendering_Backend; }
class MiddleOutInnerTrianglesBench;
class OuterCubicsBench;
class CubicWedgesBench;
class WangsFormulaBench;
class prepareMiddleOutStencilGeometry;
class prepareMiddleOutStencilGeometry_indirect;
class prepareIndirectOuterCubics;
class prepareTessellatedOuterCubics;
class prepareTessellatedCubicWedges;
class wangs_formula_cubic_log2;
class wangs_formula_cubic_log2_scale;
class wangs_formula_cubic_log2_affine;
class middle_out_triangulation;
private:
void onDraw(int loops, SkCanvas*) final {
@ -120,83 +127,91 @@ private:
SkString fName;
};
class GrTessellatePathOp::TestingOnly_Benchmark::MiddleOutInnerTrianglesBench
: public GrTessellatePathOp::TestingOnly_Benchmark {
public:
MiddleOutInnerTrianglesBench()
: TestingOnly_Benchmark("prepareMiddleOutInnerTriangles",
ToolUtils::make_star(SkRect::MakeWH(100, 100),
kNumCubicsInChalkboard),
SkMatrix::I()) {
}
void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override {
int numBeziers;
op->prepareMiddleOutInnerTriangles(target, &numBeziers);
}
};
#define DEF_TESS_BENCH(NAME, PATH, MATRIX, TARGET, OP) \
class GrTessellatePathOp::TestingOnly_Benchmark::NAME \
: public GrTessellatePathOp::TestingOnly_Benchmark { \
public: \
NAME() : TestingOnly_Benchmark(#NAME, (PATH), (MATRIX)) {} \
void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override; \
}; \
DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::NAME(); ); \
void GrTessellatePathOp::TestingOnly_Benchmark::NAME::runBench( \
GrMeshDrawOp::Target* TARGET, GrTessellatePathOp* op)
DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::MiddleOutInnerTrianglesBench(); );
// Benchmarks GrTessellatePathOp::prepareMiddleOutTrianglesAndCubics() on make_cubic_path() with an
// identity matrix. Only the target is passed, so the remaining parameters take whatever defaults
// the method's declaration provides (the declaration is not visible here).
DEF_TESS_BENCH(prepareMiddleOutStencilGeometry, make_cubic_path(), SkMatrix::I(), target, op) {
op->prepareMiddleOutTrianglesAndCubics(target);
}
class GrTessellatePathOp::TestingOnly_Benchmark::OuterCubicsBench
: public GrTessellatePathOp::TestingOnly_Benchmark {
public:
OuterCubicsBench()
: TestingOnly_Benchmark("prepareOuterCubics", make_cubic_path(), SkMatrix::I()) {
}
void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override {
op->prepareOuterCubics(target, kNumCubicsInChalkboard,
CubicDataAlignment::kVertexBoundary);
}
};
// Benchmarks GrTessellatePathOp::prepareMiddleOutTrianglesAndCubics() on make_cubic_path() with an
// identity matrix, this time supplying a resolve-level counter and passing `true` for the
// drawTrianglesAsIndirectCubicDraw argument.
DEF_TESS_BENCH(prepareMiddleOutStencilGeometry_indirect, make_cubic_path(), SkMatrix::I(), target,
op) {
// A fresh counter per run; the op resets it before counting.
GrResolveLevelCounter resolveLevelCounter;
op->prepareMiddleOutTrianglesAndCubics(target, &resolveLevelCounter, true);
}
DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::OuterCubicsBench(); );
// Benchmarks GrTessellatePathOp::prepareIndirectOuterCubics() on make_cubic_path(). The
// resolve-level counter it consumes is populated inside runBench, so the counting pass over the
// path is part of what this benchmark executes.
DEF_TESS_BENCH(prepareIndirectOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) {
GrResolveLevelCounter resolveLevelCounter;
// Bin the path's curves with an identity view matrix; the literal 4 is the `intolerance` argument
// of GrResolveLevelCounter::reset().
resolveLevelCounter.reset(op->fPath, SkMatrix::I(), 4);
op->prepareIndirectOuterCubics(target, resolveLevelCounter);
}
class GrTessellatePathOp::TestingOnly_Benchmark::CubicWedgesBench
: public GrTessellatePathOp::TestingOnly_Benchmark {
public:
CubicWedgesBench()
: TestingOnly_Benchmark("prepareCubicWedges", make_cubic_path(), SkMatrix::I()) {
}
void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override {
op->prepareCubicWedges(target);
}
};
// Benchmarks GrTessellatePathOp::prepareTessellatedOuterCubics() on make_cubic_path() with an
// identity matrix, passing kNumCubicsInChalkboard as the method's second argument.
DEF_TESS_BENCH(prepareTessellatedOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) {
op->prepareTessellatedOuterCubics(target, kNumCubicsInChalkboard);
}
DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::CubicWedgesBench(););
// Benchmarks GrTessellatePathOp::prepareTessellatedCubicWedges() on make_cubic_path() with an
// identity matrix.
DEF_TESS_BENCH(prepareTessellatedCubicWedges, make_cubic_path(), SkMatrix::I(), target, op) {
op->prepareTessellatedCubicWedges(target);
}
class GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench
: public GrTessellatePathOp::TestingOnly_Benchmark {
public:
WangsFormulaBench(const char* suffix, const SkMatrix& matrix)
: TestingOnly_Benchmark(SkStringPrintf("wangs_formula_cubic_log2%s", suffix).c_str(),
make_cubic_path(), SkMatrix::I())
, fMatrix(matrix) {
}
void runBench(GrMeshDrawOp::Target*, GrTessellatePathOp* op) override {
int sum = 0;
GrVectorXform xform(fMatrix);
for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) {
if (verb == SkPathVerb::kCubic) {
sum += GrWangsFormula::cubic_log2(4, pts, xform);
}
}
// Don't let the compiler optimize away GrWangsFormula::cubic_log2.
if (sum <= 0) {
SK_ABORT("sum should be > 0.");
static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) {
int sum = 0;
GrVectorXform xform(matrix);
for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
if (verb == SkPathVerb::kCubic) {
sum += GrWangsFormula::cubic_log2(4, pts, xform);
}
}
private:
SkMatrix fMatrix;
};
// Don't let the compiler optimize away GrWangsFormula::cubic_log2.
if (sum <= 0) {
SK_ABORT("sum should be > 0.");
}
}
DEF_BENCH(
return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench("", SkMatrix::I());
);
DEF_BENCH(
return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench(
"_scale", SkMatrix::Scale(1.1f, 0.9f));
);
DEF_BENCH(
return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench(
"_affine", SkMatrix::MakeAll(.9f,0.9f,0, 1.1f,1.1f,0, 0,0,1));
);
// Runs benchmark_wangs_formula_cubic_log2() over make_cubic_path() using the op's view matrix.
// The benchmark is constructed with SkMatrix::I(); presumably that is what op->fViewMatrix holds
// here -- confirm against TestingOnly_Benchmark.
DEF_TESS_BENCH(wangs_formula_cubic_log2, make_cubic_path(), SkMatrix::I(), target, op) {
benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
}
// Runs benchmark_wangs_formula_cubic_log2() over make_cubic_path() using the op's view matrix.
// The benchmark is constructed with a non-uniform scale (1.1 in x, 0.9 in y); presumably that is
// what op->fViewMatrix holds here -- confirm against TestingOnly_Benchmark.
DEF_TESS_BENCH(wangs_formula_cubic_log2_scale, make_cubic_path(), SkMatrix::Scale(1.1f, 0.9f),
target, op) {
benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
}
// Runs benchmark_wangs_formula_cubic_log2() over make_cubic_path() using the op's view matrix.
// The benchmark is constructed with a general affine matrix (non-zero off-diagonal terms, no
// perspective); presumably that is what op->fViewMatrix holds here -- confirm against
// TestingOnly_Benchmark.
DEF_TESS_BENCH(wangs_formula_cubic_log2_affine, make_cubic_path(),
SkMatrix::MakeAll(.9f,0.9f,0, 1.1f,1.1f,0, 0,0,1), target, op) {
benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
}
// Benchmarks GrMiddleOutPolygonTriangulator by itself: the line-only star path is fed to the
// triangulator verb by verb and the resulting triangles are written (3 points apiece) into vertex
// space obtained from the target.
DEF_TESS_BENCH(middle_out_triangulation,
               ToolUtils::make_star(SkRect::MakeWH(500, 500), kNumCubicsInChalkboard),
               SkMatrix::I(), target, op) {
    // The triangulator emits 3 points per triangle and at most one triangle per pushed vertex, so
    // ask for room for that many points rather than one point per vertex.
    auto vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
            sizeof(SkPoint), kNumCubicsInChalkboard * 3, nullptr, nullptr));
    if (!vertexData) {
        return;
    }
    GrMiddleOutPolygonTriangulator middleOut(vertexData, 3, kNumCubicsInChalkboard + 2);
    for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) {
        switch (verb) {
            case SkPathVerb::kMove:
                middleOut.closeAndMove(pts[0]);
                break;
            case SkPathVerb::kLine:
                middleOut.pushVertex(pts[1]);
                break;
            case SkPathVerb::kClose:
                middleOut.close();
                break;
            case SkPathVerb::kQuad:
            case SkPathVerb::kConic:
            case SkPathVerb::kCubic:
                // The star path contains only moves, lines and closes.
                SkUNREACHABLE;
        }
        // NOTE: there is deliberately no closeAndMove() here. Restarting the contour after every
        // verb would throw away the vertex stack each iteration, so no real triangulation would
        // ever be measured.
    }
    // Flush whatever contour is still open so every pushed vertex is accounted for.
    middleOut.close();
}

View File

@ -444,6 +444,7 @@ skia_gpu_sources = [
"$_src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h",
"$_src/gpu/tessellate/GrMidpointContourParser.h",
"$_src/gpu/tessellate/GrPathShader.h",
"$_src/gpu/tessellate/GrResolveLevelCounter.h",
"$_src/gpu/tessellate/GrStencilPathShader.cpp",
"$_src/gpu/tessellate/GrStencilPathShader.h",
"$_src/gpu/tessellate/GrTessellatePathOp.cpp",

View File

@ -32,7 +32,7 @@ GrPathRendererChain::GrPathRendererChain(GrRecordingContext* context, const Opti
fChain.push_back(sk_make_sp<GrDashLinePathRenderer>());
}
if (options.fGpuPathRenderers & GpuPathRenderers::kTessellation) {
if (caps.shaderCaps()->tessellationSupport() && caps.drawInstancedSupport()) {
if (caps.drawInstancedSupport()) {
auto tess = sk_make_sp<GrTessellationPathRenderer>(caps);
context->priv().addOnFlushCallbackObject(tess.get());
fChain.push_back(std::move(tess));

View File

@ -162,9 +162,10 @@ public:
kTessellate_GrFillBoundingBoxShader_ClassID,
kTessellate_GrFillCubicHullShader_ClassID,
kTessellate_GrFillTriangleShader_ClassID,
kTessellate_GrStencilCubicShader_ClassID,
kTessellate_GrMiddleOutCubicShader_ClassID,
kTessellate_GrStencilTriangleShader_ClassID,
kTessellate_GrStencilWedgeShader_ClassID,
kTessellate_GrTessellateCubicShader_ClassID,
kTessellate_GrTessellateWedgeShader_ClassID,
kTestFP_ClassID,
kTestRectOp_ClassID,
kFlatNormalsFP_ClassID,

View File

@ -0,0 +1,74 @@
/*
* Copyright 2020 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef GrResolveLevelCounter_DEFINED
#define GrResolveLevelCounter_DEFINED
#include "src/core/SkPathPriv.h"
#include "src/gpu/tessellate/GrStencilPathShader.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
// This class helps bin cubics by log2 "resolveLevel" when we don't use hardware tessellation. It is
// composed of simple counters that track how many cubics we intend to draw at each resolveLevel,
// and how many resolveLevels there are that have at least one cubic.
class GrResolveLevelCounter {
public:
    // Returns the counter to its empty state. One of the reset() overloads must be called before
    // countCubic() or operator[] is used (asserted in debug builds).
    void reset() {
        memset(fInstanceCounts, 0, sizeof(fInstanceCounts));
        // The totals are derived from fInstanceCounts, so they must be cleared along with it.
        // Otherwise a counter that is reset and reused would report totals that still include the
        // previous path's cubics while its per-level bins only describe the new one.
        fTotalCubicInstanceCount = 0;
        fTotalCubicIndirectDrawCount = 0;
        SkDEBUGCODE(fHasCalledReset = true;)
    }

    // Resets the counter and then bins every quadratic and cubic in "path", using Wang's formula
    // (log2 form) with the given intolerance and view matrix to pick each curve's resolveLevel.
    // Other verbs are ignored.
    //
    // Returns the total number of cubic instances counted. Curves whose resolveLevel is 0 are not
    // counted (see countCubic()).
    int reset(const SkPath& path, const SkMatrix& viewMatrix, float intolerance) {
        this->reset();
        GrVectorXform xform(viewMatrix);
        for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
            switch (verb) {
                case SkPathVerb::kQuad:
                    // Quadratics get converted to cubics before rendering.
                    this->countCubic(GrWangsFormula::quadratic_log2(intolerance, pts, xform));
                    break;
                case SkPathVerb::kCubic:
                    this->countCubic(GrWangsFormula::cubic_log2(intolerance, pts, xform));
                    break;
                default:
                    break;
            }
        }
        return fTotalCubicInstanceCount;
    }

    // Records one cubic that will be drawn at "resolveLevel" (must be >= 0). Levels above
    // GrMiddleOutCubicShader::kMaxResolveLevel are clamped down to it.
    void countCubic(int resolveLevel) {
        SkASSERT(fHasCalledReset);
        SkASSERT(resolveLevel >= 0);
        if (resolveLevel == 0) {
            // Cubics with 2^0=1 segments are empty (zero area). We ignore them completely.
            return;
        }
        resolveLevel = std::min(resolveLevel, GrMiddleOutCubicShader::kMaxResolveLevel);
        if (!fInstanceCounts[resolveLevel]++) {
            // First cubic seen at this level: one more distinct level that needs its own draw.
            ++fTotalCubicIndirectDrawCount;
        }
        ++fTotalCubicInstanceCount;
    }

    // Returns how many cubics have been counted at "resolveLevel", which must be in the range
    // [1, GrMiddleOutCubicShader::kMaxResolveLevel].
    int operator[](int resolveLevel) const {
        SkASSERT(fHasCalledReset);
        SkASSERT(resolveLevel > 0);  // Empty cubics with 2^0=1 segments do not need to be drawn.
        SkASSERT(resolveLevel <= GrMiddleOutCubicShader::kMaxResolveLevel);
        return fInstanceCounts[resolveLevel];
    }

    // Total number of cubics counted since the last reset, across all resolveLevels.
    int totalCubicInstanceCount() const { return fTotalCubicInstanceCount; }

    // Number of distinct resolveLevels that have at least one cubic since the last reset.
    int totalCubicIndirectDrawCount() const { return fTotalCubicIndirectDrawCount; }

private:
    SkDEBUGCODE(bool fHasCalledReset = false;)
    // Indexed by resolveLevel; slot 0 is never incremented because level-0 cubics are skipped.
    int fInstanceCounts[GrMiddleOutCubicShader::kMaxResolveLevel + 1];
    int fTotalCubicInstanceCount = 0;
    int fTotalCubicIndirectDrawCount = 0;
};
#endif

View File

@ -35,6 +35,7 @@ constexpr char kEvalCubicFn[] = R"(
})";
class GrStencilPathShader::Impl : public GrGLSLGeometryProcessor {
protected:
void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
const auto& shader = args.fGP.cast<GrStencilPathShader>();
args.fVaryingHandler->emitAttributes(shader);
@ -45,7 +46,7 @@ class GrStencilPathShader::Impl : public GrGLSLGeometryProcessor {
fViewMatrixUniform = args.fUniformHandler->addUniform(
nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix);
args.fVertBuilder->codeAppendf(
"float2 vertexpos = (%s * float3(point, 1)).xy;", viewMatrix);
"float2 vertexpos = (%s * float3(inputPoint, 1)).xy;", viewMatrix);
vertexPos.set(kFloat2_GrSLType, "vertexpos");
}
@ -75,8 +76,8 @@ GrGLSLPrimitiveProcessor* GrStencilPathShader::createGLSLInstance(const GrShader
return new Impl;
}
SkString GrStencilCubicShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString GrTessellateCubicShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString code(versionAndExtensionDecls);
code.append(kWangsFormulaCubicFn);
code.append(R"(
@ -115,8 +116,8 @@ SkString GrStencilCubicShader::getTessControlShaderGLSL(const char* versionAndEx
return code;
}
SkString GrStencilCubicShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString GrTessellateCubicShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString code(versionAndExtensionDecls);
code.append(kEvalCubicFn);
code.append(R"(
@ -147,8 +148,8 @@ SkString GrStencilCubicShader::getTessEvaluationShaderGLSL(const char* versionAn
return code;
}
SkString GrStencilWedgeShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString GrTessellateWedgeShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString code(versionAndExtensionDecls);
code.append(kWangsFormulaCubicFn);
code.append(R"(
@ -183,8 +184,8 @@ SkString GrStencilWedgeShader::getTessControlShaderGLSL(const char* versionAndEx
return code;
}
SkString GrStencilWedgeShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString GrTessellateWedgeShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
const GrShaderCaps&) const {
SkString code(versionAndExtensionDecls);
code.append(kEvalCubicFn);
code.append(R"(
@ -218,3 +219,109 @@ SkString GrStencilWedgeShader::getTessEvaluationShaderGLSL(const char* versionAn
return code;
}
GR_DECLARE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey);

// Looks up the index buffer used by GrMiddleOutCubicShader draws under gMiddleOutIndexBufferKey
// and, when it is not found, builds it, registers it under that key, and returns it. Returns
// null if the buffer cannot be created.
sk_sp<const GrGpuBuffer> GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
        GrResourceProvider* resourceProvider) {
    GR_DEFINE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey);

    auto cachedBuffer = resourceProvider->findByUniqueKey<GrGpuBuffer>(gMiddleOutIndexBufferKey);
    if (cachedBuffer) {
        return std::move(cachedBuffer);
    }

    // Layout: one explicit triangle at index 0, followed (from index 3 on) by one middle-out
    // cubic with kMaxResolveLevel line segments.
    constexpr static int kIndexCount = 3 + NumVerticesAtResolveLevel(kMaxResolveLevel);
    constexpr static size_t kBufferSize = kIndexCount * sizeof(uint16_t);

    auto indexBuffer = resourceProvider->createBuffer(
            kBufferSize, GrGpuBufferType::kIndex, kStatic_GrAccessPattern);
    if (!indexBuffer) {
        return nullptr;
    }

    // We shouldn't bin and/or cache static buffers.
    SkASSERT(indexBuffer->size() == kBufferSize);
    SkASSERT(!indexBuffer->resourcePriv().getScratchKey().isValid());

    // Write straight into the mapping when there is one; otherwise fill a CPU-side copy that is
    // uploaded at the end.
    auto indices = static_cast<uint16_t*>(indexBuffer->map());
    SkAutoTMalloc<uint16_t> cpuCopy;
    if (!indices) {
        SkASSERT(!indexBuffer->isMapped());
        indices = cpuCopy.reset(kIndexCount);
    }

    constexpr int kSegmentCount = 1 << kMaxResolveLevel;
    int writeIdx = 0;

    // The first three entries are special values that emit points P0, P1, and P2 respectively.
    // (When the vertex shader is fed an index value larger than (1 << kMaxResolveLevel), it emits
    // P[index % 4].) The values written are kSegmentCount + 4, + 5 and + 6, i.e. 0, 1 and 2 mod 4.
    for (int corner = 0; corner < 3; ++corner) {
        indices[writeIdx++] = static_cast<uint16_t>(kSegmentCount + 4 + corner);
    }

    // From index 3 on we triangulate a cubic with 2^kMaxResolveLevel line segments. Every index
    // value stands for the parametric value T=(index / 2^kMaxResolveLevel). The triangles are
    // laid out coarsest-first ("middle-out"), which lets a draw select its resolveLevel purely
    // through its indexCount.
    for (int stride = kSegmentCount >> 1; stride > 0; stride >>= 1) {
        for (int t = 0; t < kSegmentCount; t += 2 * stride) {
            indices[writeIdx++] = static_cast<uint16_t>(t);
            indices[writeIdx++] = static_cast<uint16_t>(t + stride);
            indices[writeIdx++] = static_cast<uint16_t>(t + 2 * stride);
        }
    }
    SkASSERT(writeIdx == kIndexCount);

    if (indexBuffer->isMapped()) {
        indexBuffer->unmap();
    } else {
        indexBuffer->updateData(cpuCopy, kBufferSize);
    }

    indexBuffer->resourcePriv().setUniqueKey(gMiddleOutIndexBufferKey);
    return std::move(indexBuffer);
}
// GLSL generator for GrMiddleOutCubicShader. It emits a vertex shader that turns each raw index
// value (sk_VertexID) into a point of the current 4-point cubic instance: either one of the
// control points themselves, or the curve evaluated at T = sk_VertexID / 2^kMaxResolveLevel.
class GrMiddleOutCubicShader::Impl : public GrStencilPathShader::Impl {
// Appends the vertex shader code and sets gpArgs->fPositionVar to the variable holding the
// final (optionally view-matrix-transformed) position.
void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
const auto& shader = args.fGP.cast<GrMiddleOutCubicShader>();
args.fVaryingHandler->emitAttributes(shader);
// Make the C++ constant visible to the shader code appended below under the same name.
args.fVertBuilder->defineConstant("kMaxResolveLevel", kMaxResolveLevel);
// Load the four control points from the two instance attributes, and handle index values above
// 2^kMaxResolveLevel, which select control point P[sk_VertexID & 3] directly.
args.fVertBuilder->codeAppend(R"(
float4x2 P = float4x2(inputPoints_0_1, inputPoints_2_3);
float2 point;
if (sk_VertexID > (1 << kMaxResolveLevel)) {
// This is a special index value that wants us to emit a specific point.
point = P[sk_VertexID & 3];
} else {)");
// Evaluate the cubic at T=(sk_VertexID / 2^kMaxResolveLevel).
// ldexp is only emitted when the shader caps report fpManipulationSupport(); otherwise the same
// T is computed with a division.
if (args.fShaderCaps->fpManipulationSupport()) {
args.fVertBuilder->codeAppend(R"(
float T = ldexp(sk_VertexID, -kMaxResolveLevel);)");
} else {
args.fVertBuilder->codeAppend(R"(
float T = sk_VertexID / float(1 << kMaxResolveLevel);)");
}
// Evaluate the curve at T by repeated linear interpolation of the control points, then close the
// "else" branch opened above.
args.fVertBuilder->codeAppend(R"(
float2 ab = mix(P[0], P[1], T);
float2 bc = mix(P[1], P[2], T);
float2 cd = mix(P[2], P[3], T);
float2 abc = mix(ab, bc, T);
float2 bcd = mix(bc, cd, T);
point = mix(abc, bcd, T);
})");
GrShaderVar vertexPos("point", kFloat2_GrSLType);
// Only add the view-matrix uniform and the extra multiply when the matrix is not identity.
if (!shader.viewMatrix().isIdentity()) {
const char* viewMatrix;
fViewMatrixUniform = args.fUniformHandler->addUniform(
nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix);
args.fVertBuilder->codeAppendf(R"(
float2 transformedPoint = (%s * float3(point, 1)).xy;)", viewMatrix);
vertexPos.set(kFloat2_GrSLType, "transformedPoint");
}
gpArgs->fPositionVar = vertexPos;
// No fragment shader.
}
};
// Creates the GLSL generator for this shader. The returned GrMiddleOutCubicShader::Impl is
// heap-allocated with new; ownership passes to the caller.
GrGLSLPrimitiveProcessor* GrMiddleOutCubicShader::createGLSLInstance(const GrShaderCaps&) const {
return new Impl;
}

View File

@ -17,16 +17,15 @@ public:
GrStencilPathShader(ClassID classID, const SkMatrix& viewMatrix, GrPrimitiveType primitiveType,
int tessellationPatchVertexCount = 0)
: GrPathShader(classID, viewMatrix, primitiveType, tessellationPatchVertexCount) {
constexpr static Attribute kPointAttrib = {
"point", kFloat2_GrVertexAttribType, kFloat2_GrSLType};
this->setVertexAttributes(&kPointAttrib, 1);
}
private:
void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const final {
protected:
constexpr static Attribute kSinglePointAttrib{"inputPoint", kFloat2_GrVertexAttribType,
kFloat2_GrSLType};
void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const override {
b->add32(this->viewMatrix().isIdentity());
}
GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const final;
GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override;
class Impl;
};
@ -35,18 +34,21 @@ private:
class GrStencilTriangleShader : public GrStencilPathShader {
public:
GrStencilTriangleShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
kTessellate_GrStencilTriangleShader_ClassID, viewMatrix, GrPrimitiveType::kTriangles) {}
kTessellate_GrStencilTriangleShader_ClassID, viewMatrix, GrPrimitiveType::kTriangles) {
this->setVertexAttributes(&kSinglePointAttrib, 1);
}
const char* name() const override { return "tessellate_GrStencilTriangleShader"; }
};
// Uses GPU tessellation shaders to linearize, triangulate, and render standalone cubics. Here, a
// "cubic" is a standalone closed contour consisting of a single cubic bezier.
// Uses GPU tessellation shaders to linearize, triangulate, and render standalone closed cubics.
// TODO: Eventually we want to use rational cubic wedges in order to support perspective and conics.
class GrStencilCubicShader : public GrStencilPathShader {
class GrTessellateCubicShader : public GrStencilPathShader {
public:
GrStencilCubicShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
kTessellate_GrStencilCubicShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 4) {}
const char* name() const override { return "tessellate_GrStencilCubicShader"; }
GrTessellateCubicShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
kTessellate_GrTessellateCubicShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 4) {
this->setVertexAttributes(&kSinglePointAttrib, 1);
}
const char* name() const override { return "tessellate_GrTessellateCubicShader"; }
private:
SkString getTessControlShaderGLSL(const char* versionAndExtensionDecls,
@ -59,11 +61,13 @@ private:
// wedge is a 5-point patch consisting of 4 cubic control points, plus an anchor point fanning from
// the center of the curve's resident contour.
// TODO: Eventually we want to use rational cubic wedges in order to support perspective and conics.
class GrStencilWedgeShader : public GrStencilPathShader {
class GrTessellateWedgeShader : public GrStencilPathShader {
public:
GrStencilWedgeShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
kTessellate_GrStencilWedgeShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 5) {}
const char* name() const override { return "tessellate_GrStencilWedgeShader"; }
GrTessellateWedgeShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
kTessellate_GrTessellateWedgeShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 5) {
this->setVertexAttributes(&kSinglePointAttrib, 1);
}
const char* name() const override { return "tessellate_GrTessellateWedgeShader"; }
private:
SkString getTessControlShaderGLSL(const char* versionAndExtensionDecls,
@ -72,4 +76,69 @@ private:
const GrShaderCaps&) const override;
};
// Uses indirect (instanced) draws to triangulate standalone closed cubics with a "middle-out"
// topology. The caller must compute each cubic's resolveLevel on the CPU (i.e., the log2 number of
// line segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2), and then
// sort the instance buffer by resolveLevel for efficient batching of indirect draws.
class GrMiddleOutCubicShader : public GrStencilPathShader {
public:
// Each resolveLevel linearizes the curve into 2^resolveLevel line segments. The finest
// supported resolveLevel is therefore 2^12=4096 line segments.
constexpr static int kMaxResolveLevel = 12;
// How many vertices do we need to draw in order to triangulate a cubic with 2^resolveLevel
// line segments?
constexpr static int NumVerticesAtResolveLevel(int resolveLevel) {
// resolveLevel=0 -> 1 line segment -> 0 triangles -> 0 vertices
// resolveLevel=1 -> 2 line segments -> 1 triangle -> 3 vertices
// resolveLevel=2 -> 4 line segments -> 3 triangles -> 9 vertices
// resolveLevel=3 -> 8 line segments -> 7 triangles -> 21 vertices
// ...
// In general: 2^resolveLevel segments -> (2^resolveLevel - 1) triangles, 3 vertices each.
return ((1 << resolveLevel) - 1) * 3;
}
// Configures an indirect draw to render cubic instances with 2^resolveLevel evenly-spaced (in
// the parametric sense) line segments.
static GrDrawIndexedIndirectCommand MakeDrawCubicsIndirectCmd(int resolveLevel,
uint32_t instanceCount,
uint32_t baseInstance) {
SkASSERT(resolveLevel > 0 && resolveLevel <= kMaxResolveLevel);
// Starting at baseIndex=3, the index buffer triangulates a cubic with 2^kMaxResolveLevel
// line segments. Each index value corresponds to a parametric T value on the curve. Since
// the triangles are arranged in "middle-out" order, we can conveniently control the
// resolveLevel by changing only the indexCount.
uint32_t indexCount = NumVerticesAtResolveLevel(resolveLevel);
// The third initializer (3) is the baseIndex described above. The fourth (0) is presumably the
// base vertex -- confirm against the GrDrawIndexedIndirectCommand declaration.
return {indexCount, instanceCount, 3, 0, baseInstance};
}
// For performance reasons we can often express triangles as an indirect cubic draw and sneak
// them in alongside the other indirect draws. This method configures an indirect draw to emit
// the triangle [P0, P1, P2] from a 4-point instance.
static GrDrawIndexedIndirectCommand MakeDrawTrianglesIndirectCmd(uint32_t instanceCount,
uint32_t baseInstance) {
// Indices 0,1,2 have special index values that emit points P0, P1, and P2 respectively.
return {3, instanceCount, 0, 0, baseInstance};
}
// Returns the index buffer that should be bound when drawing with this shader.
// (Our vertex shader uses raw index values directly, so there is no vertex buffer.)
static sk_sp<const GrGpuBuffer> FindOrMakeMiddleOutIndexBuffer(GrResourceProvider*);
// Sets up the shader to draw triangles from per-instance data: each instance supplies a cubic's
// four control points packed into two float4 attributes (points 0-1 and points 2-3).
GrMiddleOutCubicShader(const SkMatrix& viewMatrix)
: GrStencilPathShader(kTessellate_GrMiddleOutCubicShader_ClassID, viewMatrix,
GrPrimitiveType::kTriangles) {
constexpr static Attribute kInputPtsAttribs[] = {
{"inputPoints_0_1", kFloat4_GrVertexAttribType, kFloat4_GrSLType},
{"inputPoints_2_3", kFloat4_GrVertexAttribType, kFloat4_GrSLType}};
this->setInstanceAttributes(kInputPtsAttribs, 2);
}
const char* name() const override { return "tessellate_GrMiddleOutCubicShader"; }
private:
// Returns a new instance of this shader's own Impl (declared below).
GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override;
class Impl;
};
#endif

View File

@ -14,8 +14,12 @@
#include "src/gpu/tessellate/GrFillPathShader.h"
#include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
#include "src/gpu/tessellate/GrMidpointContourParser.h"
#include "src/gpu/tessellate/GrResolveLevelCounter.h"
#include "src/gpu/tessellate/GrStencilPathShader.h"
constexpr static int kMaxResolveLevel = GrMiddleOutCubicShader::kMaxResolveLevel;
constexpr static float kTessellationIntolerance = 4; // 1/4 of a pixel.
GrTessellatePathOp::FixedFunctionFlags GrTessellatePathOp::fixedFunctionFlags() const {
auto flags = FixedFunctionFlags::kUsesStencil;
if (GrAAType::kNone != fAAType) {
@ -30,50 +34,69 @@ void GrTessellatePathOp::onPrePrepare(GrRecordingContext*,
const GrXferProcessor::DstProxyView&) {
}
void GrTessellatePathOp::onPrepare(GrOpFlushState* state) {
// First check if the path is large and/or simple enough that we can actually triangulate the
// inner polygon(s) on the CPU. This is our fastest approach. It allows us to stencil only the
// curves, and then fill the internal polygons directly to the final render target, thus filling
// in the majority of pixels in a single render pass.
SkScalar scales[2];
SkAssertResult(fViewMatrix.getMinMaxScales(scales)); // Will fail if perspective.
const SkRect& bounds = fPath.getBounds();
void GrTessellatePathOp::onPrepare(GrOpFlushState* flushState) {
int numVerbs = fPath.countVerbs();
if (numVerbs <= 0) {
return;
}
// First check if the path is large and/or simple enough that we can actually triangulate the
// inner polygon(s) on the CPU. This is our fastest approach. It allows us to stencil only the
// curves, and then fill the internal polygons directly to the final render target, thus drawing
// the majority of pixels in a single render pass.
SkScalar scales[2];
SkAssertResult(fViewMatrix.getMinMaxScales(scales)); // Will fail if perspective.
const SkRect& bounds = fPath.getBounds();
float gpuFragmentWork = bounds.height() * scales[0] * bounds.width() * scales[1];
float cpuTessellationWork = (float)numVerbs * SkNextLog2(numVerbs); // N log N.
if (cpuTessellationWork * 500 + (256 * 256) < gpuFragmentWork) { // Don't try below 256x256.
int numCountedCurves;
int numCountedCubics;
// This will fail if the inner triangles do not form a simple polygon (e.g., self
// intersection, double winding).
if (this->prepareNonOverlappingInnerTriangles(state, &numCountedCurves)) {
// Prepare cubics on an instance boundary so we can use the buffer to fill local convex
// hulls as well.
this->prepareOuterCubics(state, numCountedCurves,
CubicDataAlignment::kInstanceBoundary);
if (this->prepareNonOverlappingInnerTriangles(flushState, &numCountedCubics)) {
if (!numCountedCubics) {
return;
}
// Always use indirect draws for cubics instead of tessellation here. Our goal in this
// mode is to maximize GPU performance, and the middle-out topology used by our indirect
// draws is easier on the rasterizer than a tessellated fan. There also seems to be a
// small amount of fixed tessellation overhead that this avoids.
//
// NOTE: This will count fewer cubics than above if it discards any for resolveLevel=0.
GrResolveLevelCounter resolveLevelCounter;
numCountedCubics = resolveLevelCounter.reset(fPath, fViewMatrix,
kTessellationIntolerance);
this->prepareIndirectOuterCubics(flushState, resolveLevelCounter);
return;
}
}
// Next see if we can split up inner polygon triangles and curves, and triangulate the inner
// polygon(s) more efficiently. This causes greater CPU overhead due to the extra shaders and
// draw calls, but the better triangulation can reduce the rasterizer load by a great deal on
// complex paths.
// When there are only a few verbs, it seems to always be fastest to make a single indirect draw
// that contains both the inner triangles and the outer cubics, instead of using hardware
// tessellation. Also take this path if tessellation is not supported.
bool drawTrianglesAsIndirectCubicDraw = (numVerbs < 50);
if (drawTrianglesAsIndirectCubicDraw ||
!flushState->caps().shaderCaps()->tessellationSupport()) {
// Prepare outer cubics with indirect draws.
GrResolveLevelCounter resolveLevelCounter;
this->prepareMiddleOutTrianglesAndCubics(flushState, &resolveLevelCounter,
drawTrianglesAsIndirectCubicDraw);
return;
}
// Next see if we can split up the inner triangles and outer cubics into two draw calls. This
// allows for a more efficient inner triangle topology that can reduce the rasterizer load by a
// large margin on complex paths, but also causes greater CPU overhead due to the extra shader
// switches and draw calls.
// NOTE: Raster-edge work is 1-dimensional, so we sum height and width instead of multiplying.
float rasterEdgeWork = (bounds.height() + bounds.width()) * scales[1] * fPath.countVerbs();
if (rasterEdgeWork > 1000 * 1000) {
int numCountedCurves;
this->prepareMiddleOutInnerTriangles(state, &numCountedCurves);
// We will fill the path with a bounding box instead local cubic convex hulls, so there is
// no need to prepare the cubics on an instance boundary.
this->prepareOuterCubics(state, numCountedCurves, CubicDataAlignment::kVertexBoundary);
if (rasterEdgeWork > 300 * 300) {
this->prepareMiddleOutTrianglesAndCubics(flushState);
return;
}
// Fastest CPU approach: emit one cubic wedge per verb, fanning out from the center.
this->prepareCubicWedges(state);
this->prepareTessellatedCubicWedges(flushState);
}
bool GrTessellatePathOp::prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Target* target,
@ -106,40 +129,74 @@ bool GrTessellatePathOp::prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Targe
return true;
}
void GrTessellatePathOp::prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target* target,
int* numCountedCurves) {
void GrTessellatePathOp::prepareMiddleOutTrianglesAndCubics(
GrMeshDrawOp::Target* target, GrResolveLevelCounter* resolveLevelCounter,
bool drawTrianglesAsIndirectCubicDraw) {
SkASSERT(!fTriangleBuffer);
SkASSERT(!fDoStencilTriangleBuffer);
SkASSERT(!fDoFillTriangleBuffer);
SkASSERT(!fCubicBuffer);
SkASSERT(!fStencilCubicsShader);
SkASSERT(!fIndirectDrawBuffer);
// No initial moveTo, plus an implicit close at the end; n-2 triangles fill an n-gon.
// Each triangle has 3 vertices.
int maxVertices = (fPath.countVerbs() - 1) * 3;
int maxInnerTriangles = fPath.countVerbs() - 1;
int maxCubics = fPath.countVerbs();
GrEagerDynamicVertexAllocator vertexAlloc(target, &fTriangleBuffer, &fBaseTriangleVertex);
auto* vertexData = vertexAlloc.lock<SkPoint>(maxVertices);
SkPoint* vertexData;
int vertexAdvancePerTriangle;
if (drawTrianglesAsIndirectCubicDraw) {
// Allocate the triangles as 4-point instances at the beginning of the cubic buffer.
SkASSERT(resolveLevelCounter);
vertexAdvancePerTriangle = 4;
int baseTriangleInstance;
vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
sizeof(SkPoint) * 4, maxInnerTriangles + maxCubics, &fCubicBuffer,
&baseTriangleInstance));
fBaseCubicVertex = baseTriangleInstance * 4;
} else {
// Allocate the triangles as normal 3-point instances in the triangle buffer.
vertexAdvancePerTriangle = 3;
vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
sizeof(SkPoint), maxInnerTriangles * 3, &fTriangleBuffer, &fBaseTriangleVertex));
}
if (!vertexData) {
return;
}
constexpr static int kNumVerticesPerTriangle = 3;
GrMiddleOutPolygonTriangulator middleOut(vertexData, kNumVerticesPerTriangle, maxVertices);
int localCurveCount = 0;
GrVectorXform xform(fViewMatrix);
GrMiddleOutPolygonTriangulator middleOut(vertexData, vertexAdvancePerTriangle,
fPath.countVerbs());
if (resolveLevelCounter) {
resolveLevelCounter->reset();
}
int numCountedCurves = 0;
for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) {
switch (verb) {
case SkPathVerb::kMove:
middleOut.closeAndMove(*pts++);
middleOut.closeAndMove(pts[0]);
break;
case SkPathVerb::kLine:
middleOut.pushVertex(pts[1]);
break;
case SkPathVerb::kQuad:
middleOut.pushVertex(pts[2]);
++localCurveCount;
if (resolveLevelCounter) {
// Quadratics get converted to cubics before rendering.
resolveLevelCounter->countCubic(GrWangsFormula::quadratic_log2(
kTessellationIntolerance, pts, xform));
break;
}
++numCountedCurves;
break;
case SkPathVerb::kCubic:
middleOut.pushVertex(pts[3]);
++localCurveCount;
if (resolveLevelCounter) {
resolveLevelCounter->countCubic(GrWangsFormula::cubic_log2(
kTessellationIntolerance, pts, xform));
break;
}
++numCountedCurves;
break;
case SkPathVerb::kClose:
middleOut.close();
@ -148,13 +205,31 @@ void GrTessellatePathOp::prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target* ta
SkUNREACHABLE;
}
}
fTriangleVertexCount = middleOut.close() * kNumVerticesPerTriangle;
*numCountedCurves = localCurveCount;
int triangleCount = middleOut.close();
SkASSERT(triangleCount <= maxInnerTriangles);
vertexAlloc.unlock(fTriangleVertexCount);
if (fTriangleVertexCount) {
fDoStencilTriangleBuffer = true;
if (drawTrianglesAsIndirectCubicDraw) {
SkASSERT(resolveLevelCounter);
int totalInstanceCount = triangleCount + resolveLevelCounter->totalCubicInstanceCount();
SkASSERT(vertexAdvancePerTriangle == 4);
target->putBackVertices(maxInnerTriangles + maxCubics - totalInstanceCount,
sizeof(SkPoint) * 4);
if (totalInstanceCount) {
this->prepareIndirectOuterCubicsAndTriangles(target, *resolveLevelCounter, vertexData,
triangleCount);
}
} else {
SkASSERT(vertexAdvancePerTriangle == 3);
target->putBackVertices(maxInnerTriangles - triangleCount, sizeof(SkPoint) * 3);
fTriangleVertexCount = triangleCount * 3;
if (fTriangleVertexCount) {
fDoStencilTriangleBuffer = true;
}
if (resolveLevelCounter) {
this->prepareIndirectOuterCubics(target, *resolveLevelCounter);
} else {
this->prepareTessellatedOuterCubics(target, numCountedCurves);
}
}
}
@ -177,8 +252,133 @@ static void quad2cubic(const SkPoint pts[], SkPoint* out) {
out[3] = pts[2];
}
void GrTessellatePathOp::prepareOuterCubics(GrMeshDrawOp::Target* target, int numCountedCurves,
CubicDataAlignment alignment) {
// Reserves space in fCubicBuffer for every cubic instance tallied by resolveLevelCounter and
// forwards that space to prepareIndirectOuterCubicsAndTriangles() with no leading triangles.
// Returns early, without preparing any draws, when no cubics were counted or when the vertex
// space could not be obtained.
void GrTessellatePathOp::prepareIndirectOuterCubics(
        GrMeshDrawOp::Target* target, const GrResolveLevelCounter& resolveLevelCounter) {
    const int cubicInstanceCount = resolveLevelCounter.totalCubicInstanceCount();
    SkASSERT(cubicInstanceCount >= 0);
    if (cubicInstanceCount == 0) {
        return;
    }

    // The stride is one whole cubic (4 points), so the index handed back counts instances.
    int firstInstance;
    SkPoint* const cubicPoints = static_cast<SkPoint*>(target->makeVertexSpace(
            sizeof(SkPoint) * 4, cubicInstanceCount, &fCubicBuffer, &firstInstance));
    if (!cubicPoints) {
        return;
    }

    // fBaseCubicVertex is expressed in vertices rather than instances.
    fBaseCubicVertex = firstInstance * 4;
    this->prepareIndirectOuterCubicsAndTriangles(target, resolveLevelCounter, cubicPoints,
                                                 /*numTrianglesAtBeginningOfData=*/0);
}
// Builds the indexed-indirect draw commands for the path's outer cubics and writes each cubic's
// four control points into "cubicData", grouped by resolve level.
//
// target: supplies the indirect-command space and the allocator used to create the shader.
// resolveLevelCounter: per-resolve-level instance counts; read here for levels 1 through
//     kMaxResolveLevel.
// cubicData: caller-provided instance storage, 4 SkPoints per instance. The draw commands
//     address it starting at instance (fBaseCubicVertex / 4).
// numTrianglesAtBeginningOfData: number of 4-point instances at the front of cubicData that the
//     caller has already filled with triangles. When nonzero, one extra indirect draw is
//     emitted for them ahead of the cubic draws.
//
// On completion fIndirectDrawBuffer, fIndirectDrawOffset, fIndirectDrawCount, fCubicVertexCount
// and fStencilCubicsShader are set. If the indirect-command space cannot be allocated, the method
// returns before any instance data is written and before fStencilCubicsShader is created.
void GrTessellatePathOp::prepareIndirectOuterCubicsAndTriangles(
GrMeshDrawOp::Target* target, const GrResolveLevelCounter& resolveLevelCounter,
SkPoint* cubicData, int numTrianglesAtBeginningOfData) {
// There must be at least one instance (triangle or cubic) to draw.
SkASSERT(numTrianglesAtBeginningOfData + resolveLevelCounter.totalCubicInstanceCount() > 0);
SkASSERT(!fStencilCubicsShader);
SkASSERT(cubicData);
// Here we treat fCubicBuffer as an instance buffer. It should have been prepared with the base
// vertex on an instance boundary in order to accommodate this.
SkASSERT(fBaseCubicVertex % 4 == 0);
// Convert the base vertex into a base instance (4 points per instance).
int baseInstance = fBaseCubicVertex >> 2;
// Start preparing the indirect draw buffer.
fIndirectDrawCount = resolveLevelCounter.totalCubicIndirectDrawCount();
if (numTrianglesAtBeginningOfData) {
++fIndirectDrawCount; // Add an indirect draw for the triangles at the beginning.
}
// Allocate space for the GrDrawIndexedIndirectCommand structs.
GrDrawIndexedIndirectCommand* indirectData = target->makeDrawIndexedIndirectSpace(
fIndirectDrawCount, &fIndirectDrawBuffer, &fIndirectDrawOffset);
if (!indirectData) {
SkASSERT(!fIndirectDrawBuffer);
return;
}
// Fill out the GrDrawIndexedIndirectCommand structs and determine the starting instance data
// location at each resolve level.
// instanceLocations[level] is the write cursor for the next cubic at that level. Slot 0 is never
// initialized or used, because the loop below starts at level 1 and level-0 curves are skipped.
SkPoint* instanceLocations[kMaxResolveLevel + 1];
int indirectIdx = 0;
int runningInstanceCount = 0;
if (numTrianglesAtBeginningOfData) {
// The caller has already packed "numTrianglesAtBeginningOfData" triangles into 4-point
// instances at the beginning of the instance buffer. Add a special-case indirect draw here
// that will emit the triangles [P0, P1, P2] from these 4-point instances.
indirectData[0] = GrMiddleOutCubicShader::MakeDrawTrianglesIndirectCmd(
numTrianglesAtBeginningOfData, baseInstance);
indirectIdx = 1;
runningInstanceCount = numTrianglesAtBeginningOfData;
}
// Instances are laid out in order of increasing resolve level, directly after any leading
// triangles. Only levels with a nonzero count get an indirect draw command.
for (int resolveLevel = 1; resolveLevel <= kMaxResolveLevel; ++resolveLevel) {
instanceLocations[resolveLevel] = cubicData + runningInstanceCount * 4;
if (int instanceCountAtCurrLevel = resolveLevelCounter[resolveLevel]) {
indirectData[indirectIdx++] = GrMiddleOutCubicShader::MakeDrawCubicsIndirectCmd(
resolveLevel, instanceCountAtCurrLevel, baseInstance + runningInstanceCount);
runningInstanceCount += instanceCountAtCurrLevel;
}
}
#ifdef SK_DEBUG
SkASSERT(indirectIdx == fIndirectDrawCount);
SkASSERT(runningInstanceCount == numTrianglesAtBeginningOfData +
resolveLevelCounter.totalCubicInstanceCount());
SkASSERT(fIndirectDrawCount > 0);
// endLocations[i] records where level i's write cursor must finish: the start of level i + 1,
// or the end of all instance data for the last level. Checked after the fill loop below.
SkPoint* endLocations[kMaxResolveLevel + 1];
memcpy(endLocations, instanceLocations + 1, kMaxResolveLevel * sizeof(SkPoint*));
int totalInstanceCount = numTrianglesAtBeginningOfData +
resolveLevelCounter.totalCubicInstanceCount();
endLocations[kMaxResolveLevel] = cubicData + totalInstanceCount * 4;
#endif
// The leading triangles already occupy 4 vertices apiece in the buffer.
fCubicVertexCount = numTrianglesAtBeginningOfData * 4;
if (resolveLevelCounter.totalCubicInstanceCount()) {
GrVectorXform xform(fViewMatrix);
// Walk the path again, recomputing each curve's resolve level so its points can be written at
// that level's cursor. The levels computed here must agree with what resolveLevelCounter
// tallied; the debug checks after the loop verify this.
for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) {
int level;
switch (verb) {
default:
continue;
case SkPathVerb::kQuad:
level = GrWangsFormula::quadratic_log2(kTessellationIntolerance, pts, xform);
// A level of 0 means no instance is emitted for this curve.
if (level == 0) {
continue;
}
// Levels above the maximum are clamped into the last bucket.
level = std::min(level, kMaxResolveLevel);
// Quadratics are stored as equivalent cubics.
quad2cubic(pts, instanceLocations[level]);
break;
case SkPathVerb::kCubic:
level = GrWangsFormula::cubic_log2(kTessellationIntolerance, pts, xform);
if (level == 0) {
continue;
}
level = std::min(level, kMaxResolveLevel);
memcpy(instanceLocations[level], pts, sizeof(SkPoint) * 4);
break;
}
// Advance this level's cursor past the 4 points just written.
instanceLocations[level] += 4;
fCubicVertexCount += 4;
}
}
#ifdef SK_DEBUG
// Every level's cursor must have landed exactly on its precomputed end location.
for (int i = 1; i <= kMaxResolveLevel; ++i) {
SkASSERT(instanceLocations[i] == endLocations[i]);
}
SkASSERT(fCubicVertexCount == (numTrianglesAtBeginningOfData +
resolveLevelCounter.totalCubicInstanceCount()) * 4);
#endif
fStencilCubicsShader = target->allocator()->make<GrMiddleOutCubicShader>(fViewMatrix);
}
void GrTessellatePathOp::prepareTessellatedOuterCubics(GrMeshDrawOp::Target* target,
int numCountedCurves) {
SkASSERT(numCountedCurves >= 0);
SkASSERT(!fCubicBuffer);
SkASSERT(!fStencilCubicsShader);
@ -186,43 +386,37 @@ void GrTessellatePathOp::prepareOuterCubics(GrMeshDrawOp::Target* target, int nu
return;
}
bool instanceAligned = (alignment == CubicDataAlignment::kInstanceBoundary);
int instanceOrVertexStride = (instanceAligned) ? sizeof(SkPoint) * 4 : sizeof(SkPoint);
int instanceOrVertexCount = (instanceAligned) ? numCountedCurves : numCountedCurves * 4;
int baseInstanceOrVertex;
auto* vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
instanceOrVertexStride, instanceOrVertexCount, &fCubicBuffer, &baseInstanceOrVertex));
sizeof(SkPoint), numCountedCurves * 4, &fCubicBuffer, &fBaseCubicVertex));
if (!vertexData) {
return;
}
fBaseCubicVertex = (instanceAligned) ? baseInstanceOrVertex * 4 : baseInstanceOrVertex;
fCubicVertexCount = 0;
for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) {
switch (verb) {
default:
continue;
case SkPathVerb::kQuad:
SkASSERT(fCubicVertexCount < numCountedCurves * 4);
quad2cubic(pts, vertexData + fCubicVertexCount);
fCubicVertexCount += 4;
break;
case SkPathVerb::kCubic:
SkASSERT(fCubicVertexCount < numCountedCurves * 4);
memcpy(vertexData + fCubicVertexCount, pts, sizeof(SkPoint) * 4);
fCubicVertexCount += 4;
break;
default:
break;
}
fCubicVertexCount += 4;
}
SkASSERT(fCubicVertexCount == numCountedCurves * 4);
fStencilCubicsShader = target->allocator()->make<GrStencilCubicShader>(fViewMatrix);
fStencilCubicsShader = target->allocator()->make<GrTessellateCubicShader>(fViewMatrix);
}
void GrTessellatePathOp::prepareCubicWedges(GrMeshDrawOp::Target* target) {
void GrTessellatePathOp::prepareTessellatedCubicWedges(GrMeshDrawOp::Target* target) {
SkASSERT(!fCubicBuffer);
SkASSERT(!fStencilCubicsShader);
SkASSERT(target->caps().shaderCaps()->tessellationSupport());
// No initial moveTo, one wedge per verb, plus an implicit close at the end.
// Each wedge has 5 vertices.
@ -275,18 +469,18 @@ void GrTessellatePathOp::prepareCubicWedges(GrMeshDrawOp::Target* target) {
vertexAlloc.unlock(fCubicVertexCount);
if (fCubicVertexCount) {
fStencilCubicsShader = target->allocator()->make<GrStencilWedgeShader>(fViewMatrix);
fStencilCubicsShader = target->allocator()->make<GrTessellateWedgeShader>(fViewMatrix);
}
}
void GrTessellatePathOp::onExecute(GrOpFlushState* state, const SkRect& chainBounds) {
this->drawStencilPass(state);
void GrTessellatePathOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
this->drawStencilPass(flushState);
if (!(Flags::kStencilOnly & fFlags)) {
this->drawCoverPass(state);
this->drawCoverPass(flushState);
}
}
void GrTessellatePathOp::drawStencilPass(GrOpFlushState* state) {
void GrTessellatePathOp::drawStencilPass(GrOpFlushState* flushState) {
// Increments clockwise triangles and decrements counterclockwise. Used for "winding" fill.
constexpr static GrUserStencilSettings kIncrDecrStencil(
GrUserStencilSettings::StaticInitSeparate<
@ -311,41 +505,49 @@ void GrTessellatePathOp::drawStencilPass(GrOpFlushState* state) {
if (GrAAType::kNone != fAAType) {
initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias;
}
if (state->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) {
if (flushState->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) {
initArgs.fInputFlags |= GrPipeline::InputFlags::kWireframe;
}
SkASSERT(SkPathFillType::kWinding == fPath.getFillType() ||
SkPathFillType::kEvenOdd == fPath.getFillType());
initArgs.fUserStencil = (SkPathFillType::kWinding == fPath.getFillType()) ?
&kIncrDecrStencil : &kInvertStencil;
initArgs.fCaps = &state->caps();
initArgs.fCaps = &flushState->caps();
GrPipeline pipeline(initArgs, GrDisableColorXPFactory::MakeXferProcessor(),
state->appliedHardClip());
flushState->appliedHardClip());
if (fDoStencilTriangleBuffer) {
SkASSERT(fTriangleBuffer);
GrStencilTriangleShader stencilTriangleShader(fViewMatrix);
GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline,
GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
&stencilTriangleShader);
state->bindPipelineAndScissorClip(programInfo, this->bounds());
state->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
state->draw(fTriangleVertexCount, fBaseTriangleVertex);
flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
flushState->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
flushState->draw(fTriangleVertexCount, fBaseTriangleVertex);
}
if (fStencilCubicsShader) {
GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, fStencilCubicsShader);
state->bindPipelineAndScissorClip(programInfo, this->bounds());
state->bindBuffers(nullptr, nullptr, fCubicBuffer.get());
state->draw(fCubicVertexCount, fBaseCubicVertex);
}
// http://skbug.com/9739
if (state->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) {
state->gpu()->insertManualFramebufferBarrier();
SkASSERT(fCubicBuffer);
GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
fStencilCubicsShader);
flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
if (fIndirectDrawBuffer) {
auto indexBuffer = GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
flushState->resourceProvider());
flushState->bindBuffers(indexBuffer.get(), fCubicBuffer.get(), nullptr);
flushState->drawIndexedIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset,
fIndirectDrawCount);
} else {
flushState->bindBuffers(nullptr, nullptr, fCubicBuffer.get());
flushState->draw(fCubicVertexCount, fBaseCubicVertex);
if (flushState->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) {
flushState->gpu()->insertManualFramebufferBarrier(); // http://skbug.com/9739
}
}
}
}
void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) {
void GrTessellatePathOp::drawCoverPass(GrOpFlushState* flushState) {
// Allows non-zero stencil values to pass and write a color, and resets the stencil value back
// to zero; discards immediately on stencil values of zero.
// NOTE: It's ok to not check the clip here because the previous stencil pass only wrote to
@ -362,7 +564,7 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) {
GrPipeline::InitArgs initArgs;
if (GrAAType::kNone != fAAType) {
initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias;
if (1 == state->proxy()->numSamples()) {
if (1 == flushState->proxy()->numSamples()) {
SkASSERT(GrAAType::kCoverage == fAAType);
// We are mixed sampled. Use conservative raster to make the sample coverage mask 100%
// at every fragment. This way we will still get a double hit on shared edges, but
@ -371,10 +573,10 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) {
initArgs.fInputFlags |= GrPipeline::InputFlags::kConservativeRaster;
}
}
initArgs.fCaps = &state->caps();
initArgs.fDstProxyView = state->drawOpArgs().dstProxyView();
initArgs.fWriteSwizzle = state->drawOpArgs().writeSwizzle();
GrPipeline pipeline(initArgs, std::move(fProcessors), state->detachAppliedClip());
initArgs.fCaps = &flushState->caps();
initArgs.fDstProxyView = flushState->drawOpArgs().dstProxyView();
initArgs.fWriteSwizzle = flushState->drawOpArgs().writeSwizzle();
GrPipeline pipeline(initArgs, std::move(fProcessors), flushState->detachAppliedClip());
if (fDoFillTriangleBuffer) {
SkASSERT(fTriangleBuffer);
@ -421,29 +623,32 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) {
}
GrFillTriangleShader fillTriangleShader(fViewMatrix, fColor);
GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, &fillTriangleShader);
state->bindPipelineAndScissorClip(programInfo, this->bounds());
state->bindTextures(fillTriangleShader, nullptr, pipeline);
state->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
state->draw(fTriangleVertexCount, fBaseTriangleVertex);
GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
&fillTriangleShader);
flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
flushState->bindTextures(fillTriangleShader, nullptr, pipeline);
flushState->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
flushState->draw(fTriangleVertexCount, fBaseTriangleVertex);
if (fStencilCubicsShader) {
SkASSERT(fCubicBuffer);
// At this point, every pixel is filled in except the ones touched by curves. Issue a
// final cover pass over the curves by drawing their convex hulls. This will fill in any
// remaining samples and reset the stencil buffer.
pipeline.setUserStencil(&kTestAndResetStencil);
GrFillCubicHullShader fillCubicHullShader(fViewMatrix, fColor);
GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline,
GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
&fillCubicHullShader);
state->bindPipelineAndScissorClip(programInfo, this->bounds());
state->bindTextures(fillCubicHullShader, nullptr, pipeline);
flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
flushState->bindTextures(fillCubicHullShader, nullptr, pipeline);
// Here we treat fCubicBuffer as an instance buffer. It should have been prepared with
// the base vertex on an instance boundary in order to accommodate this.
SkASSERT((fCubicVertexCount % 4) == 0);
SkASSERT((fBaseCubicVertex % 4) == 0);
state->bindBuffers(nullptr, fCubicBuffer.get(), nullptr);
state->drawInstanced(fCubicVertexCount >> 2, fBaseCubicVertex >> 2, 4, 0);
flushState->bindBuffers(nullptr, fCubicBuffer.get(), nullptr);
flushState->drawInstanced(fCubicVertexCount >> 2, fBaseCubicVertex >> 2, 4, 0);
}
return;
}
@ -451,9 +656,10 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) {
// There are no triangles to fill. Just draw a bounding box.
pipeline.setUserStencil(&kTestAndResetStencil);
GrFillBoundingBoxShader fillBoundingBoxShader(fViewMatrix, fColor, fPath.getBounds());
GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, &fillBoundingBoxShader);
state->bindPipelineAndScissorClip(programInfo, this->bounds());
state->bindTextures(fillBoundingBoxShader, nullptr, pipeline);
state->bindBuffers(nullptr, nullptr, nullptr);
state->draw(4, 0);
GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
&fillBoundingBoxShader);
flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
flushState->bindTextures(fillBoundingBoxShader, nullptr, pipeline);
flushState->bindBuffers(nullptr, nullptr, nullptr);
flushState->draw(4, 0);
}

View File

@ -12,10 +12,11 @@
class GrAppliedHardClip;
class GrStencilPathShader;
class GrResolveLevelCounter;
// Renders paths using a hybrid Red Book "stencil, then cover" method. Curves get linearized by
// GPU tessellation shaders. This Op doesn't apply analytic AA, so it requires a render target that
// supports either MSAA or mixed samples if AA is desired.
// Renders paths using a hybrid "Red Book" (stencil, then cover) method. Curves get linearized by
// either GPU tessellation shaders or indirect draws. This Op doesn't apply analytic AA, so it
// requires a render target that supports either MSAA or mixed samples if AA is desired.
class GrTessellatePathOp : public GrDrawOp {
public:
enum class Flags {
@ -66,25 +67,41 @@ private:
// and this is not an option as it would introduce T-junctions with the outer cubics.
bool prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Target*, int* numCountedCurves);
// Produces a "Red Book" style triangulation of the SkPath's inner polygon(s). The inner
// polygons connect the endpoints of each verb. (i.e., they are the path that would result from
// collapsing all curves to single lines.) Stencilled together with the outer cubics, these
// define the complete path.
// Produces a "Red Book" style triangulation of the SkPath's inner polygon(s) using a
// "middle-out" topology (See GrMiddleOutPolygonTriangulator), and then prepares outer cubics in
// the cubic buffer. The inner triangles and outer cubics stencilled together define the
// complete path.
//
// This method emits the inner triangles with a "middle-out" topology. Middle-out can reduce
// the load on the rasterizer by a great deal as compared to a linear triangle strip or fan.
// See GrMiddleOutPolygonTriangulator.
void prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target*, int* numCountedCurves);
// If a resolveLevel counter is provided, this method resets it, uses it to count the outer
// cubics, and prepares them as indirect draws. Otherwise they are prepared as hardware
// tessellation patches.
//
// If drawTrianglesAsIndirectCubicDraw is true, then the resolveLevel counter must be non-null,
// and we express the inner triangles as an indirect cubic draw and sneak them in alongside the
// other cubic draws.
void prepareMiddleOutTrianglesAndCubics(GrMeshDrawOp::Target*, GrResolveLevelCounter* = nullptr,
bool drawTrianglesAsIndirectCubicDraw = false);
// Selects how the outer-cubic vertex data is aligned when prepareOuterCubics() allocates it.
enum class CubicDataAlignment : bool {
// Allocate with a one-point stride; the base vertex may fall on any vertex.
kVertexBoundary,
// Allocate with a four-point stride, so the base vertex is a multiple of 4 and the buffer can
// also be addressed as whole-cubic instances.
kInstanceBoundary
};
// Prepares a list of indirect draw commands and instance data for the path's "outer cubics",
// converting any quadratics to cubics. An outer cubic is an independent, 4-point closed contour
// consisting of a single cubic curve. Stencilled together with the inner triangles, these
// define the complete path.
void prepareIndirectOuterCubics(GrMeshDrawOp::Target*, const GrResolveLevelCounter&);
// Writes an array of "outer" cubics from each bezier in the SkPath, converting any quadratics
// to cubics. An outer cubic is an independent, 4-point closed contour consisting of a single
// cubic curve. Stencilled together with the inner triangles, these define the complete path.
void prepareOuterCubics(GrMeshDrawOp::Target*, int numCountedCurves, CubicDataAlignment);
// For performance reasons we can often express triangles as an indirect cubic draw and sneak
// them in alongside the other indirect draws. This prepareIndirectOuterCubics variant allows
// the caller to provide a mapped cubic buffer with triangles already written into 4-point
// instances at the beginning. If numTrianglesAtBeginningOfData is nonzero, we add an extra
// indirect draw that renders these triangles.
void prepareIndirectOuterCubicsAndTriangles(GrMeshDrawOp::Target*, const GrResolveLevelCounter&,
SkPoint* cubicData,
int numTrianglesAtBeginningOfData);
// Writes an array of "outer cubic" tessellation patches from each bezier in the SkPath,
// converting any quadratics to cubics. An outer cubic is an independent, 4-point closed contour
// consisting of a single cubic curve. Stencilled together with the inner triangles, these
// define the complete path.
void prepareTessellatedOuterCubics(GrMeshDrawOp::Target*, int numCountedCurves);
// Writes an array of cubic "wedges" from the SkPath, converting any lines or quadratics to
// cubics. A wedge is an independent, 5-point closed contour consisting of 4 cubic control
@ -92,7 +109,7 @@ private:
// stencilled, these wedges alone define the complete path.
//
// TODO: Eventually we want to use rational cubic wedges in order to support conics.
void prepareCubicWedges(GrMeshDrawOp::Target*);
void prepareTessellatedCubicWedges(GrMeshDrawOp::Target*);
void onExecute(GrOpFlushState*, const SkRect& chainBounds) override;
void drawStencilPass(GrOpFlushState*);
@ -137,6 +154,13 @@ private:
int fCubicVertexCount;
GrStencilPathShader* fStencilCubicsShader = nullptr;
// If fIndirectDrawBuffer is non-null, then we issue an indexed-indirect draw instead of using
// hardware tessellation. This is oftentimes faster than tessellation, and other times it serves
// as a polyfill when tessellation just isn't supported.
sk_sp<const GrBuffer> fIndirectDrawBuffer;
size_t fIndirectDrawOffset;
int fIndirectDrawCount;
friend class GrOpMemoryPool; // For ctor.
public:

View File

@ -37,8 +37,6 @@ GrTessellationPathRenderer::GrTessellationPathRenderer(const GrCaps& caps) : fAt
GrPathRenderer::CanDrawPath GrTessellationPathRenderer::onCanDrawPath(
const CanDrawPathArgs& args) const {
// This class should not have been added to the chain without tessellation support.
SkASSERT(args.fCaps->shaderCaps()->tessellationSupport());
if (!args.fShape->style().isSimpleFill() || args.fShape->inverseFilled() ||
args.fViewMatrix->hasPerspective()) {
return CanDrawPath::kNo;