diff --git a/bench/TessellatePathBench.cpp b/bench/TessellatePathBench.cpp index 610eadcfd6..12c3ce4250 100644 --- a/bench/TessellatePathBench.cpp +++ b/bench/TessellatePathBench.cpp @@ -10,6 +10,8 @@ #include "src/core/SkPathPriv.h" #include "src/gpu/GrContextPriv.h" #include "src/gpu/GrOpFlushState.h" +#include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h" +#include "src/gpu/tessellate/GrResolveLevelCounter.h" #include "src/gpu/tessellate/GrTessellatePathOp.h" #include "src/gpu/tessellate/GrWangsFormula.h" #include "tools/ToolUtils.h" @@ -43,6 +45,7 @@ public: "FATAL: wanted %zu bytes of static vertex data; only have %zu.\n", vertexSize * vertexCount, SK_ARRAY_COUNT(fStaticVertexData)).c_str()); } + *startVertex = 0; return fStaticVertexData; } @@ -79,7 +82,7 @@ public: #undef UNIMPL private: - SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 5]; + SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 8]; GrDrawIndexedIndirectCommand fStaticDrawIndexedIndirectData[32]; SkSTArenaAlloc<1024 * 1024> fAllocator; }; @@ -95,10 +98,15 @@ public: const char* onGetName() override { return fName.c_str(); } bool isSuitableFor(Backend backend) final { return backend == kNonRendering_Backend; } - class MiddleOutInnerTrianglesBench; - class OuterCubicsBench; - class CubicWedgesBench; - class WangsFormulaBench; + class prepareMiddleOutStencilGeometry; + class prepareMiddleOutStencilGeometry_indirect; + class prepareIndirectOuterCubics; + class prepareTessellatedOuterCubics; + class prepareTessellatedCubicWedges; + class wangs_formula_cubic_log2; + class wangs_formula_cubic_log2_scale; + class wangs_formula_cubic_log2_affine; + class middle_out_triangulation; private: void onDraw(int loops, SkCanvas*) final { @@ -120,83 +128,92 @@ private: SkString fName; }; -class GrTessellatePathOp::TestingOnly_Benchmark::MiddleOutInnerTrianglesBench - : public GrTessellatePathOp::TestingOnly_Benchmark { -public: - MiddleOutInnerTrianglesBench() - : TestingOnly_Benchmark("prepareMiddleOutInnerTriangles", - ToolUtils::make_star(SkRect::MakeWH(100, 100), - kNumCubicsInChalkboard), - SkMatrix::I()) { - } - void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override { - int numBeziers; - op->prepareMiddleOutInnerTriangles(target, &numBeziers); - } -}; +#define DEF_TESS_BENCH(NAME, PATH, MATRIX, TARGET, OP) \ + class GrTessellatePathOp::TestingOnly_Benchmark::NAME \ + : public GrTessellatePathOp::TestingOnly_Benchmark { \ + public: \ + NAME() : TestingOnly_Benchmark(#NAME, (PATH), (MATRIX)) {} \ + void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override; \ + }; \ + DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::NAME(); ); \ + void GrTessellatePathOp::TestingOnly_Benchmark::NAME::runBench( \ + GrMeshDrawOp::Target* TARGET, GrTessellatePathOp* op) -DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::MiddleOutInnerTrianglesBench(); ); +DEF_TESS_BENCH(prepareMiddleOutStencilGeometry, make_cubic_path(), SkMatrix::I(), target, op) { + op->prepareMiddleOutTrianglesAndCubics(target); +} -class GrTessellatePathOp::TestingOnly_Benchmark::OuterCubicsBench - : public GrTessellatePathOp::TestingOnly_Benchmark { -public: - OuterCubicsBench() - : TestingOnly_Benchmark("prepareOuterCubics", make_cubic_path(), SkMatrix::I()) { - } - void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override { - op->prepareOuterCubics(target, kNumCubicsInChalkboard, - CubicDataAlignment::kVertexBoundary); - } -}; +DEF_TESS_BENCH(prepareMiddleOutStencilGeometry_indirect, make_cubic_path(), SkMatrix::I(), target, + op) { + GrResolveLevelCounter resolveLevelCounter; + op->prepareMiddleOutTrianglesAndCubics(target, &resolveLevelCounter, true); +} -DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::OuterCubicsBench(); ); +DEF_TESS_BENCH(prepareIndirectOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) { + GrResolveLevelCounter resolveLevelCounter; + resolveLevelCounter.reset(op->fPath, SkMatrix::I(), 4); + op->prepareIndirectOuterCubics(target, resolveLevelCounter); +} -class GrTessellatePathOp::TestingOnly_Benchmark::CubicWedgesBench - : public GrTessellatePathOp::TestingOnly_Benchmark { -public: - CubicWedgesBench() - : TestingOnly_Benchmark("prepareCubicWedges", make_cubic_path(), SkMatrix::I()) { - } - void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override { - op->prepareCubicWedges(target); - } -}; +DEF_TESS_BENCH(prepareTessellatedOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) { + op->prepareTessellatedOuterCubics(target, kNumCubicsInChalkboard); +} -DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::CubicWedgesBench();); +DEF_TESS_BENCH(prepareTessellatedCubicWedges, make_cubic_path(), SkMatrix::I(), target, op) { + op->prepareTessellatedCubicWedges(target); +} -class GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench - : public GrTessellatePathOp::TestingOnly_Benchmark { -public: - WangsFormulaBench(const char* suffix, const SkMatrix& matrix) - : TestingOnly_Benchmark(SkStringPrintf("wangs_formula_cubic_log2%s", suffix).c_str(), - make_cubic_path(), SkMatrix::I()) - , fMatrix(matrix) { - } - void runBench(GrMeshDrawOp::Target*, GrTessellatePathOp* op) override { - int sum = 0; - GrVectorXform xform(fMatrix); - for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) { - if (verb == SkPathVerb::kCubic) { - sum += GrWangsFormula::cubic_log2(4, pts, xform); - } - } - // Don't let the compiler optimize away GrWangsFormula::cubic_log2. - if (sum <= 0) { - SK_ABORT("sum should be > 0."); +static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) { + int sum = 0; + GrVectorXform xform(matrix); + for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) { + if (verb == SkPathVerb::kCubic) { + sum += GrWangsFormula::cubic_log2(4, pts, xform); } } -private: - SkMatrix fMatrix; -}; + // Don't let the compiler optimize away GrWangsFormula::cubic_log2. + if (sum <= 0) { + SK_ABORT("sum should be > 0."); + } +} -DEF_BENCH( - return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench("", SkMatrix::I()); -); -DEF_BENCH( - return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench( - "_scale", SkMatrix::Scale(1.1f, 0.9f)); -); -DEF_BENCH( - return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench( - "_affine", SkMatrix::MakeAll(.9f,0.9f,0, 1.1f,1.1f,0, 0,0,1)); -); +DEF_TESS_BENCH(wangs_formula_cubic_log2, make_cubic_path(), SkMatrix::I(), target, op) { + benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath); +} + +DEF_TESS_BENCH(wangs_formula_cubic_log2_scale, make_cubic_path(), SkMatrix::Scale(1.1f, 0.9f), + target, op) { + benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath); +} + +DEF_TESS_BENCH(wangs_formula_cubic_log2_affine, make_cubic_path(), + SkMatrix::MakeAll(.9f,0.9f,0, 1.1f,1.1f,0, 0,0,1), target, op) { + benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath); +} + +DEF_TESS_BENCH(middle_out_triangulation, + ToolUtils::make_star(SkRect::MakeWH(500, 500), kNumCubicsInChalkboard), + SkMatrix::I(), target, op) { + int baseVertex; + auto vertexData = static_cast(target->makeVertexSpace( + sizeof(SkPoint), kNumCubicsInChalkboard, nullptr, &baseVertex)); + GrMiddleOutPolygonTriangulator middleOut(vertexData, 3, kNumCubicsInChalkboard + 2); + for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) { + switch (verb) { + case SkPathVerb::kMove: + middleOut.closeAndMove(pts[0]); + break; + case SkPathVerb::kLine: + middleOut.pushVertex(pts[1]); + break; + case SkPathVerb::kClose: + middleOut.close(); + break; + case SkPathVerb::kQuad: + case SkPathVerb::kConic: + case SkPathVerb::kCubic: + SkUNREACHABLE; + } + middleOut.closeAndMove(pts[0]); + } +} diff --git a/gn/gpu.gni b/gn/gpu.gni index bc5bcf1144..c13461adf6 100644 --- a/gn/gpu.gni +++ b/gn/gpu.gni @@ -444,6 +444,7 @@ skia_gpu_sources = [ "$_src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h", "$_src/gpu/tessellate/GrMidpointContourParser.h", "$_src/gpu/tessellate/GrPathShader.h", + "$_src/gpu/tessellate/GrResolveLevelCounter.h", "$_src/gpu/tessellate/GrStencilPathShader.cpp", "$_src/gpu/tessellate/GrStencilPathShader.h", "$_src/gpu/tessellate/GrTessellatePathOp.cpp", diff --git a/src/gpu/GrPathRendererChain.cpp b/src/gpu/GrPathRendererChain.cpp index ca2d7e4c86..cf7e271f30 100644 --- a/src/gpu/GrPathRendererChain.cpp +++ b/src/gpu/GrPathRendererChain.cpp @@ -32,7 +32,7 @@ GrPathRendererChain::GrPathRendererChain(GrRecordingContext* context, const Opti fChain.push_back(sk_make_sp()); } if (options.fGpuPathRenderers & GpuPathRenderers::kTessellation) { - if (caps.shaderCaps()->tessellationSupport() && caps.drawInstancedSupport()) { + if (caps.drawInstancedSupport()) { auto tess = sk_make_sp(caps); context->priv().addOnFlushCallbackObject(tess.get()); fChain.push_back(std::move(tess)); diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h index cb4711c696..1a16595f77 100644 --- a/src/gpu/GrProcessor.h +++ b/src/gpu/GrProcessor.h @@ -162,9 +162,10 @@ public: kTessellate_GrFillBoundingBoxShader_ClassID, kTessellate_GrFillCubicHullShader_ClassID, kTessellate_GrFillTriangleShader_ClassID, - kTessellate_GrStencilCubicShader_ClassID, + kTessellate_GrMiddleOutCubicShader_ClassID, kTessellate_GrStencilTriangleShader_ClassID, - kTessellate_GrStencilWedgeShader_ClassID, + kTessellate_GrTessellateCubicShader_ClassID, + kTessellate_GrTessellateWedgeShader_ClassID, kTestFP_ClassID, kTestRectOp_ClassID, kFlatNormalsFP_ClassID, diff --git a/src/gpu/tessellate/GrResolveLevelCounter.h b/src/gpu/tessellate/GrResolveLevelCounter.h new file mode 100644 index 0000000000..c74f46cbe4 --- /dev/null +++ b/src/gpu/tessellate/GrResolveLevelCounter.h @@ -0,0 +1,74 @@ +/* + * Copyright 2020 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrResolveLevelCounter_DEFINED +#define GrResolveLevelCounter_DEFINED + +#include "src/core/SkPathPriv.h" +#include "src/gpu/tessellate/GrStencilPathShader.h" +#include "src/gpu/tessellate/GrWangsFormula.h" + +// This class helps bin cubics by log2 "resolveLevel" when we don't use hardware tessellation. It is +// composed of simple counters that track how many cubics we intend to draw at each resolveLevel, +// and how many resolveLevels there are that have at least one cubic. +class GrResolveLevelCounter { +public: + void reset() { + memset(fInstanceCounts, 0, sizeof(fInstanceCounts)); + SkDEBUGCODE(fHasCalledReset = true;) + } + + int reset(const SkPath& path, const SkMatrix& viewMatrix, float intolerance) { + this->reset(); + GrVectorXform xform(viewMatrix); + for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) { + switch (verb) { + case SkPathVerb::kQuad: + // Quadratics get converted to cubics before rendering. + this->countCubic(GrWangsFormula::quadratic_log2(intolerance, pts, xform)); + break; + case SkPathVerb::kCubic: + this->countCubic(GrWangsFormula::cubic_log2(intolerance, pts, xform)); + break; + default: + break; + } + } + return fTotalCubicInstanceCount; + } + + void countCubic(int resolveLevel) { + SkASSERT(fHasCalledReset); + SkASSERT(resolveLevel >= 0); + if (resolveLevel == 0) { + // Cubics with 2^0=1 segments are empty (zero area). We ignore them completely. + return; + } + resolveLevel = std::min(resolveLevel, GrMiddleOutCubicShader::kMaxResolveLevel); + if (!fInstanceCounts[resolveLevel]++) { + ++fTotalCubicIndirectDrawCount; + } + ++fTotalCubicInstanceCount; + } + + int operator[](int resolveLevel) const { + SkASSERT(fHasCalledReset); + SkASSERT(resolveLevel > 0); // Empty cubics with 2^0=1 segments do not need to be drawn. + SkASSERT(resolveLevel <= GrMiddleOutCubicShader::kMaxResolveLevel); + return fInstanceCounts[resolveLevel]; + } + int totalCubicInstanceCount() const { return fTotalCubicInstanceCount; } + int totalCubicIndirectDrawCount() const { return fTotalCubicIndirectDrawCount; } + +private: + SkDEBUGCODE(bool fHasCalledReset = false;) + int fInstanceCounts[GrMiddleOutCubicShader::kMaxResolveLevel + 1]; + int fTotalCubicInstanceCount = 0; + int fTotalCubicIndirectDrawCount = 0; +}; + +#endif diff --git a/src/gpu/tessellate/GrStencilPathShader.cpp b/src/gpu/tessellate/GrStencilPathShader.cpp index 9b36c2ac58..fadc5b16b3 100644 --- a/src/gpu/tessellate/GrStencilPathShader.cpp +++ b/src/gpu/tessellate/GrStencilPathShader.cpp @@ -35,6 +35,7 @@ constexpr char kEvalCubicFn[] = R"( })"; class GrStencilPathShader::Impl : public GrGLSLGeometryProcessor { +protected: void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override { const auto& shader = args.fGP.cast(); args.fVaryingHandler->emitAttributes(shader); @@ -45,7 +46,7 @@ class GrStencilPathShader::Impl : public GrGLSLGeometryProcessor { fViewMatrixUniform = args.fUniformHandler->addUniform( nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix); args.fVertBuilder->codeAppendf( - "float2 vertexpos = (%s * float3(point, 1)).xy;", viewMatrix); + "float2 vertexpos = (%s * float3(inputPoint, 1)).xy;", viewMatrix); vertexPos.set(kFloat2_GrSLType, "vertexpos"); } @@ -75,8 +76,8 @@ GrGLSLPrimitiveProcessor* GrStencilPathShader::createGLSLInstance(const GrShader return new Impl; } -SkString GrStencilCubicShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls, - const GrShaderCaps&) const { +SkString GrTessellateCubicShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls, + const GrShaderCaps&) const { SkString code(versionAndExtensionDecls); code.append(kWangsFormulaCubicFn); code.append(R"( @@ -115,8 +116,8 @@ SkString GrStencilCubicShader::getTessControlShaderGLSL(const char* versionAndEx return code; } -SkString GrStencilCubicShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls, - const GrShaderCaps&) const { +SkString GrTessellateCubicShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls, + const GrShaderCaps&) const { SkString code(versionAndExtensionDecls); code.append(kEvalCubicFn); code.append(R"( @@ -147,8 +148,8 @@ SkString GrStencilCubicShader::getTessEvaluationShaderGLSL(const char* versionAn return code; } -SkString GrStencilWedgeShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls, - const GrShaderCaps&) const { +SkString GrTessellateWedgeShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls, + const GrShaderCaps&) const { SkString code(versionAndExtensionDecls); code.append(kWangsFormulaCubicFn); code.append(R"( @@ -183,8 +184,8 @@ SkString GrStencilWedgeShader::getTessControlShaderGLSL(const char* versionAndEx return code; } -SkString GrStencilWedgeShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls, - const GrShaderCaps&) const { +SkString GrTessellateWedgeShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls, + const GrShaderCaps&) const { SkString code(versionAndExtensionDecls); code.append(kEvalCubicFn); code.append(R"( @@ -218,3 +219,109 @@ SkString GrStencilWedgeShader::getTessEvaluationShaderGLSL(const char* versionAn return code; } + +GR_DECLARE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey); + +sk_sp GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer( + GrResourceProvider* resourceProvider) { + GR_DEFINE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey); + if (auto buffer = resourceProvider->findByUniqueKey(gMiddleOutIndexBufferKey)) { + return std::move(buffer); + } + + // One explicit triangle at index 0, and one middle-out cubic with kMaxResolveLevel line + // segments beginning at index 3. + constexpr static int indexCount = 3 + NumVerticesAtResolveLevel(kMaxResolveLevel); + auto buffer = resourceProvider->createBuffer( + indexCount * sizeof(uint16_t), GrGpuBufferType::kIndex, kStatic_GrAccessPattern); + if (!buffer) { + return nullptr; + } + + // We shouldn't bin and/or cache static buffers. + SkASSERT(buffer->size() == indexCount * sizeof(uint16_t)); + SkASSERT(!buffer->resourcePriv().getScratchKey().isValid()); + auto indexData = static_cast(buffer->map()); + SkAutoTMalloc stagingBuffer; + if (!indexData) { + SkASSERT(!buffer->isMapped()); + indexData = stagingBuffer.reset(indexCount); + } + + // Indices 0,1,2 contain special values that emit points P0, P1, and P2 respectively. (When the + // vertex shader is fed an index value larger than (1 << kMaxResolveLevel), it emits + // P[index % 4].) + int i = 0; + indexData[i++] = (1 << kMaxResolveLevel) + 4; // % 4 == 0 + indexData[i++] = (1 << kMaxResolveLevel) + 5; // % 4 == 1 + indexData[i++] = (1 << kMaxResolveLevel) + 6; // % 4 == 2 + + // Starting at index 3, we triangulate a cubic with 2^kMaxResolveLevel line segments. Each + // index value corresponds to parametric value T=(index / 2^kMaxResolveLevel). Since the + // triangles are arranged in "middle-out" order, we will be able to conveniently control the + // resolveLevel by changing only the indexCount. + for (uint16_t advance = 1 << (kMaxResolveLevel - 1); advance; advance >>= 1) { + uint16_t T = 0; + do { + indexData[i++] = T; + indexData[i++] = (T += advance); + indexData[i++] = (T += advance); + } while (T != (1 << kMaxResolveLevel)); + } + SkASSERT(i == indexCount); + + if (buffer->isMapped()) { + buffer->unmap(); + } else { + buffer->updateData(stagingBuffer, indexCount * sizeof(uint16_t)); + } + buffer->resourcePriv().setUniqueKey(gMiddleOutIndexBufferKey); + return std::move(buffer); +} + +class GrMiddleOutCubicShader::Impl : public GrStencilPathShader::Impl { + void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override { + const auto& shader = args.fGP.cast(); + args.fVaryingHandler->emitAttributes(shader); + args.fVertBuilder->defineConstant("kMaxResolveLevel", kMaxResolveLevel); + args.fVertBuilder->codeAppend(R"( + float4x2 P = float4x2(inputPoints_0_1, inputPoints_2_3); + float2 point; + if (sk_VertexID > (1 << kMaxResolveLevel)) { + // This is a special index value that wants us to emit a specific point. + point = P[sk_VertexID & 3]; + } else {)"); + // Evaluate the cubic at T=(sk_VertexID / 2^kMaxResolveLevel). + if (args.fShaderCaps->fpManipulationSupport()) { + args.fVertBuilder->codeAppend(R"( + float T = ldexp(sk_VertexID, -kMaxResolveLevel);)"); + } else { + args.fVertBuilder->codeAppend(R"( + float T = sk_VertexID / float(1 << kMaxResolveLevel);)"); + } + args.fVertBuilder->codeAppend(R"( + float2 ab = mix(P[0], P[1], T); + float2 bc = mix(P[1], P[2], T); + float2 cd = mix(P[2], P[3], T); + float2 abc = mix(ab, bc, T); + float2 bcd = mix(bc, cd, T); + point = mix(abc, bcd, T); + })"); + + GrShaderVar vertexPos("point", kFloat2_GrSLType); + if (!shader.viewMatrix().isIdentity()) { + const char* viewMatrix; + fViewMatrixUniform = args.fUniformHandler->addUniform( + nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix); + args.fVertBuilder->codeAppendf(R"( + float2 transformedPoint = (%s * float3(point, 1)).xy;)", viewMatrix); + vertexPos.set(kFloat2_GrSLType, "transformedPoint"); + } + gpArgs->fPositionVar = vertexPos; + // No fragment shader. + } +}; + +GrGLSLPrimitiveProcessor* GrMiddleOutCubicShader::createGLSLInstance(const GrShaderCaps&) const { + return new Impl; +} diff --git a/src/gpu/tessellate/GrStencilPathShader.h b/src/gpu/tessellate/GrStencilPathShader.h index 0aa0b5983b..c331726eb9 100644 --- a/src/gpu/tessellate/GrStencilPathShader.h +++ b/src/gpu/tessellate/GrStencilPathShader.h @@ -17,16 +17,15 @@ public: GrStencilPathShader(ClassID classID, const SkMatrix& viewMatrix, GrPrimitiveType primitiveType, int tessellationPatchVertexCount = 0) : GrPathShader(classID, viewMatrix, primitiveType, tessellationPatchVertexCount) { - constexpr static Attribute kPointAttrib = { - "point", kFloat2_GrVertexAttribType, kFloat2_GrSLType}; - this->setVertexAttributes(&kPointAttrib, 1); } -private: - void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const final { +protected: + constexpr static Attribute kSinglePointAttrib{"inputPoint", kFloat2_GrVertexAttribType, + kFloat2_GrSLType}; + void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const override { b->add32(this->viewMatrix().isIdentity()); } - GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const final; + GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override; class Impl; }; @@ -35,18 +34,21 @@ private: class GrStencilTriangleShader : public GrStencilPathShader { public: GrStencilTriangleShader(const SkMatrix& viewMatrix) : GrStencilPathShader( - kTessellate_GrStencilTriangleShader_ClassID, viewMatrix, GrPrimitiveType::kTriangles) {} + kTessellate_GrStencilTriangleShader_ClassID, viewMatrix, GrPrimitiveType::kTriangles) { + this->setVertexAttributes(&kSinglePointAttrib, 1); + } const char* name() const override { return "tessellate_GrStencilTriangleShader"; } }; -// Uses GPU tessellation shaders to linearize, triangulate, and render standalone cubics. Here, a -// "cubic" is a standalone closed contour consisting of a single cubic bezier. +// Uses GPU tessellation shaders to linearize, triangulate, and render standalone closed cubics. // TODO: Eventually we want to use rational cubic wedges in order to support perspective and conics. -class GrStencilCubicShader : public GrStencilPathShader { +class GrTessellateCubicShader : public GrStencilPathShader { public: - GrStencilCubicShader(const SkMatrix& viewMatrix) : GrStencilPathShader( - kTessellate_GrStencilCubicShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 4) {} - const char* name() const override { return "tessellate_GrStencilCubicShader"; } + GrTessellateCubicShader(const SkMatrix& viewMatrix) : GrStencilPathShader( + kTessellate_GrTessellateCubicShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 4) { + this->setVertexAttributes(&kSinglePointAttrib, 1); + } + const char* name() const override { return "tessellate_GrTessellateCubicShader"; } private: SkString getTessControlShaderGLSL(const char* versionAndExtensionDecls, @@ -59,11 +61,13 @@ private: // wedge is a 5-point patch consisting of 4 cubic control points, plus an anchor point fanning from // the center of the curve's resident contour. // TODO: Eventually we want to use rational cubic wedges in order to support perspective and conics. -class GrStencilWedgeShader : public GrStencilPathShader { +class GrTessellateWedgeShader : public GrStencilPathShader { public: - GrStencilWedgeShader(const SkMatrix& viewMatrix) : GrStencilPathShader( - kTessellate_GrStencilWedgeShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 5) {} - const char* name() const override { return "tessellate_GrStencilWedgeShader"; } + GrTessellateWedgeShader(const SkMatrix& viewMatrix) : GrStencilPathShader( + kTessellate_GrTessellateWedgeShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 5) { + this->setVertexAttributes(&kSinglePointAttrib, 1); + } + const char* name() const override { return "tessellate_GrTessellateWedgeShader"; } private: SkString getTessControlShaderGLSL(const char* versionAndExtensionDecls, @@ -72,4 +76,69 @@ private: const GrShaderCaps&) const override; }; +// Uses indirect (instanced) draws to triangulate standalone closed cubics with a "middle-out" +// topology. The caller must compute each cubic's resolveLevel on the CPU (i.e., the log2 number of +// line segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2), and then +// sort the instance buffer by resolveLevel for efficient batching of indirect draws. +class GrMiddleOutCubicShader : public GrStencilPathShader { +public: + // Each resolveLevel linearizes the curve into 2^resolveLevel line segments. The finest + // supported resolveLevel is therefore 2^12=4096 line segments. + constexpr static int kMaxResolveLevel = 12; + + // How many vertices do we need to draw in order to triangulate a cubic with 2^resolveLevel + // line segments? + constexpr static int NumVerticesAtResolveLevel(int resolveLevel) { + // resolveLevel=0 -> 0 line segments -> 0 triangles -> 0 vertices + // resolveLevel=1 -> 2 line segments -> 1 triangle -> 3 vertices + // resolveLevel=2 -> 4 line segments -> 3 triangles -> 9 vertices + // resolveLevel=3 -> 8 line segments -> 7 triangles -> 21 vertices + // ... + return ((1 << resolveLevel) - 1) * 3; + } + + // Configures an indirect draw to render cubic instances with 2^resolveLevel evenly-spaced (in + // the parametric sense) line segments. + static GrDrawIndexedIndirectCommand MakeDrawCubicsIndirectCmd(int resolveLevel, + uint32_t instanceCount, + uint32_t baseInstance) { + SkASSERT(resolveLevel > 0 && resolveLevel <= kMaxResolveLevel); + // Starting at baseIndex=3, the index buffer triangulates a cubic with 2^kMaxResolveLevel + // line segments. Each index value corresponds to a parametric T value on the curve. Since + // the triangles are arranged in "middle-out" order, we can conveniently control the + // resolveLevel by changing only the indexCount. + uint32_t indexCount = NumVerticesAtResolveLevel(resolveLevel); + return {indexCount, instanceCount, 3, 0, baseInstance}; + } + + // For performance reasons we can often express triangles as an indirect cubic draw and sneak + // them in alongside the other indirect draws. This method configures an indirect draw to emit + // the triangle [P0, P1, P2] from a 4-point instance. + static GrDrawIndexedIndirectCommand MakeDrawTrianglesIndirectCmd(uint32_t instanceCount, + uint32_t baseInstance) { + // Indices 0,1,2 have special index values that emit points P0, P1, and P2 respectively. + return {3, instanceCount, 0, 0, baseInstance}; + } + + // Returns the index buffer that should be bound when drawing with this shader. + // (Our vertex shader uses raw index values directly, so there is no vertex buffer.) + static sk_sp FindOrMakeMiddleOutIndexBuffer(GrResourceProvider*); + + GrMiddleOutCubicShader(const SkMatrix& viewMatrix) + : GrStencilPathShader(kTessellate_GrMiddleOutCubicShader_ClassID, viewMatrix, + GrPrimitiveType::kTriangles) { + constexpr static Attribute kInputPtsAttribs[] = { + {"inputPoints_0_1", kFloat4_GrVertexAttribType, kFloat4_GrSLType}, + {"inputPoints_2_3", kFloat4_GrVertexAttribType, kFloat4_GrSLType}}; + this->setInstanceAttributes(kInputPtsAttribs, 2); + } + + const char* name() const override { return "tessellate_GrMiddleOutCubicShader"; } + +private: + GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override; + + class Impl; +}; + #endif diff --git a/src/gpu/tessellate/GrTessellatePathOp.cpp b/src/gpu/tessellate/GrTessellatePathOp.cpp index cf79daa7a7..cb1bf37584 100644 --- a/src/gpu/tessellate/GrTessellatePathOp.cpp +++ b/src/gpu/tessellate/GrTessellatePathOp.cpp @@ -14,8 +14,12 @@ #include "src/gpu/tessellate/GrFillPathShader.h" #include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h" #include "src/gpu/tessellate/GrMidpointContourParser.h" +#include "src/gpu/tessellate/GrResolveLevelCounter.h" #include "src/gpu/tessellate/GrStencilPathShader.h" +constexpr static int kMaxResolveLevel = GrMiddleOutCubicShader::kMaxResolveLevel; +constexpr static float kTessellationIntolerance = 4; // 1/4 of a pixel. + GrTessellatePathOp::FixedFunctionFlags GrTessellatePathOp::fixedFunctionFlags() const { auto flags = FixedFunctionFlags::kUsesStencil; if (GrAAType::kNone != fAAType) { @@ -30,50 +34,69 @@ void GrTessellatePathOp::onPrePrepare(GrRecordingContext*, const GrXferProcessor::DstProxyView&) { } -void GrTessellatePathOp::onPrepare(GrOpFlushState* state) { - // First check if the path is large and/or simple enough that we can actually triangulate the - // inner polygon(s) on the CPU. This is our fastest approach. It allows us to stencil only the - // curves, and then fill the internal polygons directly to the final render target, thus filling - // in the majority of pixels in a single render pass. - SkScalar scales[2]; - SkAssertResult(fViewMatrix.getMinMaxScales(scales)); // Will fail if perspective. - const SkRect& bounds = fPath.getBounds(); +void GrTessellatePathOp::onPrepare(GrOpFlushState* flushState) { int numVerbs = fPath.countVerbs(); if (numVerbs <= 0) { return; } + + // First check if the path is large and/or simple enough that we can actually triangulate the + // inner polygon(s) on the CPU. This is our fastest approach. It allows us to stencil only the + // curves, and then fill the internal polygons directly to the final render target, thus drawing + // the majority of pixels in a single render pass. + SkScalar scales[2]; + SkAssertResult(fViewMatrix.getMinMaxScales(scales)); // Will fail if perspective. + const SkRect& bounds = fPath.getBounds(); float gpuFragmentWork = bounds.height() * scales[0] * bounds.width() * scales[1]; float cpuTessellationWork = (float)numVerbs * SkNextLog2(numVerbs); // N log N. if (cpuTessellationWork * 500 + (256 * 256) < gpuFragmentWork) { // Don't try below 256x256. - int numCountedCurves; + int numCountedCubics; // This will fail if the inner triangles do not form a simple polygon (e.g., self // intersection, double winding). - if (this->prepareNonOverlappingInnerTriangles(state, &numCountedCurves)) { - // Prepare cubics on an instance boundary so we can use the buffer to fill local convex - // hulls as well. - this->prepareOuterCubics(state, numCountedCurves, - CubicDataAlignment::kInstanceBoundary); + if (this->prepareNonOverlappingInnerTriangles(flushState, &numCountedCubics)) { + if (!numCountedCubics) { + return; + } + // Always use indirect draws for cubics instead of tessellation here. Our goal in this + // mode is to maximize GPU performance, and the middle-out topology used by our indirect + // draws is easier on the rasterizer than a tessellated fan. There also seems to be a + // small amount of fixed tessellation overhead that this avoids. + // + // NOTE: This will count fewer cubics than above if it discards any for resolveLevel=0. + GrResolveLevelCounter resolveLevelCounter; + numCountedCubics = resolveLevelCounter.reset(fPath, fViewMatrix, + kTessellationIntolerance); + this->prepareIndirectOuterCubics(flushState, resolveLevelCounter); return; } } - // Next see if we can split up inner polygon triangles and curves, and triangulate the inner - // polygon(s) more efficiently. This causes greater CPU overhead due to the extra shaders and - // draw calls, but the better triangulation can reduce the rasterizer load by a great deal on - // complex paths. + // When there are only a few verbs, it seems to always be fastest to make a single indirect draw + // that contains both the inner triangles and the outer cubics, instead of using hardware + // tessellation. Also take this path if tessellation is not supported. + bool drawTrianglesAsIndirectCubicDraw = (numVerbs < 50); + if (drawTrianglesAsIndirectCubicDraw || + !flushState->caps().shaderCaps()->tessellationSupport()) { + // Prepare outer cubics with indirect draws. + GrResolveLevelCounter resolveLevelCounter; + this->prepareMiddleOutTrianglesAndCubics(flushState, &resolveLevelCounter, + drawTrianglesAsIndirectCubicDraw); + return; + } + + // Next see if we can split up the inner triangles and outer cubics into two draw calls. This + // allows for a more efficient inner triangle topology that can reduce the rasterizer load by a + // large margin on complex paths, but also causes greater CPU overhead due to the extra shader + // switches and draw calls. // NOTE: Raster-edge work is 1-dimensional, so we sum height and width instead of multiplying. float rasterEdgeWork = (bounds.height() + bounds.width()) * scales[1] * fPath.countVerbs(); - if (rasterEdgeWork > 1000 * 1000) { - int numCountedCurves; - this->prepareMiddleOutInnerTriangles(state, &numCountedCurves); - // We will fill the path with a bounding box instead local cubic convex hulls, so there is - // no need to prepare the cubics on an instance boundary. - this->prepareOuterCubics(state, numCountedCurves, CubicDataAlignment::kVertexBoundary); + if (rasterEdgeWork > 300 * 300) { + this->prepareMiddleOutTrianglesAndCubics(flushState); return; } // Fastest CPU approach: emit one cubic wedge per verb, fanning out from the center. - this->prepareCubicWedges(state); + this->prepareTessellatedCubicWedges(flushState); } bool GrTessellatePathOp::prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Target* target, @@ -106,40 +129,74 @@ bool GrTessellatePathOp::prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Targe return true; } -void GrTessellatePathOp::prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target* target, - int* numCountedCurves) { +void GrTessellatePathOp::prepareMiddleOutTrianglesAndCubics( + GrMeshDrawOp::Target* target, GrResolveLevelCounter* resolveLevelCounter, + bool drawTrianglesAsIndirectCubicDraw) { SkASSERT(!fTriangleBuffer); SkASSERT(!fDoStencilTriangleBuffer); SkASSERT(!fDoFillTriangleBuffer); + SkASSERT(!fCubicBuffer); + SkASSERT(!fStencilCubicsShader); + SkASSERT(!fIndirectDrawBuffer); // No initial moveTo, plus an implicit close at the end; n-2 triangles fill an n-gon. - // Each triangle has 3 vertices. - int maxVertices = (fPath.countVerbs() - 1) * 3; + int maxInnerTriangles = fPath.countVerbs() - 1; + int maxCubics = fPath.countVerbs(); - GrEagerDynamicVertexAllocator vertexAlloc(target, &fTriangleBuffer, &fBaseTriangleVertex); - auto* vertexData = vertexAlloc.lock(maxVertices); + SkPoint* vertexData; + int vertexAdvancePerTriangle; + if (drawTrianglesAsIndirectCubicDraw) { + // Allocate the triangles as 4-point instances at the beginning of the cubic buffer. + SkASSERT(resolveLevelCounter); + vertexAdvancePerTriangle = 4; + int baseTriangleInstance; + vertexData = static_cast(target->makeVertexSpace( + sizeof(SkPoint) * 4, maxInnerTriangles + maxCubics, &fCubicBuffer, + &baseTriangleInstance)); + fBaseCubicVertex = baseTriangleInstance * 4; + } else { + // Allocate the triangles as normal 3-point instances in the triangle buffer. + vertexAdvancePerTriangle = 3; + vertexData = static_cast(target->makeVertexSpace( + sizeof(SkPoint), maxInnerTriangles * 3, &fTriangleBuffer, &fBaseTriangleVertex)); + } if (!vertexData) { return; } - constexpr static int kNumVerticesPerTriangle = 3; - GrMiddleOutPolygonTriangulator middleOut(vertexData, kNumVerticesPerTriangle, maxVertices); - int localCurveCount = 0; + GrVectorXform xform(fViewMatrix); + GrMiddleOutPolygonTriangulator middleOut(vertexData, vertexAdvancePerTriangle, + fPath.countVerbs()); + if (resolveLevelCounter) { + resolveLevelCounter->reset(); + } + int numCountedCurves = 0; for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) { switch (verb) { case SkPathVerb::kMove: - middleOut.closeAndMove(*pts++); + middleOut.closeAndMove(pts[0]); break; case SkPathVerb::kLine: middleOut.pushVertex(pts[1]); break; case SkPathVerb::kQuad: middleOut.pushVertex(pts[2]); - ++localCurveCount; + if (resolveLevelCounter) { + // Quadratics get converted to cubics before rendering. + resolveLevelCounter->countCubic(GrWangsFormula::quadratic_log2( + kTessellationIntolerance, pts, xform)); + break; + } + ++numCountedCurves; break; case SkPathVerb::kCubic: middleOut.pushVertex(pts[3]); - ++localCurveCount; + if (resolveLevelCounter) { + resolveLevelCounter->countCubic(GrWangsFormula::cubic_log2( + kTessellationIntolerance, pts, xform)); + break; + } + ++numCountedCurves; break; case SkPathVerb::kClose: middleOut.close(); @@ -148,13 +205,31 @@ void GrTessellatePathOp::prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target* ta SkUNREACHABLE; } } - fTriangleVertexCount = middleOut.close() * kNumVerticesPerTriangle; - *numCountedCurves = localCurveCount; + int triangleCount = middleOut.close(); + SkASSERT(triangleCount <= maxInnerTriangles); - vertexAlloc.unlock(fTriangleVertexCount); - - if (fTriangleVertexCount) { - fDoStencilTriangleBuffer = true; + if (drawTrianglesAsIndirectCubicDraw) { + SkASSERT(resolveLevelCounter); + int totalInstanceCount = triangleCount + resolveLevelCounter->totalCubicInstanceCount(); + SkASSERT(vertexAdvancePerTriangle == 4); + target->putBackVertices(maxInnerTriangles + maxCubics - totalInstanceCount, + sizeof(SkPoint) * 4); + if (totalInstanceCount) { + this->prepareIndirectOuterCubicsAndTriangles(target, *resolveLevelCounter, vertexData, + triangleCount); + } + } else { + SkASSERT(vertexAdvancePerTriangle == 3); + target->putBackVertices(maxInnerTriangles - triangleCount, sizeof(SkPoint) * 3); + fTriangleVertexCount = triangleCount * 3; + if (fTriangleVertexCount) { + fDoStencilTriangleBuffer = true; + } + if (resolveLevelCounter) { + this->prepareIndirectOuterCubics(target, *resolveLevelCounter); + } else { + this->prepareTessellatedOuterCubics(target, numCountedCurves); + } } } @@ -177,8 +252,133 @@ static void quad2cubic(const SkPoint pts[], SkPoint* out) { out[3] = pts[2]; } -void GrTessellatePathOp::prepareOuterCubics(GrMeshDrawOp::Target* target, int numCountedCurves, - CubicDataAlignment alignment) { +void GrTessellatePathOp::prepareIndirectOuterCubics( + GrMeshDrawOp::Target* target, const GrResolveLevelCounter& resolveLevelCounter) { + SkASSERT(resolveLevelCounter.totalCubicInstanceCount() >= 0); + if (resolveLevelCounter.totalCubicInstanceCount() == 0) { + return; + } + // Allocate a buffer to store the cubic data. + SkPoint* cubicData; + int baseInstance; + cubicData = static_cast(target->makeVertexSpace( + sizeof(SkPoint) * 4, resolveLevelCounter.totalCubicInstanceCount(), &fCubicBuffer, + &baseInstance)); + if (!cubicData) { + return; + } + fBaseCubicVertex = baseInstance * 4; + this->prepareIndirectOuterCubicsAndTriangles(target, resolveLevelCounter, cubicData, + /*numTrianglesAtBeginningOfData=*/0); +} + +void GrTessellatePathOp::prepareIndirectOuterCubicsAndTriangles( + GrMeshDrawOp::Target* target, const GrResolveLevelCounter& resolveLevelCounter, + SkPoint* cubicData, int numTrianglesAtBeginningOfData) { + SkASSERT(numTrianglesAtBeginningOfData + resolveLevelCounter.totalCubicInstanceCount() > 0); + SkASSERT(!fStencilCubicsShader); + SkASSERT(cubicData); + + // Here we treat fCubicBuffer as an instance buffer. It should have been prepared with the base + // vertex on an instance boundary in order to accommodate this. + SkASSERT(fBaseCubicVertex % 4 == 0); + int baseInstance = fBaseCubicVertex >> 2; + + // Start preparing the indirect draw buffer. + fIndirectDrawCount = resolveLevelCounter.totalCubicIndirectDrawCount(); + if (numTrianglesAtBeginningOfData) { + ++fIndirectDrawCount; // Add an indirect draw for the triangles at the beginning. + } + + // Allocate space for the GrDrawIndexedIndirectCommand structs. + GrDrawIndexedIndirectCommand* indirectData = target->makeDrawIndexedIndirectSpace( + fIndirectDrawCount, &fIndirectDrawBuffer, &fIndirectDrawOffset); + if (!indirectData) { + SkASSERT(!fIndirectDrawBuffer); + return; + } + + // Fill out the GrDrawIndexedIndirectCommand structs and determine the starting instance data + // location at each resolve level. + SkPoint* instanceLocations[kMaxResolveLevel + 1]; + int indirectIdx = 0; + int runningInstanceCount = 0; + if (numTrianglesAtBeginningOfData) { + // The caller has already packed "triangleInstanceCount" triangles into 4-point instances + // at the beginning of the instance buffer. Add a special-case indirect draw here that will + // emit the triangles [P0, P1, P2] from these 4-point instances. + indirectData[0] = GrMiddleOutCubicShader::MakeDrawTrianglesIndirectCmd( + numTrianglesAtBeginningOfData, baseInstance); + indirectIdx = 1; + runningInstanceCount = numTrianglesAtBeginningOfData; + } + for (int resolveLevel = 1; resolveLevel <= kMaxResolveLevel; ++resolveLevel) { + instanceLocations[resolveLevel] = cubicData + runningInstanceCount * 4; + if (int instanceCountAtCurrLevel = resolveLevelCounter[resolveLevel]) { + indirectData[indirectIdx++] = GrMiddleOutCubicShader::MakeDrawCubicsIndirectCmd( + resolveLevel, instanceCountAtCurrLevel, baseInstance + runningInstanceCount); + runningInstanceCount += instanceCountAtCurrLevel; + } + } + +#ifdef SK_DEBUG + SkASSERT(indirectIdx == fIndirectDrawCount); + SkASSERT(runningInstanceCount == numTrianglesAtBeginningOfData + + resolveLevelCounter.totalCubicInstanceCount()); + SkASSERT(fIndirectDrawCount > 0); + + SkPoint* endLocations[kMaxResolveLevel + 1]; + memcpy(endLocations, instanceLocations + 1, kMaxResolveLevel * sizeof(SkPoint*)); + int totalInstanceCount = numTrianglesAtBeginningOfData + + resolveLevelCounter.totalCubicInstanceCount(); + endLocations[kMaxResolveLevel] = cubicData + totalInstanceCount * 4; +#endif + + fCubicVertexCount = numTrianglesAtBeginningOfData * 4; + + if (resolveLevelCounter.totalCubicInstanceCount()) { + GrVectorXform xform(fViewMatrix); + for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) { + int level; + switch (verb) { + default: + continue; + case SkPathVerb::kQuad: + level = GrWangsFormula::quadratic_log2(kTessellationIntolerance, pts, xform); + if (level == 0) { + continue; + } + level = std::min(level, kMaxResolveLevel); + quad2cubic(pts, instanceLocations[level]); + break; + case SkPathVerb::kCubic: + level = GrWangsFormula::cubic_log2(kTessellationIntolerance, pts, xform); + if (level == 0) { + continue; + } + level = std::min(level, kMaxResolveLevel); + memcpy(instanceLocations[level], pts, sizeof(SkPoint) * 4); + break; + } + instanceLocations[level] += 4; + fCubicVertexCount += 4; + } + } + +#ifdef SK_DEBUG + for (int i = 1; i <= kMaxResolveLevel; ++i) { + SkASSERT(instanceLocations[i] == endLocations[i]); + } + SkASSERT(fCubicVertexCount == (numTrianglesAtBeginningOfData + + resolveLevelCounter.totalCubicInstanceCount()) * 4); +#endif + + fStencilCubicsShader = target->allocator()->make(fViewMatrix); +} + +void GrTessellatePathOp::prepareTessellatedOuterCubics(GrMeshDrawOp::Target* target, + int numCountedCurves) { + SkASSERT(numCountedCurves >= 0); SkASSERT(!fCubicBuffer); SkASSERT(!fStencilCubicsShader); @@ -186,41 +386,34 @@ void GrTessellatePathOp::prepareOuterCubics(GrMeshDrawOp::Target* target, int nu return; } - bool instanceAligned = (alignment == CubicDataAlignment::kInstanceBoundary); - int instanceOrVertexStride = (instanceAligned) ? sizeof(SkPoint) * 4 : sizeof(SkPoint); - int instanceOrVertexCount = (instanceAligned) ? numCountedCurves : numCountedCurves * 4; - int baseInstanceOrVertex; - auto* vertexData = static_cast(target->makeVertexSpace( - instanceOrVertexStride, instanceOrVertexCount, &fCubicBuffer, &baseInstanceOrVertex)); + sizeof(SkPoint), numCountedCurves * 4, &fCubicBuffer, &fBaseCubicVertex)); if (!vertexData) { return; } - fBaseCubicVertex = (instanceAligned) ? baseInstanceOrVertex * 4 : baseInstanceOrVertex; fCubicVertexCount = 0; for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) { switch (verb) { + default: + continue; case SkPathVerb::kQuad: SkASSERT(fCubicVertexCount < numCountedCurves * 4); quad2cubic(pts, vertexData + fCubicVertexCount); - fCubicVertexCount += 4; break; case SkPathVerb::kCubic: SkASSERT(fCubicVertexCount < numCountedCurves * 4); memcpy(vertexData + fCubicVertexCount, pts, sizeof(SkPoint) * 4); - fCubicVertexCount += 4; - break; - default: break; } + fCubicVertexCount += 4; } SkASSERT(fCubicVertexCount == numCountedCurves * 4); - fStencilCubicsShader = target->allocator()->make(fViewMatrix); + fStencilCubicsShader = target->allocator()->make(fViewMatrix); } -void GrTessellatePathOp::prepareCubicWedges(GrMeshDrawOp::Target* target) { +void GrTessellatePathOp::prepareTessellatedCubicWedges(GrMeshDrawOp::Target* target) { SkASSERT(!fCubicBuffer); SkASSERT(!fStencilCubicsShader); @@ -275,18 +468,18 @@ void GrTessellatePathOp::prepareCubicWedges(GrMeshDrawOp::Target* target) { vertexAlloc.unlock(fCubicVertexCount); if (fCubicVertexCount) { - fStencilCubicsShader = target->allocator()->make(fViewMatrix); + fStencilCubicsShader = target->allocator()->make(fViewMatrix); } } -void GrTessellatePathOp::onExecute(GrOpFlushState* state, const SkRect& chainBounds) { - this->drawStencilPass(state); +void GrTessellatePathOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) { + this->drawStencilPass(flushState); if (!(Flags::kStencilOnly & fFlags)) { - this->drawCoverPass(state); + this->drawCoverPass(flushState); } } -void GrTessellatePathOp::drawStencilPass(GrOpFlushState* state) { +void GrTessellatePathOp::drawStencilPass(GrOpFlushState* flushState) { // Increments clockwise triangles and decrements counterclockwise. Used for "winding" fill. constexpr static GrUserStencilSettings kIncrDecrStencil( GrUserStencilSettings::StaticInitSeparate< @@ -311,41 +504,49 @@ void GrTessellatePathOp::drawStencilPass(GrOpFlushState* state) { if (GrAAType::kNone != fAAType) { initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias; } - if (state->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) { + if (flushState->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) { initArgs.fInputFlags |= GrPipeline::InputFlags::kWireframe; } SkASSERT(SkPathFillType::kWinding == fPath.getFillType() || SkPathFillType::kEvenOdd == fPath.getFillType()); initArgs.fUserStencil = (SkPathFillType::kWinding == fPath.getFillType()) ? &kIncrDecrStencil : &kInvertStencil; - initArgs.fCaps = &state->caps(); + initArgs.fCaps = &flushState->caps(); GrPipeline pipeline(initArgs, GrDisableColorXPFactory::MakeXferProcessor(), - state->appliedHardClip()); + flushState->appliedHardClip()); if (fDoStencilTriangleBuffer) { SkASSERT(fTriangleBuffer); GrStencilTriangleShader stencilTriangleShader(fViewMatrix); - GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, + GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline, &stencilTriangleShader); - state->bindPipelineAndScissorClip(programInfo, this->bounds()); - state->bindBuffers(nullptr, nullptr, fTriangleBuffer.get()); - state->draw(fTriangleVertexCount, fBaseTriangleVertex); + flushState->bindPipelineAndScissorClip(programInfo, this->bounds()); + flushState->bindBuffers(nullptr, nullptr, fTriangleBuffer.get()); + flushState->draw(fTriangleVertexCount, fBaseTriangleVertex); } if (fStencilCubicsShader) { - GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, fStencilCubicsShader); - state->bindPipelineAndScissorClip(programInfo, this->bounds()); - state->bindBuffers(nullptr, nullptr, fCubicBuffer.get()); - state->draw(fCubicVertexCount, fBaseCubicVertex); - } - - // http://skbug.com/9739 - if (state->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) { - state->gpu()->insertManualFramebufferBarrier(); + SkASSERT(fCubicBuffer); + GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline, + fStencilCubicsShader); + flushState->bindPipelineAndScissorClip(programInfo, this->bounds()); + if (fIndirectDrawBuffer) { + auto indexBuffer = GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer( + flushState->resourceProvider()); + flushState->bindBuffers(indexBuffer.get(), fCubicBuffer.get(), nullptr); + flushState->drawIndexedIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset, + fIndirectDrawCount); + } else { + flushState->bindBuffers(nullptr, nullptr, fCubicBuffer.get()); + flushState->draw(fCubicVertexCount, fBaseCubicVertex); + if (flushState->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) { + flushState->gpu()->insertManualFramebufferBarrier(); // http://skbug.com/9739 + } + } } } -void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) { +void GrTessellatePathOp::drawCoverPass(GrOpFlushState* flushState) { // Allows non-zero stencil values to pass and write a color, and resets the stencil value back // to zero; discards immediately on stencil values of zero. // NOTE: It's ok to not check the clip here because the previous stencil pass only wrote to @@ -362,7 +563,7 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) { GrPipeline::InitArgs initArgs; if (GrAAType::kNone != fAAType) { initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias; - if (1 == state->proxy()->numSamples()) { + if (1 == flushState->proxy()->numSamples()) { SkASSERT(GrAAType::kCoverage == fAAType); // We are mixed sampled. Use conservative raster to make the sample coverage mask 100% // at every fragment. This way we will still get a double hit on shared edges, but @@ -371,10 +572,10 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) { initArgs.fInputFlags |= GrPipeline::InputFlags::kConservativeRaster; } } - initArgs.fCaps = &state->caps(); - initArgs.fDstProxyView = state->drawOpArgs().dstProxyView(); - initArgs.fWriteSwizzle = state->drawOpArgs().writeSwizzle(); - GrPipeline pipeline(initArgs, std::move(fProcessors), state->detachAppliedClip()); + initArgs.fCaps = &flushState->caps(); + initArgs.fDstProxyView = flushState->drawOpArgs().dstProxyView(); + initArgs.fWriteSwizzle = flushState->drawOpArgs().writeSwizzle(); + GrPipeline pipeline(initArgs, std::move(fProcessors), flushState->detachAppliedClip()); if (fDoFillTriangleBuffer) { SkASSERT(fTriangleBuffer); @@ -421,29 +622,32 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) { } GrFillTriangleShader fillTriangleShader(fViewMatrix, fColor); - GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, &fillTriangleShader); - state->bindPipelineAndScissorClip(programInfo, this->bounds()); - state->bindTextures(fillTriangleShader, nullptr, pipeline); - state->bindBuffers(nullptr, nullptr, fTriangleBuffer.get()); - state->draw(fTriangleVertexCount, fBaseTriangleVertex); + GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline, + &fillTriangleShader); + flushState->bindPipelineAndScissorClip(programInfo, this->bounds()); + flushState->bindTextures(fillTriangleShader, nullptr, pipeline); + flushState->bindBuffers(nullptr, nullptr, fTriangleBuffer.get()); + flushState->draw(fTriangleVertexCount, fBaseTriangleVertex); if (fStencilCubicsShader) { + SkASSERT(fCubicBuffer); + // At this point, every pixel is filled in except the ones touched by curves. Issue a // final cover pass over the curves by drawing their convex hulls. This will fill in any // remaining samples and reset the stencil buffer. pipeline.setUserStencil(&kTestAndResetStencil); GrFillCubicHullShader fillCubicHullShader(fViewMatrix, fColor); - GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, + GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline, &fillCubicHullShader); - state->bindPipelineAndScissorClip(programInfo, this->bounds()); - state->bindTextures(fillCubicHullShader, nullptr, pipeline); + flushState->bindPipelineAndScissorClip(programInfo, this->bounds()); + flushState->bindTextures(fillCubicHullShader, nullptr, pipeline); // Here we treat fCubicBuffer as an instance buffer. It should have been prepared with // the base vertex on an instance boundary in order to accommodate this. SkASSERT((fCubicVertexCount % 4) == 0); SkASSERT((fBaseCubicVertex % 4) == 0); - state->bindBuffers(nullptr, fCubicBuffer.get(), nullptr); - state->drawInstanced(fCubicVertexCount >> 2, fBaseCubicVertex >> 2, 4, 0); + flushState->bindBuffers(nullptr, fCubicBuffer.get(), nullptr); + flushState->drawInstanced(fCubicVertexCount >> 2, fBaseCubicVertex >> 2, 4, 0); } return; } @@ -451,9 +655,10 @@ void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) { // There are no triangles to fill. Just draw a bounding box. pipeline.setUserStencil(&kTestAndResetStencil); GrFillBoundingBoxShader fillBoundingBoxShader(fViewMatrix, fColor, fPath.getBounds()); - GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, &fillBoundingBoxShader); - state->bindPipelineAndScissorClip(programInfo, this->bounds()); - state->bindTextures(fillBoundingBoxShader, nullptr, pipeline); - state->bindBuffers(nullptr, nullptr, nullptr); - state->draw(4, 0); + GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline, + &fillBoundingBoxShader); + flushState->bindPipelineAndScissorClip(programInfo, this->bounds()); + flushState->bindTextures(fillBoundingBoxShader, nullptr, pipeline); + flushState->bindBuffers(nullptr, nullptr, nullptr); + flushState->draw(4, 0); } diff --git a/src/gpu/tessellate/GrTessellatePathOp.h b/src/gpu/tessellate/GrTessellatePathOp.h index 8ee6d21bd6..89c9f7fe15 100644 --- a/src/gpu/tessellate/GrTessellatePathOp.h +++ b/src/gpu/tessellate/GrTessellatePathOp.h @@ -12,10 +12,11 @@ class GrAppliedHardClip; class GrStencilPathShader; +class GrResolveLevelCounter; -// Renders paths using a hybrid Red Book "stencil, then cover" method. Curves get linearized by -// GPU tessellation shaders. This Op doesn't apply analytic AA, so it requires a render target that -// supports either MSAA or mixed samples if AA is desired. +// Renders paths using a hybrid "Red Book" (stencil, then cover) method. Curves get linearized by +// either GPU tessellation shaders or indirect draws. This Op doesn't apply analytic AA, so it +// requires a render target that supports either MSAA or mixed samples if AA is desired. class GrTessellatePathOp : public GrDrawOp { public: enum class Flags { @@ -66,25 +67,41 @@ private: // and this is not an option as it would introduce T-junctions with the outer cubics. bool prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Target*, int* numCountedCurves); - // Produces a "Red Book" style triangulation of the SkPath's inner polygon(s). The inner - // polygons connect the endpoints of each verb. (i.e., they are the path that would result from - // collapsing all curves to single lines.) Stencilled together with the outer cubics, these - // define the complete path. + // Produces a "Red Book" style triangulation of the SkPath's inner polygon(s) using a + // "middle-out" topology (See GrMiddleOutPolygonTriangulator), and then prepares outer cubics in + // the cubic buffer. The inner triangles and outer cubics stencilled together define the + // complete path. // - // This method emits the inner triangles with a "middle-out" topology. Middle-out can reduce - // the load on the rasterizer by a great deal as compared to a linear triangle strip or fan. - // See GrMiddleOutPolygonTriangulator. - void prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target*, int* numCountedCurves); + // If a resolveLevel counter is provided, this method resets it and uses it to count and + // prepares the outer cubics as indirect draws. Otherwise they are prepared as hardware + // tessellation patches. + // + // If drawTrianglesAsIndirectCubicDraw is true, then the resolveLevel counter must be non-null, + // and we express the inner triangles as an indirect cubic draw and sneak them in alongside the + // other cubic draws. + void prepareMiddleOutTrianglesAndCubics(GrMeshDrawOp::Target*, GrResolveLevelCounter* = nullptr, + bool drawTrianglesAsIndirectCubicDraw = false); - enum class CubicDataAlignment : bool { - kVertexBoundary, - kInstanceBoundary - }; + // Prepares a list of indirect draw commands and instance data for the path's "outer cubics", + // converting any quadratics to cubics. An outer cubic is an independent, 4-point closed contour + // consisting of a single cubic curve. Stencilled together with the inner triangles, these + // define the complete path. + void prepareIndirectOuterCubics(GrMeshDrawOp::Target*, const GrResolveLevelCounter&); - // Writes an array of "outer" cubics from each bezier in the SkPath, converting any quadratics - // to cubics. An outer cubic is an independent, 4-point closed contour consisting of a single - // cubic curve. Stencilled together with the inner triangles, these define the complete path. - void prepareOuterCubics(GrMeshDrawOp::Target*, int numCountedCurves, CubicDataAlignment); + // For performance reasons we can often express triangles as an indirect cubic draw and sneak + // them in alongside the other indirect draws. This prepareIndirectOuterCubics variant allows + // the caller to provide a mapped cubic buffer with triangles already written into 4-point + // instances at the beginning. If numTrianglesAtBeginningOfData is nonzero, we add an extra + // indirect draw that renders these triangles. + void prepareIndirectOuterCubicsAndTriangles(GrMeshDrawOp::Target*, const GrResolveLevelCounter&, + SkPoint* cubicData, + int numTrianglesAtBeginningOfData); + + // Writes an array of "outer cubic" tessellation patches from each bezier in the SkPath, + // converting any quadratics to cubics. An outer cubic is an independent, 4-point closed contour + // consisting of a single cubic curve. Stencilled together with the inner triangles, these + // define the complete path. + void prepareTessellatedOuterCubics(GrMeshDrawOp::Target*, int numCountedCurves); // Writes an array of cubic "wedges" from the SkPath, converting any lines or quadratics to // cubics. A wedge is an independent, 5-point closed contour consisting of 4 cubic control @@ -92,7 +109,7 @@ private: // stencilled, these wedges alone define the complete path. // // TODO: Eventually we want to use rational cubic wedges in order to support conics. - void prepareCubicWedges(GrMeshDrawOp::Target*); + void prepareTessellatedCubicWedges(GrMeshDrawOp::Target*); void onExecute(GrOpFlushState*, const SkRect& chainBounds) override; void drawStencilPass(GrOpFlushState*); @@ -137,6 +154,13 @@ private: int fCubicVertexCount; GrStencilPathShader* fStencilCubicsShader = nullptr; + // If fIndirectDrawBuffer is non-null, then we issue an indexed-indirect draw instead of using + // hardware tessellation. This is oftentimes faster than tessellation, and other times it serves + // as a polyfill when tessellation just isn't supported. + sk_sp fIndirectDrawBuffer; + size_t fIndirectDrawOffset; + int fIndirectDrawCount; + friend class GrOpMemoryPool; // For ctor. public: diff --git a/src/gpu/tessellate/GrTessellationPathRenderer.cpp b/src/gpu/tessellate/GrTessellationPathRenderer.cpp index 85fa43ee19..ab21f28b7e 100644 --- a/src/gpu/tessellate/GrTessellationPathRenderer.cpp +++ b/src/gpu/tessellate/GrTessellationPathRenderer.cpp @@ -37,8 +37,6 @@ GrTessellationPathRenderer::GrTessellationPathRenderer(const GrCaps& caps) : fAt GrPathRenderer::CanDrawPath GrTessellationPathRenderer::onCanDrawPath( const CanDrawPathArgs& args) const { - // This class should not have been added to the chain without tessellation support. - SkASSERT(args.fCaps->shaderCaps()->tessellationSupport()); if (!args.fShape->style().isSimpleFill() || args.fShape->inverseFilled() || args.fViewMatrix->hasPerspective()) { return CanDrawPath::kNo;