From 40c906f58e3ed53b445675ac16844328bfdcb5a5 Mon Sep 17 00:00:00 2001 From: Chris Dalton Date: Mon, 26 Jul 2021 11:27:05 -0600 Subject: [PATCH] Don't store a GrVertexWriter pointer in GrMiddleOutPolygonTriangulator GrVertexWriter is itself a pointer, so this is a double reference. It's ~20% faster on the micro benchmark to store this as a literal GrVertexWriter. The motivation for this CL is that we need to optimize the CPU-side processing for tessellation in order to place the tessellation atlas above the cached triangulating path renderer in the chain. Bug: skia:12258 Change-Id: Ie10036d2fe618e848cdf67b9c23038fb9edf4c3b Reviewed-on: https://skia-review.googlesource.com/c/skia/+/433077 Reviewed-by: Brian Osman Commit-Queue: Chris Dalton --- bench/TessellateBench.cpp | 4 ++- .../GrMiddleOutPolygonTriangulator.h | 28 +++++++++++-------- src/gpu/tessellate/GrPathCurveTessellator.cpp | 8 +++--- src/gpu/tessellate/GrPathStencilCoverOp.cpp | 6 ++-- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/bench/TessellateBench.cpp b/bench/TessellateBench.cpp index 7a6cb228a4..7cb634c0ec 100644 --- a/bench/TessellateBench.cpp +++ b/bench/TessellateBench.cpp @@ -202,7 +202,9 @@ DEF_PATH_TESS_BENCH(middle_out_triangulation, int baseVertex; GrVertexWriter vertexWriter = static_cast<SkPoint*>(fTarget->makeVertexSpace( sizeof(SkPoint), kNumCubicsInChalkboard, &buffer, &baseVertex)); - GrMiddleOutPolygonTriangulator::WritePathInnerFan(&vertexWriter, 0, 0, fPath); + int numTrianglesWritten; + GrMiddleOutPolygonTriangulator::WritePathInnerFan(std::move(vertexWriter), 0, 0, fPath, + &numTrianglesWritten); } using PathStrokeList = GrStrokeTessellator::PathStrokeList; diff --git a/src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h b/src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h index a2df17b7df..b925181cfb 100644 --- a/src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h +++ b/src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h @@ -46,11 +46,11 @@ public: // Writes out 3 
SkPoints per triangle to "vertexWriter". Additionally writes out "pad32Count" // repetitions of "pad32Value" after each triangle. Set pad32Count to 0 if the triangles are // to be tightly packed. - GrMiddleOutPolygonTriangulator(GrVertexWriter* vertexWriter, int pad32Count, + GrMiddleOutPolygonTriangulator(GrVertexWriter&& vertexWriter, int pad32Count, uint32_t pad32Value, int maxPushVertexCalls) - : fPad32Count(pad32Count) - , fPad32Value(pad32Value) - , fVertexWriter(vertexWriter) { + : fVertexWriter(std::move(vertexWriter)) + , fPad32Count(pad32Count) + , fPad32Value(pad32Value) { // Determine the deepest our stack can ever go. int maxStackDepth = SkNextLog2(maxPushVertexCalls) + 1; if (maxStackDepth > kStackPreallocCount) { @@ -122,9 +122,14 @@ public: SkASSERT(fTop->fVertexIdxDelta == 0); // Ensure we are in the initial stack state. } - static int WritePathInnerFan(GrVertexWriter* vertexWriter, int pad32Count, uint32_t pad32Value, - const SkPath& path) { - GrMiddleOutPolygonTriangulator middleOut(vertexWriter, pad32Count, pad32Value, + GrVertexWriter detachVertexWriter() { return std::move(fVertexWriter); } + + static GrVertexWriter WritePathInnerFan(GrVertexWriter&& vertexWriter, + int pad32Count, + uint32_t pad32Value, + const SkPath& path, + int* numTrianglesWritten) { + GrMiddleOutPolygonTriangulator middleOut(std::move(vertexWriter), pad32Count, pad32Value, path.countVerbs()); for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) { switch (verb) { @@ -141,7 +146,8 @@ public: break; } } - return middleOut.close(); + *numTrianglesWritten = middleOut.close(); + return middleOut.detachVertexWriter(); } private: @@ -168,20 +174,20 @@ private: void popTopTriangle(const SkPoint& lastPt) { SkASSERT(fTop > fVertexStack); // We should never pop the starting point. --fTop; - fVertexWriter->write(fTop[0].fPoint, fTop[1].fPoint, lastPt); + fVertexWriter.write(fTop[0].fPoint, fTop[1].fPoint, lastPt); if (fPad32Count) { // Output a 4-point conic with w=Inf. 
- fVertexWriter->fill(fPad32Value, fPad32Count); + fVertexWriter.fill(fPad32Value, fPad32Count); } } constexpr static int kStackPreallocCount = 32; + GrVertexWriter fVertexWriter; const int fPad32Count; const uint32_t fPad32Value; SkAutoSTMalloc<kStackPreallocCount, StackVertex> fVertexStack; SkDEBUGCODE(int fStackAllocCount;) StackVertex* fTop; - GrVertexWriter* fVertexWriter; int fTotalClosedTriangleCount = 0; }; diff --git a/src/gpu/tessellate/GrPathCurveTessellator.cpp b/src/gpu/tessellate/GrPathCurveTessellator.cpp index f0b5423e2d..a79cca8127 100644 --- a/src/gpu/tessellate/GrPathCurveTessellator.cpp +++ b/src/gpu/tessellate/GrPathCurveTessellator.cpp @@ -222,10 +222,10 @@ void GrPathCurveTessellator::prepare(GrMeshDrawTarget* target, const SkRect& cul uint32_t pad32Value = shaderCaps.infinitySupport() ? GrVertexWriter::kIEEE_32_infinity : sk_bit_cast<uint32_t>(GrTessellationShader::kTriangularConicCurveType); - int numWritten = GrMiddleOutPolygonTriangulator::WritePathInnerFan(&vertexWriter, - pad32Count, - pad32Value, path); - numRemainingTriangles -= numWritten; + int numTrianglesWritten; + vertexWriter = GrMiddleOutPolygonTriangulator::WritePathInnerFan( + std::move(vertexWriter), pad32Count, pad32Value, path, &numTrianglesWritten); + numRemainingTriangles -= numTrianglesWritten; } if (breadcrumbTriangleList) { int numWritten = 0; diff --git a/src/gpu/tessellate/GrPathStencilCoverOp.cpp b/src/gpu/tessellate/GrPathStencilCoverOp.cpp index b5c9e559b8..9671213853 100644 --- a/src/gpu/tessellate/GrPathStencilCoverOp.cpp +++ b/src/gpu/tessellate/GrPathStencilCoverOp.cpp @@ -237,8 +237,10 @@ void GrPathStencilCoverOp::onPrepare(GrOpFlushState* flushState) { GrEagerDynamicVertexAllocator vertexAlloc(flushState, &fFanBuffer, &fFanBaseVertex); int maxFanTriangles = fPath.countVerbs() - 2; // n - 2 triangles make an n-gon. 
GrVertexWriter triangleVertexWriter = vertexAlloc.lock(maxFanTriangles * 3); - fFanVertexCount = 3 * GrMiddleOutPolygonTriangulator::WritePathInnerFan( - &triangleVertexWriter, 0, 0, fPath); + int numTrianglesWritten; + GrMiddleOutPolygonTriangulator::WritePathInnerFan(std::move(triangleVertexWriter), 0, 0, + fPath, &numTrianglesWritten); + fFanVertexCount = 3 * numTrianglesWritten; SkASSERT(fFanVertexCount <= maxFanTriangles * 3); vertexAlloc.unlock(fFanVertexCount); }