Reland "Delete the index buffer from middle-out tessellation"

This is a reland of 0d0b1b3b56

Original change's description:
> Delete the index buffer from middle-out tessellation
>
> This gives us more flexibility for customizing triangulations in
> future modes. It is also hopefully cheaper than the extra memory
> indirection from indexed draws.
>
> Bug: skia:10419
> Bug: chromium:1202607
> Change-Id: Iba41a35a634edf8f962c3d604c7e035e7a85801d
> Reviewed-on:
> Commit-Queue: Chris Dalton <>
> Reviewed-by: Greg Daniel <>

Bug: skia:10419
Bug: chromium:1202607
Change-Id: I2f5022d2122dee1ca197780b534663b37cd2504f
Reviewed-by: Greg Daniel <>
Commit-Queue: Chris Dalton <>
This commit is contained in:
Chris Dalton 2021-05-13 11:26:33 -06:00 committed by Skia Commit-Bot
parent 1264001c8c
commit 0c2ee32f4c
11 changed files with 63 additions and 112 deletions

View File

@ -52,7 +52,7 @@ public:
bool operator==(const GrDrawIndirectWriter& that) { return fData == that.fData; }
bool isValid() const { return fData != nullptr; }
operator bool() const { return fData != nullptr; }
GrDrawIndirectWriter makeOffset(int drawCount) const { return {fData + drawCount}; }

View File

@ -51,7 +51,7 @@ GrShaderCaps::GrShaderCaps(const GrContextOptions& options) {
fSampleMaskSupport = false;
fExternalTextureSupport = false;
fVertexIDSupport = false;
fFPManipulationSupport = false;
fBitManipulationSupport = false;
fFloatIs32Bits = true;
fHalfIs32Bits = false;
fHasLowFragmentPrecision = false;
@ -135,7 +135,7 @@ void GrShaderCaps::dumpJSON(SkJSONWriter* writer) const {
writer->appendBool("Sample mask support", fSampleMaskSupport);
writer->appendBool("External texture support", fExternalTextureSupport);
writer->appendBool("sk_VertexID support", fVertexIDSupport);
writer->appendBool("Floating point manipulation support", fFPManipulationSupport);
writer->appendBool("Bit manipulation support", fBitManipulationSupport);
writer->appendBool("float == fp32", fFloatIs32Bits);
writer->appendBool("half == fp32", fHalfIs32Bits);
writer->appendBool("Has poor fragment precision", fHasLowFragmentPrecision);

View File

@ -76,8 +76,8 @@ public:
bool vertexIDSupport() const { return fVertexIDSupport; }
// frexp, ldexp, etc.
bool fpManipulationSupport() const { return fFPManipulationSupport; }
// frexp, ldexp, findMSB, findLSB.
bool bitManipulationSupport() const { return fBitManipulationSupport; }
bool floatIs32Bits() const { return fFloatIs32Bits; }
@ -289,7 +289,7 @@ private:
bool fSampleMaskSupport : 1;
bool fExternalTextureSupport : 1;
bool fVertexIDSupport : 1;
bool fFPManipulationSupport : 1;
bool fBitManipulationSupport : 1;
bool fFloatIs32Bits : 1;
bool fHalfIs32Bits : 1;
bool fHasLowFragmentPrecision : 1;

View File

@ -241,7 +241,7 @@ void GrD3DCaps::initShaderCaps(int vendorID, const D3D12_FEATURE_DATA_D3D12_OPTI
shaderCaps->fIntegerSupport = true;
shaderCaps->fVertexIDSupport = true;
shaderCaps->fFPManipulationSupport = true;
shaderCaps->fBitManipulationSupport = true;
shaderCaps->fFloatIs32Bits = true;
shaderCaps->fHalfIs32Bits =

View File

@ -955,9 +955,9 @@ void GrGLCaps::initGLSL(const GrGLContextInfo& ctxInfo, const GrGLInterface* gli
if (GR_IS_GR_GL(standard)) {
shaderCaps->fFPManipulationSupport = ctxInfo.glslGeneration() >= k400_GrGLSLGeneration;
shaderCaps->fBitManipulationSupport = ctxInfo.glslGeneration() >= k400_GrGLSLGeneration;
} else if (GR_IS_GR_GL_ES(standard) || GR_IS_GR_WEBGL(standard)) {
shaderCaps->fFPManipulationSupport = ctxInfo.glslGeneration() >= k310es_GrGLSLGeneration;
shaderCaps->fBitManipulationSupport = ctxInfo.glslGeneration() >= k310es_GrGLSLGeneration;
shaderCaps->fFloatIs32Bits = is_float_fp32(ctxInfo, gli, GR_GL_HIGH_FLOAT);

View File

@ -101,19 +101,13 @@ void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMa
SkASSERT(count == breadcrumbTriangleList->count());
fIndirectIndexBuffer = GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
if (!fIndirectIndexBuffer) {
// Allocate space for the GrDrawIndexedIndirectCommand structs. Allocate enough for each
// possible resolve level (kMaxResolveLevel; resolveLevel=0 never has any instances), plus one
// more for the optional inner fan triangles.
int indirectLockCnt = kMaxResolveLevel + 1;
GrDrawIndexedIndirectWriter indirectWriter = target->makeDrawIndexedIndirectSpace(
indirectLockCnt, &fIndirectDrawBuffer, &fIndirectDrawOffset);
GrDrawIndirectWriter indirectWriter = target->makeDrawIndirectSpace(indirectLockCnt,
if (!indirectWriter) {
@ -220,9 +214,9 @@ void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMa
void GrPathIndirectTessellator::draw(GrOpFlushState* flushState) const {
if (fIndirectDrawCount) {
flushState->bindBuffers(fIndirectIndexBuffer, fInstanceBuffer, nullptr);
flushState->drawIndexedIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset,
flushState->bindBuffers(nullptr, fInstanceBuffer, nullptr);
flushState->drawIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset,

View File

@ -71,7 +71,6 @@ private:
sk_sp<const GrBuffer> fIndirectDrawBuffer;
size_t fIndirectDrawOffset = 0;
int fIndirectDrawCount = 0;
sk_sp<const GrBuffer> fIndirectIndexBuffer;
// Base class for GrPathTessellators that draw actual hardware tessellation patches.

View File

@ -302,100 +302,56 @@ GrGLSLGeometryProcessor* GrWedgeTessellateShader::createGLSLInstance(const GrSha
return new WedgeImpl;
constexpr static int kMaxResolveLevel = GrTessellationPathRenderer::kMaxResolveLevel;
sk_sp<const GrGpuBuffer> GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
GrResourceProvider* resourceProvider) {
if (auto buffer = resourceProvider->findByUniqueKey<GrGpuBuffer>(gMiddleOutIndexBufferKey)) {
return std::move(buffer);
// One explicit triangle at index 0, and one middle-out cubic with kMaxResolveLevel line
// segments beginning at index 3.
constexpr static int kIndexCount = 3 + NumVerticesAtResolveLevel(kMaxResolveLevel);
auto buffer = resourceProvider->createBuffer(
kIndexCount * sizeof(uint16_t), GrGpuBufferType::kIndex, kStatic_GrAccessPattern);
if (!buffer) {
return nullptr;
// We shouldn't bin and/or cache static buffers.
SkASSERT(buffer->size() == kIndexCount * sizeof(uint16_t));
auto indexData = static_cast<uint16_t*>(buffer->map());
SkAutoTMalloc<uint16_t> stagingBuffer;
if (!indexData) {
indexData = stagingBuffer.reset(kIndexCount);
// Indices 0,1,2 contain special values that emit points P0, P1, and P2 respectively. (When the
// vertex shader is fed an index value larger than (1 << kMaxResolveLevel), it emits
// P[index % 4].)
int i = 0;
indexData[i++] = (1 << kMaxResolveLevel) + 4; // % 4 == 0
indexData[i++] = (1 << kMaxResolveLevel) + 5; // % 4 == 1
indexData[i++] = (1 << kMaxResolveLevel) + 6; // % 4 == 2
// Starting at index 3, we triangulate a cubic with 2^kMaxResolveLevel line segments. Each
// index value corresponds to parametric value T=(index / 2^kMaxResolveLevel). Since the
// triangles are arranged in "middle-out" order, we will be able to conveniently control the
// resolveLevel by changing only the indexCount.
for (uint16_t advance = 1 << (kMaxResolveLevel - 1); advance; advance >>= 1) {
uint16_t T = 0;
do {
indexData[i++] = T;
indexData[i++] = (T += advance);
indexData[i++] = (T += advance);
} while (T != (1 << kMaxResolveLevel));
SkASSERT(i == kIndexCount);
if (buffer->isMapped()) {
} else {
buffer->updateData(stagingBuffer, kIndexCount * sizeof(uint16_t));
return std::move(buffer);
class GrMiddleOutCubicShader::Impl : public GrStencilPathShader::Impl {
void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
const auto& shader = args.fGeomProc.cast<GrMiddleOutCubicShader>();
args.fVertBuilder->defineConstantf("int", "kMaxVertexID", "%i", 1 << kMaxResolveLevel);
args.fVertBuilder->defineConstantf("float", "kInverseMaxVertexID",
"(1.0 / float(kMaxVertexID))");
if (args.fShaderCaps->bitManipulationSupport()) {
// Determines the T value at which to place the given vertex in a "middle-out" topology.
float find_middle_out_T() {
int totalTriangleIdx = sk_VertexID/3 + 1;
int depth = findMSB(totalTriangleIdx);
int firstTriangleAtDepth = (1 << depth);
int triangleIdxWithinDepth = totalTriangleIdx - firstTriangleAtDepth;
int vertexIdxWithinDepth = triangleIdxWithinDepth * 2 + sk_VertexID % 3;
return ldexp(float(vertexIdxWithinDepth), -1 - depth);
} else {
// Determines the T value at which to place the given vertex in a "middle-out" topology.
float find_middle_out_T() {
float totalTriangleIdx = float(sk_VertexID/3) + 1;
float depth = floor(log2(totalTriangleIdx));
float firstTriangleAtDepth = exp2(depth);
float triangleIdxWithinDepth = totalTriangleIdx - firstTriangleAtDepth;
float vertexIdxWithinDepth = triangleIdxWithinDepth * 2 + float(sk_VertexID % 3);
return vertexIdxWithinDepth * exp2(-1 - depth);
float2 pos;
if (isinf(inputPoints_2_3.z)) {
// A conic with w=Inf is an exact triangle.
pos = (sk_VertexID == 0) ? inputPoints_0_1.xy :
(sk_VertexID != kMaxVertexID) ? : inputPoints_2_3.xy;
pos = (sk_VertexID < 1) ? inputPoints_0_1.xy
: (sk_VertexID == 1) ?
: inputPoints_2_3.xy;
} else {
// Evaluate the cubic at T = (sk_VertexID / 2^kMaxResolveLevel).
float T = float(sk_VertexID) * kInverseMaxVertexID;
float4x3 P = unpack_rational_cubic(inputPoints_0_1.xy,,
float T = find_middle_out_T();
pos = eval_rational_cubic(P, T);
GrShaderVar vertexPos("pos", kFloat2_GrSLType);
if (!shader.viewMatrix().isIdentity()) {
const char* viewMatrix;
fViewMatrixUniform = args.fUniformHandler->addUniform(
nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix);
float2 transformedPoint = (%s * float3(pos, 1)).xy;)", viewMatrix);
vertexPos.set(kFloat2_GrSLType, "transformedPoint");
pos = (%s * float3(pos, 1)).xy;)", viewMatrix);
gpArgs->fPositionVar = vertexPos;
gpArgs->fPositionVar.set(kFloat2_GrSLType, "pos");
// No fragment shader.

View File

@ -133,10 +133,17 @@ private:
GrGLSLGeometryProcessor* createGLSLInstance(const GrShaderCaps&) const override;
// Uses indirect (instanced) draws to triangulate standalone closed cubics with a "middle-out"
// topology. The caller must compute each cubic's resolveLevel on the CPU (i.e., the log2 number of
// line segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2), and then
// sort the instance buffer by resolveLevel for efficient batching of indirect draws.
// Uses instanced draws to triangulate standalone closed curves with a "middle-out" topology.
// Middle-out draws a triangle with vertices at T=[0, 1/2, 1] and then recurses breadth first:
// depth=0: T=[0, 1/2, 1]
// depth=1: T=[0, 1/4, 2/4], T=[2/4, 3/4, 1]
// depth=2: T=[0, 1/8, 2/8], T=[2/8, 3/8, 4/8], T=[4/8, 5/8, 6/8], T=[6/8, 7/8, 1]
// ...
// The caller may compute each cubic's resolveLevel on the CPU (i.e., the log2 number of line
// segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2/conic_log2), and
// then sort the instance buffer by resolveLevel for efficient batching of indirect draws.
class GrMiddleOutCubicShader : public GrStencilPathShader {
// How many vertices do we need to draw in order to triangulate a cubic with 2^resolveLevel
@ -152,21 +159,16 @@ public:
// Configures an indirect draw to render cubic instances with 2^resolveLevel evenly-spaced (in
// the parametric sense) line segments.
static void WriteDrawIndirectCmd(GrDrawIndexedIndirectWriter* indirectWriter, int resolveLevel,
static void WriteDrawIndirectCmd(GrDrawIndirectWriter* indirectWriter, int resolveLevel,
uint32_t instanceCount, uint32_t baseInstance) {
SkASSERT(resolveLevel > 0 && resolveLevel <= GrTessellationPathRenderer::kMaxResolveLevel);
// Starting at baseIndex=3, the index buffer triangulates a cubic with 2^kMaxResolveLevel
// line segments. Each index value corresponds to a parametric T value on the curve. Since
// the triangles are arranged in "middle-out" order, we can conveniently control the
// resolveLevel by changing only the indexCount.
uint32_t indexCount = NumVerticesAtResolveLevel(resolveLevel);
indirectWriter->writeIndexed(indexCount, 3, instanceCount, baseInstance, 0);
// The vertex shader determines the T value at which to draw each vertex. Since the
// triangles are arranged in "middle-out" order, we can conveniently control the
// resolveLevel by changing only the vertexCount.
uint32_t vertexCount = NumVerticesAtResolveLevel(resolveLevel);
indirectWriter->write(instanceCount, baseInstance, vertexCount, 0);
// Returns the index buffer that should be bound when drawing with this shader.
// (Our vertex shader uses raw index values directly, so there is no vertex buffer.)
static sk_sp<const GrGpuBuffer> FindOrMakeMiddleOutIndexBuffer(GrResourceProvider*);
GrMiddleOutCubicShader(const SkMatrix& viewMatrix)
: GrStencilPathShader(kTessellate_GrMiddleOutCubicShader_ClassID, viewMatrix,
GrPrimitiveType::kTriangles) {

View File

@ -758,7 +758,7 @@ void GrStrokeIndirectTessellator::prepare(GrMeshDrawOp::Target* target,
GrDrawIndirectWriter indirectWriter = target->makeDrawIndirectSpace(fChainedDrawIndirectCount,
if (!indirectWriter.isValid()) {
if (!indirectWriter) {

View File

@ -716,7 +716,7 @@ void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties,
shaderCaps->fIntegerSupport = true;
shaderCaps->fNonsquareMatrixSupport = true;
shaderCaps->fVertexIDSupport = true;
shaderCaps->fFPManipulationSupport = true;
shaderCaps->fBitManipulationSupport = true;
// Assume the minimum precisions mandated by the SPIR-V spec.
shaderCaps->fFloatIs32Bits = true;