Reland "Delete the index buffer from middle-out tessellation"

This is a reland of 0d0b1b3b56

Original change's description:
> Delete the index buffer from middle-out tessellation
>
> This gives us more flexibility for customizing triangulations in
> future modes. It is also hopefully cheaper than the extra memory
> indirection from indexed draws.
>
> Bug: skia:10419
> Bug: chromium:1202607
> Change-Id: Iba41a35a634edf8f962c3d604c7e035e7a85801d
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/407296
> Commit-Queue: Chris Dalton <csmartdalton@google.com>
> Reviewed-by: Greg Daniel <egdaniel@google.com>

Bug: skia:10419
Bug: chromium:1202607
Change-Id: I73487087fba09d3ab95d8fa8bd4e2435ba802075
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/408676
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
This commit is contained in:
Chris Dalton 2021-05-14 17:41:17 -06:00 committed by Skia Commit-Bot
parent 7069804621
commit 489fa0b121
11 changed files with 72 additions and 140 deletions

View File

@ -52,7 +52,7 @@ public:
bool operator==(const GrDrawIndirectWriter& that) { return fData == that.fData; }
bool isValid() const { return fData != nullptr; }
operator bool() const { return fData != nullptr; }
GrDrawIndirectWriter makeOffset(int drawCount) const { return {fData + drawCount}; }

View File

@ -51,7 +51,7 @@ GrShaderCaps::GrShaderCaps(const GrContextOptions& options) {
fSampleMaskSupport = false;
fExternalTextureSupport = false;
fVertexIDSupport = false;
fFPManipulationSupport = false;
fBitManipulationSupport = false;
fFloatIs32Bits = true;
fHalfIs32Bits = false;
fHasLowFragmentPrecision = false;
@ -135,7 +135,7 @@ void GrShaderCaps::dumpJSON(SkJSONWriter* writer) const {
writer->appendBool("Sample mask support", fSampleMaskSupport);
writer->appendBool("External texture support", fExternalTextureSupport);
writer->appendBool("sk_VertexID support", fVertexIDSupport);
writer->appendBool("Floating point manipulation support", fFPManipulationSupport);
writer->appendBool("Bit manipulation support", fBitManipulationSupport);
writer->appendBool("float == fp32", fFloatIs32Bits);
writer->appendBool("half == fp32", fHalfIs32Bits);
writer->appendBool("Has poor fragment precision", fHasLowFragmentPrecision);

View File

@ -76,8 +76,8 @@ public:
bool vertexIDSupport() const { return fVertexIDSupport; }
// frexp, ldexp, etc.
bool fpManipulationSupport() const { return fFPManipulationSupport; }
// frexp, ldexp, findMSB, findLSB.
bool bitManipulationSupport() const { return fBitManipulationSupport; }
bool floatIs32Bits() const { return fFloatIs32Bits; }
@ -289,7 +289,7 @@ private:
bool fSampleMaskSupport : 1;
bool fExternalTextureSupport : 1;
bool fVertexIDSupport : 1;
bool fFPManipulationSupport : 1;
bool fBitManipulationSupport : 1;
bool fFloatIs32Bits : 1;
bool fHalfIs32Bits : 1;
bool fHasLowFragmentPrecision : 1;

View File

@ -241,7 +241,7 @@ void GrD3DCaps::initShaderCaps(int vendorID, const D3D12_FEATURE_DATA_D3D12_OPTI
shaderCaps->fIntegerSupport = true;
shaderCaps->fVertexIDSupport = true;
shaderCaps->fFPManipulationSupport = true;
shaderCaps->fBitManipulationSupport = true;
shaderCaps->fFloatIs32Bits = true;
shaderCaps->fHalfIs32Bits =

View File

@ -968,9 +968,9 @@ void GrGLCaps::initGLSL(const GrGLContextInfo& ctxInfo, const GrGLInterface* gli
}
if (GR_IS_GR_GL(standard)) {
shaderCaps->fFPManipulationSupport = ctxInfo.glslGeneration() >= k400_GrGLSLGeneration;
shaderCaps->fBitManipulationSupport = ctxInfo.glslGeneration() >= k400_GrGLSLGeneration;
} else if (GR_IS_GR_GL_ES(standard) || GR_IS_GR_WEBGL(standard)) {
shaderCaps->fFPManipulationSupport = ctxInfo.glslGeneration() >= k310es_GrGLSLGeneration;
shaderCaps->fBitManipulationSupport = ctxInfo.glslGeneration() >= k310es_GrGLSLGeneration;
}
shaderCaps->fFloatIs32Bits = is_float_fp32(ctxInfo, gli, GR_GL_HIGH_FLOAT);

View File

@ -100,19 +100,13 @@ void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMa
SkASSERT(count == breadcrumbTriangleList->count());
}
fIndirectIndexBuffer = GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
target->resourceProvider());
if (!fIndirectIndexBuffer) {
vertexAlloc.unlock(0);
return;
}
// Allocate space for the GrDrawIndirectCommand structs. Allocate enough for each
// possible resolve level (kMaxResolveLevel; resolveLevel=0 never has any instances), plus one
// more for the optional inner fan triangles.
int indirectLockCnt = kMaxResolveLevel + 1;
GrDrawIndexedIndirectWriter indirectWriter = target->makeDrawIndexedIndirectSpace(
indirectLockCnt, &fIndirectDrawBuffer, &fIndirectDrawOffset);
GrDrawIndirectWriter indirectWriter = target->makeDrawIndirectSpace(indirectLockCnt,
&fIndirectDrawBuffer,
&fIndirectDrawOffset);
if (!indirectWriter) {
SkASSERT(!fIndirectDrawBuffer);
vertexAlloc.unlock(0);
@ -123,31 +117,22 @@ void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMa
// location at each resolve level.
GrVertexWriter instanceLocations[kMaxResolveLevel + 1];
int currentBaseInstance = fBaseInstance;
if (numTrianglesAtBeginningOfData) {
// The caller has already packed "numTrianglesAtBeginningOfData" triangles into 4-point
// instances at the beginning of the instance buffer. Add a special-case indirect draw here
// that will emit the triangles [P0, P1, P2] from these 4-point instances.
SkASSERT(fIndirectDrawCount < indirectLockCnt);
GrMiddleOutCubicShader::WriteDrawTrianglesIndirectCmd(&indirectWriter,
numTrianglesAtBeginningOfData,
fBaseInstance);
++fIndirectDrawCount;
currentBaseInstance += numTrianglesAtBeginningOfData;
}
SkASSERT(fResolveLevelCounts[0] == 0);
for (int resolveLevel = 1; resolveLevel <= kMaxResolveLevel; ++resolveLevel) {
for (int resolveLevel=1, numExtraInstances=numTrianglesAtBeginningOfData;
resolveLevel <= kMaxResolveLevel;
++resolveLevel, numExtraInstances=0) {
int instanceCountAtCurrLevel = fResolveLevelCounts[resolveLevel];
if (!instanceCountAtCurrLevel) {
if (!(instanceCountAtCurrLevel + numExtraInstances)) {
SkDEBUGCODE(instanceLocations[resolveLevel] = nullptr;)
continue;
}
instanceLocations[resolveLevel] = instanceWriter.makeOffset(0);
SkASSERT(fIndirectDrawCount < indirectLockCnt);
GrMiddleOutCubicShader::WriteDrawCubicsIndirectCmd(&indirectWriter, resolveLevel,
instanceCountAtCurrLevel,
currentBaseInstance);
GrMiddleOutCubicShader::WriteDrawIndirectCmd(&indirectWriter, resolveLevel,
instanceCountAtCurrLevel + numExtraInstances,
currentBaseInstance);
++fIndirectDrawCount;
currentBaseInstance += instanceCountAtCurrLevel;
currentBaseInstance += instanceCountAtCurrLevel + numExtraInstances;
instanceWriter = instanceWriter.makeOffset(instanceCountAtCurrLevel * 4 * sizeof(SkPoint));
}
@ -224,9 +209,9 @@ void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMa
void GrPathIndirectTessellator::draw(GrOpFlushState* flushState) const {
if (fIndirectDrawCount) {
flushState->bindBuffers(fIndirectIndexBuffer, fInstanceBuffer, nullptr);
flushState->drawIndexedIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset,
fIndirectDrawCount);
flushState->bindBuffers(nullptr, fInstanceBuffer, nullptr);
flushState->drawIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset,
fIndirectDrawCount);
}
}

View File

@ -71,7 +71,6 @@ private:
sk_sp<const GrBuffer> fIndirectDrawBuffer;
size_t fIndirectDrawOffset = 0;
int fIndirectDrawCount = 0;
sk_sp<const GrBuffer> fIndirectIndexBuffer;
};
// Base class for GrPathTessellators that draw actual hardware tessellation patches.

View File

@ -302,100 +302,56 @@ GrGLSLGeometryProcessor* GrWedgeTessellateShader::createGLSLInstance(const GrSha
return new WedgeImpl;
}
constexpr static int kMaxResolveLevel = GrTessellationPathRenderer::kMaxResolveLevel;
GR_DECLARE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey);
sk_sp<const GrGpuBuffer> GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
GrResourceProvider* resourceProvider) {
GR_DEFINE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey);
if (auto buffer = resourceProvider->findByUniqueKey<GrGpuBuffer>(gMiddleOutIndexBufferKey)) {
return std::move(buffer);
}
// One explicit triangle at index 0, and one middle-out cubic with 2^kMaxResolveLevel line
// segments beginning at index 3.
constexpr static int kIndexCount = 3 + NumVerticesAtResolveLevel(kMaxResolveLevel);
auto buffer = resourceProvider->createBuffer(
kIndexCount * sizeof(uint16_t), GrGpuBufferType::kIndex, kStatic_GrAccessPattern);
if (!buffer) {
return nullptr;
}
// We shouldn't bin and/or cache static buffers.
SkASSERT(buffer->size() == kIndexCount * sizeof(uint16_t));
SkASSERT(!buffer->resourcePriv().getScratchKey().isValid());
auto indexData = static_cast<uint16_t*>(buffer->map());
SkAutoTMalloc<uint16_t> stagingBuffer;
if (!indexData) {
SkASSERT(!buffer->isMapped());
indexData = stagingBuffer.reset(kIndexCount);
}
// Indices 0,1,2 contain special values that emit points P0, P1, and P2 respectively. (When the
// vertex shader is fed an index value larger than (1 << kMaxResolveLevel), it emits
// P[index % 4].)
int i = 0;
indexData[i++] = (1 << kMaxResolveLevel) + 4; // % 4 == 0
indexData[i++] = (1 << kMaxResolveLevel) + 5; // % 4 == 1
indexData[i++] = (1 << kMaxResolveLevel) + 6; // % 4 == 2
// Starting at index 3, we triangulate a cubic with 2^kMaxResolveLevel line segments. Each
// index value corresponds to parametric value T=(index / 2^kMaxResolveLevel). Since the
// triangles are arranged in "middle-out" order, we will be able to conveniently control the
// resolveLevel by changing only the indexCount.
for (uint16_t advance = 1 << (kMaxResolveLevel - 1); advance; advance >>= 1) {
uint16_t T = 0;
do {
indexData[i++] = T;
indexData[i++] = (T += advance);
indexData[i++] = (T += advance);
} while (T != (1 << kMaxResolveLevel));
}
SkASSERT(i == kIndexCount);
if (buffer->isMapped()) {
buffer->unmap();
} else {
buffer->updateData(stagingBuffer, kIndexCount * sizeof(uint16_t));
}
buffer->resourcePriv().setUniqueKey(gMiddleOutIndexBufferKey);
return std::move(buffer);
}
class GrMiddleOutCubicShader::Impl : public GrStencilPathShader::Impl {
void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
const auto& shader = args.fGeomProc.cast<GrMiddleOutCubicShader>();
args.fVaryingHandler->emitAttributes(shader);
args.fVertBuilder->defineConstantf("int", "kMaxVertexID", "%i", 1 << kMaxResolveLevel);
args.fVertBuilder->defineConstantf("float", "kInverseMaxVertexID",
"(1.0 / float(kMaxVertexID))");
args.fVertBuilder->insertFunction(kUnpackRationalCubicFn);
args.fVertBuilder->insertFunction(kEvalRationalCubicFn);
if (args.fShaderCaps->bitManipulationSupport()) {
// Determines the T value at which to place the given vertex in a "middle-out" topology.
args.fVertBuilder->insertFunction(R"(
float find_middle_out_T() {
int totalTriangleIdx = sk_VertexID/3 + 1;
int depth = findMSB(totalTriangleIdx);
int firstTriangleAtDepth = (1 << depth);
int triangleIdxWithinDepth = totalTriangleIdx - firstTriangleAtDepth;
int vertexIdxWithinDepth = triangleIdxWithinDepth * 2 + sk_VertexID % 3;
return ldexp(float(vertexIdxWithinDepth), -1 - depth);
})");
} else {
// Determines the T value at which to place the given vertex in a "middle-out" topology.
args.fVertBuilder->insertFunction(R"(
float find_middle_out_T() {
float totalTriangleIdx = float(sk_VertexID/3) + 1;
float depth = floor(log2(totalTriangleIdx));
float firstTriangleAtDepth = exp2(depth);
float triangleIdxWithinDepth = totalTriangleIdx - firstTriangleAtDepth;
float vertexIdxWithinDepth = triangleIdxWithinDepth * 2 + float(sk_VertexID % 3);
return vertexIdxWithinDepth * exp2(-1 - depth);
})");
}
args.fVertBuilder->codeAppend(R"(
float2 pos;
if (isinf(inputPoints_2_3.z)) {
// A conic with w=Inf is an exact triangle.
pos = ((sk_VertexID & 3) == 0) ? inputPoints_0_1.xy :
((sk_VertexID & 2) == 0) ? inputPoints_0_1.zw : inputPoints_2_3.xy;
pos = (sk_VertexID < 1) ? inputPoints_0_1.xy
: (sk_VertexID == 1) ? inputPoints_0_1.zw
: inputPoints_2_3.xy;
} else {
// Evaluate the cubic at T = (sk_VertexID / 2^kMaxResolveLevel).
float T = float(sk_VertexID) * kInverseMaxVertexID;
float4x3 P = unpack_rational_cubic(inputPoints_0_1.xy, inputPoints_0_1.zw,
inputPoints_2_3.xy, inputPoints_2_3.zw);
float T = find_middle_out_T();
pos = eval_rational_cubic(P, T);
})");
GrShaderVar vertexPos("pos", kFloat2_GrSLType);
if (!shader.viewMatrix().isIdentity()) {
const char* viewMatrix;
fViewMatrixUniform = args.fUniformHandler->addUniform(
nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix);
args.fVertBuilder->codeAppendf(R"(
float2 transformedPoint = (%s * float3(pos, 1)).xy;)", viewMatrix);
vertexPos.set(kFloat2_GrSLType, "transformedPoint");
pos = (%s * float3(pos, 1)).xy;)", viewMatrix);
}
gpArgs->fPositionVar = vertexPos;
gpArgs->fPositionVar.set(kFloat2_GrSLType, "pos");
// No fragment shader.
}
};

View File

@ -133,10 +133,17 @@ private:
GrGLSLGeometryProcessor* createGLSLInstance(const GrShaderCaps&) const override;
};
// Uses indirect (instanced) draws to triangulate standalone closed cubics with a "middle-out"
// topology. The caller must compute each cubic's resolveLevel on the CPU (i.e., the log2 number of
// line segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2), and then
// sort the instance buffer by resolveLevel for efficient batching of indirect draws.
// Uses instanced draws to triangulate standalone closed curves with a "middle-out" topology.
// Middle-out draws a triangle with vertices at T=[0, 1/2, 1] and then recurses breadth first:
//
// depth=0: T=[0, 1/2, 1]
// depth=1: T=[0, 1/4, 2/4], T=[2/4, 3/4, 1]
// depth=2: T=[0, 1/8, 2/8], T=[2/8, 3/8, 4/8], T=[4/8, 5/8, 6/8], T=[6/8, 7/8, 1]
// ...
//
// The caller may compute each cubic's resolveLevel on the CPU (i.e., the log2 number of line
// segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2/conic_log2), and
// then sort the instance buffer by resolveLevel for efficient batching of indirect draws.
class GrMiddleOutCubicShader : public GrStencilPathShader {
public:
// How many vertices do we need to draw in order to triangulate a cubic with 2^resolveLevel
@ -152,31 +159,16 @@ public:
// Configures an indirect draw to render cubic instances with 2^resolveLevel evenly-spaced (in
// the parametric sense) line segments.
static void WriteDrawCubicsIndirectCmd(GrDrawIndexedIndirectWriter* indirectWriter,
int resolveLevel, uint32_t instanceCount,
uint32_t baseInstance) {
static void WriteDrawIndirectCmd(GrDrawIndirectWriter* indirectWriter, int resolveLevel,
uint32_t instanceCount, uint32_t baseInstance) {
SkASSERT(resolveLevel > 0 && resolveLevel <= GrTessellationPathRenderer::kMaxResolveLevel);
// Starting at baseIndex=3, the index buffer triangulates a cubic with 2^kMaxResolveLevel
// line segments. Each index value corresponds to a parametric T value on the curve. Since
// the triangles are arranged in "middle-out" order, we can conveniently control the
// resolveLevel by changing only the indexCount.
uint32_t indexCount = NumVerticesAtResolveLevel(resolveLevel);
indirectWriter->writeIndexed(indexCount, 3, instanceCount, baseInstance, 0);
// The vertex shader determines the T value at which to draw each vertex. Since the
// triangles are arranged in "middle-out" order, we can conveniently control the
// resolveLevel by changing only the vertexCount.
uint32_t vertexCount = NumVerticesAtResolveLevel(resolveLevel);
indirectWriter->write(instanceCount, baseInstance, vertexCount, 0);
}
// For performance reasons we can often express triangles as an indirect cubic draw and sneak
// them in alongside the other indirect draws. This method configures an indirect draw to emit
// the triangle [P0, P1, P2] from a 4-point instance.
static void WriteDrawTrianglesIndirectCmd(GrDrawIndexedIndirectWriter* indirectWriter,
uint32_t instanceCount, uint32_t baseInstance) {
// Indices 0,1,2 have special index values that emit points P0, P1, and P2 respectively.
indirectWriter->writeIndexed(3, 0, instanceCount, baseInstance, 0);
}
// Returns the index buffer that should be bound when drawing with this shader.
// (Our vertex shader uses raw index values directly, so there is no vertex buffer.)
static sk_sp<const GrGpuBuffer> FindOrMakeMiddleOutIndexBuffer(GrResourceProvider*);
GrMiddleOutCubicShader(const SkMatrix& viewMatrix)
: GrStencilPathShader(kTessellate_GrMiddleOutCubicShader_ClassID, viewMatrix,
GrPrimitiveType::kTriangles) {

View File

@ -757,7 +757,7 @@ void GrStrokeIndirectTessellator::prepare(GrMeshDrawOp::Target* target,
GrDrawIndirectWriter indirectWriter = target->makeDrawIndirectSpace(fChainedDrawIndirectCount,
&fDrawIndirectBuffer,
&fDrawIndirectOffset);
if (!indirectWriter.isValid()) {
if (!indirectWriter) {
SkASSERT(!fDrawIndirectBuffer);
return;
}

View File

@ -721,7 +721,7 @@ void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties,
shaderCaps->fIntegerSupport = true;
shaderCaps->fNonsquareMatrixSupport = true;
shaderCaps->fVertexIDSupport = true;
shaderCaps->fFPManipulationSupport = true;
shaderCaps->fBitManipulationSupport = true;
// Assume the minimum precisions mandated by the SPIR-V spec.
shaderCaps->fFloatIs32Bits = true;