Pull tessellation forward into GrTextureOp::onPrePrepareDraws
This trades memory and DDL-record time for GPU-flush time. That is, by tessellating in onPrePrepareDraws we will consume more memory (for the verts) and time at DDL-record time but, hopefully, less time when drawing the DDL. Change-Id: I97dd1a00486c7a1da53add2ea203bf3f90c20162 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/250178 Commit-Queue: Robert Phillips <robertphillips@google.com> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
This commit is contained in:
parent
740f85949d
commit
c5a2c759fb
@ -267,17 +267,78 @@ private:
|
||||
int fQuadCnt;
|
||||
};
|
||||
|
||||
// This descriptor is created in onPrePrepare. It is allocated in the creation-time opData
|
||||
// arena. The actual data for the fDynamicStateArrays and fFixedDynamicState members will be
|
||||
// allocated in the arena passed to 'allocate'.
|
||||
// This descriptor is used in both onPrePrepareDraws and onPrepareDraws.
|
||||
//
|
||||
// In the onPrePrepareDraws case it is allocated in the creation-time opData
|
||||
// arena. Both allocateCommon and allocatePrePrepareOnly are called and they also allocate
|
||||
// their memory in the creation-time opData arena.
|
||||
//
|
||||
// In the onPrepareDraws case this descriptor is created on the stack and only
|
||||
// allocateCommon is called. In this case the common memory fields are allocated
|
||||
// in the flush-time arena (i.e., as part of the flushState).
|
||||
struct PrePreparedDesc {
|
||||
GrPipeline::DynamicStateArrays* fDynamicStateArrays = nullptr;
|
||||
GrPipeline::FixedDynamicState* fFixedDynamicState = nullptr;
|
||||
VertexSpec fVertexSpec;
|
||||
int fNumProxies = 0;
|
||||
int fNumTotalQuads = 0;
|
||||
GrPipeline::DynamicStateArrays* fDynamicStateArrays = nullptr;
|
||||
GrPipeline::FixedDynamicState* fFixedDynamicState = nullptr;
|
||||
|
||||
void allocate(SkArenaAlloc* arena, const GrAppliedClip* clip, GrTextureProxy* firstProxy) {
|
||||
// These two member variables are only used by 'onPrePrepareDraws'. The prior five are also
|
||||
// used by 'onPrepareDraws'
|
||||
// TODO: we could just recompute 'fVertexOffsets' in onPrepareDraws
|
||||
int* fVertexOffsets = nullptr;
|
||||
char* fVertices = nullptr;
|
||||
|
||||
// How big should 'fVertices' be to hold all the vertex data?
|
||||
size_t totalSizeInBytes() const {
|
||||
return fNumTotalQuads * fVertexSpec.verticesPerQuad() * fVertexSpec.vertexSize();
|
||||
}
|
||||
|
||||
#ifdef SK_DEBUG
|
||||
int totalNumVertices() const {
|
||||
return fNumTotalQuads * fVertexSpec.verticesPerQuad();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Helper to fill in the fFixedDynamicState and fDynamicStateArrays. If there is more
|
||||
// than one mesh/proxy they are stored in fDynamicStateArrays but if there is only one
|
||||
// it is stored in fFixedDynamicState.
|
||||
void setMeshProxy(int index, GrTextureProxy* proxy) {
|
||||
SkASSERT(index < fNumProxies);
|
||||
|
||||
if (fDynamicStateArrays) {
|
||||
SkASSERT(fDynamicStateArrays->fPrimitiveProcessorTextures);
|
||||
SkASSERT(fNumProxies > 1);
|
||||
|
||||
fDynamicStateArrays->fPrimitiveProcessorTextures[index] = proxy;
|
||||
} else {
|
||||
SkASSERT(fFixedDynamicState);
|
||||
SkASSERT(fNumProxies == 1);
|
||||
|
||||
fFixedDynamicState->fPrimitiveProcessorTextures[index] = proxy;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SK_DEBUG
|
||||
GrTextureProxy* getMeshProxy(int index) {
|
||||
SkASSERT(index < fNumProxies);
|
||||
|
||||
if (fDynamicStateArrays) {
|
||||
SkASSERT(fDynamicStateArrays->fPrimitiveProcessorTextures);
|
||||
SkASSERT(fNumProxies > 1);
|
||||
|
||||
return fDynamicStateArrays->fPrimitiveProcessorTextures[index];
|
||||
} else {
|
||||
SkASSERT(fFixedDynamicState);
|
||||
SkASSERT(fNumProxies == 1);
|
||||
|
||||
return fFixedDynamicState->fPrimitiveProcessorTextures[index];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Allocate the fields required in both onPrePrepareDraws and onPrepareDraws
|
||||
void allocateCommon(SkArenaAlloc* arena, const GrAppliedClip* clip) {
|
||||
// We'll use a dynamic state array for the GP textures when there are multiple ops.
|
||||
// Otherwise, we use fixed dynamic state to specify the single op's proxy.
|
||||
if (fNumProxies > 1) {
|
||||
@ -285,9 +346,15 @@ private:
|
||||
fFixedDynamicState = Target::MakeFixedDynamicState(arena, clip, 0);
|
||||
} else {
|
||||
fFixedDynamicState = Target::MakeFixedDynamicState(arena, clip, 1);
|
||||
fFixedDynamicState->fPrimitiveProcessorTextures[0] = firstProxy;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate the fields only needed by onPrePrepareDraws
|
||||
void allocatePrePrepareOnly(SkArenaAlloc* arena) {
|
||||
fVertexOffsets = arena->makeArrayDefault<int>(fNumProxies);
|
||||
fVertices = arena->makeArrayDefault<char>(this->totalSizeInBytes());
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// dstQuad should be the geometry transformed by the view matrix. If domainRect
|
||||
@ -419,18 +486,20 @@ private:
|
||||
fDomain = static_cast<unsigned>(netDomain);
|
||||
}
|
||||
|
||||
void tess(void* v, const VertexSpec& spec, const GrTextureProxy* proxy,
|
||||
GrQuadBuffer<ColorDomainAndAA>::Iter* iter, int cnt) const {
|
||||
static void Tess(void* v, const VertexSpec& spec, const GrTextureProxy* proxy,
|
||||
GrQuadBuffer<ColorDomainAndAA>::Iter* iter, int cnt,
|
||||
GrSamplerState::Filter filter) {
|
||||
TRACE_EVENT0("skia.gpu", TRACE_FUNC);
|
||||
auto origin = proxy->origin();
|
||||
const auto* texture = proxy->peekTexture();
|
||||
SkISize dimensions = proxy->backingStoreDimensions();
|
||||
|
||||
float iw, ih, h;
|
||||
if (proxy->textureType() == GrTextureType::kRectangle) {
|
||||
iw = ih = 1.f;
|
||||
h = texture->height();
|
||||
h = dimensions.height();
|
||||
} else {
|
||||
iw = 1.f / texture->width();
|
||||
ih = 1.f / texture->height();
|
||||
iw = 1.f / dimensions.width();
|
||||
ih = 1.f / dimensions.height();
|
||||
h = 1.f;
|
||||
}
|
||||
|
||||
@ -444,7 +513,7 @@ private:
|
||||
// Must correct the texture coordinates and domain now that the real texture size
|
||||
// is known
|
||||
compute_src_quad(origin, iter->localQuad(), iw, ih, h, &srcQuad);
|
||||
compute_domain(info.domain(), this->filter(), origin, info.fDomainRect, iw, ih, h,
|
||||
compute_domain(info.domain(), filter, origin, info.fDomainRect, iw, ih, h,
|
||||
&domain);
|
||||
v = GrQuadPerEdgeAA::Tessellate(v, spec, iter->deviceQuad(), info.fColor, srcQuad,
|
||||
domain, info.aaFlags());
|
||||
@ -464,9 +533,51 @@ private:
|
||||
|
||||
fPrePreparedDesc->fVertexSpec = this->characterize(&fPrePreparedDesc->fNumProxies,
|
||||
&fPrePreparedDesc->fNumTotalQuads);
|
||||
fPrePreparedDesc->allocate(arena, clip, fProxyCountPairs[0].fProxy);
|
||||
fPrePreparedDesc->allocateCommon(arena, clip);
|
||||
|
||||
// Pull forward the tessellation of the quads to here
|
||||
fPrePreparedDesc->allocatePrePrepareOnly(arena);
|
||||
|
||||
{
|
||||
SkDEBUGCODE(int totQuadsSeen = 0;)
|
||||
SkDEBUGCODE(int totVerticesSeen = 0;)
|
||||
int vertexOffsetInBuffer = 0;
|
||||
char* dst = fPrePreparedDesc->fVertices;
|
||||
const size_t vertexSize = fPrePreparedDesc->fVertexSpec.vertexSize();
|
||||
|
||||
int meshIndex = 0;
|
||||
for (const auto& op : ChainRange<TextureOp>(this)) {
|
||||
auto iter = op.fQuads.iterator();
|
||||
for (unsigned p = 0; p < op.fProxyCnt; ++p) {
|
||||
GrTextureProxy* proxy = op.fProxyCountPairs[p].fProxy;
|
||||
|
||||
int quadCnt = op.fProxyCountPairs[p].fQuadCnt;
|
||||
SkDEBUGCODE(totQuadsSeen += quadCnt;)
|
||||
|
||||
int meshVertexCnt = quadCnt * fPrePreparedDesc->fVertexSpec.verticesPerQuad();
|
||||
SkDEBUGCODE(totVerticesSeen += meshVertexCnt);
|
||||
|
||||
Tess(dst, fPrePreparedDesc->fVertexSpec, proxy, &iter, quadCnt, op.filter());
|
||||
|
||||
fPrePreparedDesc->fVertexOffsets[meshIndex] = vertexOffsetInBuffer;
|
||||
SkASSERT(vertexOffsetInBuffer * vertexSize ==
|
||||
(size_t)(dst - fPrePreparedDesc->fVertices));
|
||||
fPrePreparedDesc->setMeshProxy(meshIndex, proxy);
|
||||
++meshIndex;
|
||||
|
||||
vertexOffsetInBuffer += meshVertexCnt;
|
||||
dst += vertexSize * meshVertexCnt;
|
||||
}
|
||||
// If quad counts per proxy were calculated correctly, the entire iterator
|
||||
// should have been consumed.
|
||||
SkASSERT(!iter.next());
|
||||
}
|
||||
|
||||
SkASSERT(fPrePreparedDesc->totalSizeInBytes() ==
|
||||
(size_t)(dst - fPrePreparedDesc->fVertices));
|
||||
SkASSERT(meshIndex == fPrePreparedDesc->fNumProxies);
|
||||
SkASSERT(totQuadsSeen == fPrePreparedDesc->fNumTotalQuads);
|
||||
SkASSERT(totVerticesSeen == fPrePreparedDesc->totalNumVertices());
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SK_DEBUG
|
||||
@ -542,7 +653,9 @@ private:
|
||||
SkArenaAlloc* arena = target->allocator();
|
||||
|
||||
desc.fVertexSpec = this->characterize(&desc.fNumProxies, &desc.fNumTotalQuads);
|
||||
desc.allocate(arena, target->appliedClip(), fProxyCountPairs[0].fProxy);
|
||||
desc.allocateCommon(arena, target->appliedClip());
|
||||
|
||||
SkASSERT(!desc.fVertexOffsets && !desc.fVertices);
|
||||
}
|
||||
|
||||
size_t vertexSize = desc.fVertexSpec.vertexSize();
|
||||
@ -573,7 +686,16 @@ private:
|
||||
}
|
||||
SkASSERT(numAllocatedVertices >= meshVertexCnt);
|
||||
|
||||
op.tess(vdata, desc.fVertexSpec, proxy, &iter, quadCnt);
|
||||
if (fPrePreparedDesc) {
|
||||
// TODO: when we've prePrepared the vertex data should we just allocate
|
||||
// all the vertices together and just do one memcpy?
|
||||
size_t offset = desc.fVertexOffsets[meshIndex] * vertexSize;
|
||||
memcpy(vdata, &desc.fVertices[offset], meshVertexCnt * vertexSize);
|
||||
SkASSERT(proxy == desc.getMeshProxy(meshIndex));
|
||||
} else {
|
||||
Tess(vdata, desc.fVertexSpec, proxy, &iter, quadCnt, op.filter());
|
||||
desc.setMeshProxy(meshIndex, proxy);
|
||||
}
|
||||
|
||||
SkASSERT(meshIndex < desc.fNumProxies);
|
||||
|
||||
@ -583,18 +705,17 @@ private:
|
||||
return;
|
||||
}
|
||||
meshes[meshIndex].setVertexData(vbuffer, vertexOffsetInBuffer);
|
||||
if (desc.fDynamicStateArrays) {
|
||||
desc.fDynamicStateArrays->fPrimitiveProcessorTextures[meshIndex] = proxy;
|
||||
}
|
||||
++meshIndex;
|
||||
|
||||
numAllocatedVertices -= meshVertexCnt;
|
||||
numQuadVerticesLeft -= meshVertexCnt;
|
||||
vertexOffsetInBuffer += meshVertexCnt;
|
||||
vdata = reinterpret_cast<char*>(vdata) + vertexSize * meshVertexCnt;
|
||||
}
|
||||
// If quad counts per proxy were calculated correctly, the entire iterator should have
|
||||
// been consumed.
|
||||
SkASSERT(!iter.next());
|
||||
|
||||
// If quad counts per proxy were calculated correctly, the entire iterator should
|
||||
// have been consumed.
|
||||
SkASSERT(fPrePreparedDesc || !iter.next());
|
||||
}
|
||||
SkASSERT(!numQuadVerticesLeft);
|
||||
SkASSERT(!numAllocatedVertices);
|
||||
|
Loading…
Reference in New Issue
Block a user