From e0b989e5e3ab64f3585fe1bf0228e964dad0678c Mon Sep 17 00:00:00 2001 From: Brian Salomon Date: Wed, 13 Mar 2019 16:11:44 -0400 Subject: [PATCH] Try to avoid vertex colors in Texture/FillRect ops when possible. Avoids unnecessary fragment shader color multiplication. Change-Id: I353d3ca91824ce20c9e9af1c5c84ab9953ddd8ab Reviewed-on: https://skia-review.googlesource.com/c/skia/+/201004 Commit-Queue: Brian Salomon Reviewed-by: Michael Ludwig --- src/gpu/ops/GrFillRectOp.cpp | 17 ++++++++--------- src/gpu/ops/GrQuadPerEdgeAA.cpp | 24 ++++++++++++++++-------- src/gpu/ops/GrQuadPerEdgeAA.h | 3 +++ src/gpu/ops/GrTextureOp.cpp | 21 +++++++++++---------- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/gpu/ops/GrFillRectOp.cpp b/src/gpu/ops/GrFillRectOp.cpp index d9a11c4923..94f90b3107 100644 --- a/src/gpu/ops/GrFillRectOp.cpp +++ b/src/gpu/ops/GrFillRectOp.cpp @@ -79,7 +79,7 @@ public: const GrPerspQuad& localQuad, GrQuadType localQuadType) : INHERITED(ClassID()) , fHelper(args, aaType, stencil) - , fWideColor(!SkPMColor4fFitsInBytes(paintColor)) { + , fColorType(GrQuadPerEdgeAA::MinColorType(paintColor)) { // The color stored with the quad is the clear color if a scissor-clear is decided upon // when executing the op. fDeviceQuads.push_back(deviceQuad, deviceQuadType, { paintColor, edgeFlags }); @@ -178,10 +178,9 @@ private: using Domain = GrQuadPerEdgeAA::Domain; static constexpr SkRect kEmptyDomain = SkRect::MakeEmpty(); - VertexSpec vertexSpec(fDeviceQuads.quadType(), - fWideColor ? ColorType::kHalf : ColorType::kByte, - fLocalQuads.quadType(), fHelper.usesLocalCoords(), Domain::kNo, - fHelper.aaType(), fHelper.compatibleWithAlphaAsCoverage()); + VertexSpec vertexSpec(fDeviceQuads.quadType(), fColorType, fLocalQuads.quadType(), + fHelper.usesLocalCoords(), Domain::kNo, fHelper.aaType(), + fHelper.compatibleWithAlphaAsCoverage()); // Make sure that if the op thought it was a solid color, the vertex spec does not use // local coords. 
SkASSERT(!fHelper.isTrivial() || !fHelper.usesLocalCoords()); @@ -258,7 +257,7 @@ private: // If the processor sets are compatible, the two ops are always compatible; it just needs to // adjust the state of the op to be the more general quad and aa types of the two ops and // then concatenate the per-quad data. - fWideColor |= that->fWideColor; + fColorType = SkTMax(fColorType, that->fColorType); // The helper stores the aa type, but isCompatible(with true arg) allows the two ops' aa // types to be none and coverage, in which case this op's aa type must be lifted to coverage @@ -297,8 +296,8 @@ private: } // clear compatible won't need to be updated, since device quad type and paint is the same, - // but this quad has a new color, so maybe update wide color - fWideColor |= !SkPMColor4fFitsInBytes(color); + // but this quad has a new color, so maybe update color type + fColorType = SkTMax(fColorType, GrQuadPerEdgeAA::MinColorType(color)); // Update the bounds and add the quad to this op's storage SkRect newBounds = this->bounds(); @@ -328,7 +327,7 @@ private: // No metadata attached to the local quads; this list is empty when local coords are not needed. GrQuadList fLocalQuads; - unsigned fWideColor: 1; + ColorType fColorType; typedef GrMeshDrawOp INHERITED; }; diff --git a/src/gpu/ops/GrQuadPerEdgeAA.cpp b/src/gpu/ops/GrQuadPerEdgeAA.cpp index 4845b55f07..cdc49b32ff 100644 --- a/src/gpu/ops/GrQuadPerEdgeAA.cpp +++ b/src/gpu/ops/GrQuadPerEdgeAA.cpp @@ -624,9 +624,7 @@ static CoverageMode get_mode_for_spec(const GrQuadPerEdgeAA::VertexSpec& spec) { // Writes four vertices in triangle strip order, including the additional data for local // coordinates, domain, color, and coverage as needed to satisfy the vertex spec. 
static void write_quad(GrVertexWriter* vb, const GrQuadPerEdgeAA::VertexSpec& spec, - CoverageMode mode, Sk4f coverage, - SkPMColor4f color4f, bool wideColor, - const SkRect& domain, + CoverageMode mode, Sk4f coverage, SkPMColor4f color4f, const SkRect& domain, const Vertices& quad) { static constexpr auto If = GrVertexWriter::If; @@ -639,8 +637,9 @@ static void write_quad(GrVertexWriter* vb, const GrQuadPerEdgeAA::VertexSpec& sp // save color if (spec.hasVertexColors()) { + bool wide = spec.colorType() == GrQuadPerEdgeAA::ColorType::kHalf; vb->write(GrVertexColor( - color4f * (mode == CoverageMode::kWithColor ? coverage[i] : 1.f), wideColor)); + color4f * (mode == CoverageMode::kWithColor ? coverage[i] : 1.f), wide)); } // save local position @@ -684,12 +683,21 @@ static sk_sp get_index_buffer(GrResourceProvider* resourcePro namespace GrQuadPerEdgeAA { +ColorType MinColorType(SkPMColor4f color) { + if (color == SK_PMColor4fWHITE) { + return ColorType::kNone; + } else if (color.fitsInBytes()) { + return ColorType::kByte; + } else { + return ColorType::kHalf; + } +} + ////////////////// Tessellate Implementation void* Tessellate(void* vertices, const VertexSpec& spec, const GrPerspQuad& deviceQuad, const SkPMColor4f& color4f, const GrPerspQuad& localQuad, const SkRect& domain, GrQuadAAFlags aaFlags) { - bool wideColor = GrQuadPerEdgeAA::ColorType::kHalf == spec.colorType(); CoverageMode mode = get_mode_for_spec(spec); // Load position data into Sk4fs (always x, y, and load w to avoid branching down the road) @@ -732,12 +740,12 @@ void* Tessellate(void* vertices, const VertexSpec& spec, const GrPerspQuad& devi // applied a mirror, etc. The current 2D case is already adequately fast. 
// Write two quads for inner and outer, inner will use the - write_quad(&vb, spec, mode, maxCoverage, color4f, wideColor, domain, inner); - write_quad(&vb, spec, mode, 0.f, color4f, wideColor, domain, outer); + write_quad(&vb, spec, mode, maxCoverage, color4f, domain, inner); + write_quad(&vb, spec, mode, 0.f, color4f, domain, outer); } else { // No outsetting needed, just write a single quad with full coverage SkASSERT(mode == CoverageMode::kNone); - write_quad(&vb, spec, mode, 1.f, color4f, wideColor, domain, outer); + write_quad(&vb, spec, mode, 1.f, color4f, domain, outer); } return vb.fPtr; diff --git a/src/gpu/ops/GrQuadPerEdgeAA.h b/src/gpu/ops/GrQuadPerEdgeAA.h index 232a10d2f0..a491f2603b 100644 --- a/src/gpu/ops/GrQuadPerEdgeAA.h +++ b/src/gpu/ops/GrQuadPerEdgeAA.h @@ -26,6 +26,9 @@ namespace GrQuadPerEdgeAA { enum class ColorType { kNone, kByte, kHalf, kLast = kHalf }; static const int kColorTypeCount = static_cast<int>(ColorType::kLast) + 1; + // Gets the minimum ColorType that can represent a color. + ColorType MinColorType(SkPMColor4f); + // Specifies the vertex configuration for an op that renders per-edge AA quads. The vertex // order (when enabled) is device position, color, local position, domain, aa edge equations. 
// This order matches the constructor argument order of VertexSpec and is the order that diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp index 9db287a61b..5b4232ca39 100644 --- a/src/gpu/ops/GrTextureOp.cpp +++ b/src/gpu/ops/GrTextureOp.cpp @@ -291,7 +291,7 @@ private: auto bounds = dstQuad.bounds(dstQuadType); this->setBounds(bounds, HasAABloat(aaType == GrAAType::kCoverage), IsZeroArea::kNo); fDomain = static_cast<unsigned>(domain); - fWideColor = !SkPMColor4fFitsInBytes(color); + fColorType = static_cast<unsigned>(GrQuadPerEdgeAA::MinColorType(color)); fCanSkipAllocatorGather = static_cast<unsigned>(fProxies[0].fProxy->canSkipResourceAllocator()); } @@ -311,7 +311,7 @@ private: // identical, unless an entry provides a dstClip or additional transform that changes it. // The quad list will automatically adapt to that. fQuads.reserve(cnt, GrQuadTypeForTransformedRect(viewMatrix)); - + bool allOpaque = true; for (unsigned p = 0; p < fProxyCnt; ++p) { fProxies[p].fProxy = SkRef(set[p].fProxy.get()); fProxies[p].fQuadCnt = 1; @@ -351,6 +351,7 @@ private: set[p].fDstRect); } float alpha = SkTPin(set[p].fAlpha, 0.f, 1.f); + allOpaque &= (1.f == alpha); SkPMColor4f color{alpha, alpha, alpha, alpha}; int srcQuadIndex = -1; if (set[p].fDstClipQuad) { @@ -371,7 +372,7 @@ private: } this->setBounds(bounds, HasAABloat(this->aaType() == GrAAType::kCoverage), IsZeroArea::kNo); fDomain = static_cast<unsigned>(false); - fWideColor = static_cast<unsigned>(false); + fColorType = static_cast<unsigned>(allOpaque ? 
ColorType::kNone : ColorType::kByte); } void tess(void* v, const VertexSpec& spec, const GrTextureProxy* proxy, int start, @@ -408,7 +409,7 @@ private: GrQuadType quadType = GrQuadType::kRect; GrQuadType srcQuadType = GrQuadType::kRect; Domain domain = Domain::kNo; - bool wideColor = false; + ColorType colorType = ColorType::kNone; int numProxies = 0; int numTotalQuads = 0; auto textureType = fProxies[0].fProxy->textureType(); @@ -426,7 +427,7 @@ private: if (op.fDomain) { domain = Domain::kYes; } - wideColor |= op.fWideColor; + colorType = SkTMax(colorType, static_cast<ColorType>(op.fColorType)); numProxies += op.fProxyCnt; for (unsigned p = 0; p < op.fProxyCnt; ++p) { numTotalQuads += op.fProxies[p].fQuadCnt; @@ -443,8 +444,7 @@ private: } } - VertexSpec vertexSpec(quadType, wideColor ? ColorType::kHalf : ColorType::kByte, - srcQuadType, /* hasLocal */ true, domain, aaType, + VertexSpec vertexSpec(quadType, colorType, srcQuadType, /* hasLocal */ true, domain, aaType, /* alpha as coverage */ true); GrSamplerState samplerState = GrSamplerState(GrSamplerState::WrapMode::kClamp, @@ -562,7 +562,7 @@ private: } fDomain |= that->fDomain; - fWideColor |= that->fWideColor; + fColorType = SkTMax(fColorType, that->fColorType); if (upgradeToCoverageAAOnMerge) { fAAType = static_cast<unsigned>(GrAAType::kCoverage); } @@ -644,11 +644,12 @@ private: unsigned fFilter : 2; unsigned fAAType : 2; unsigned fDomain : 1; - unsigned fWideColor : 1; + unsigned fColorType : 2; + GR_STATIC_ASSERT(GrQuadPerEdgeAA::kColorTypeCount <= 4); // Used to track whether fProxy is ref'ed or has a pending IO after finalize() is called. unsigned fFinalized : 1; unsigned fCanSkipAllocatorGather : 1; - unsigned fProxyCnt : 32 - 8; + unsigned fProxyCnt : 32 - 9; Proxy fProxies[1]; static_assert(kGrQuadTypeCount <= 4, "GrQuadType does not fit in 2 bits");