Reland "Try to avoid vertex colors in Texture/FillRect ops when possible."

This is a reland of e0b989e5e3

Original change's description:
> Try to avoid vertex colors in Texture/FillRect ops when possible.
> 
> Avoids unnecessary fragment shader color multiplication.
> 
> Change-Id: I353d3ca91824ce20c9e9af1c5c84ab9953ddd8ab
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/201004
> Commit-Queue: Brian Salomon <bsalomon@google.com>
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>

Change-Id: I22125cb7058f528cb368ff30c3c26e3d55056e66
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/201222
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
This commit is contained in:
Brian Salomon 2019-03-13 16:11:44 -04:00 committed by Skia Commit-Bot
parent f699bf0437
commit 1d835423e1
4 changed files with 38 additions and 27 deletions

View File

@ -79,7 +79,7 @@ public:
const GrPerspQuad& localQuad, GrQuadType localQuadType)
: INHERITED(ClassID())
, fHelper(args, aaType, stencil)
, fWideColor(!SkPMColor4fFitsInBytes(paintColor)) {
, fColorType(GrQuadPerEdgeAA::MinColorType(paintColor)) {
// The color stored with the quad is the clear color if a scissor-clear is decided upon
// when executing the op.
fDeviceQuads.push_back(deviceQuad, deviceQuadType, { paintColor, edgeFlags });
@ -178,10 +178,9 @@ private:
using Domain = GrQuadPerEdgeAA::Domain;
static constexpr SkRect kEmptyDomain = SkRect::MakeEmpty();
VertexSpec vertexSpec(fDeviceQuads.quadType(),
fWideColor ? ColorType::kHalf : ColorType::kByte,
fLocalQuads.quadType(), fHelper.usesLocalCoords(), Domain::kNo,
fHelper.aaType(), fHelper.compatibleWithAlphaAsCoverage());
VertexSpec vertexSpec(fDeviceQuads.quadType(), fColorType, fLocalQuads.quadType(),
fHelper.usesLocalCoords(), Domain::kNo, fHelper.aaType(),
fHelper.compatibleWithAlphaAsCoverage());
// Make sure that if the op thought it was a solid color, the vertex spec does not use
// local coords.
SkASSERT(!fHelper.isTrivial() || !fHelper.usesLocalCoords());
@ -258,7 +257,7 @@ private:
// If the processor sets are compatible, the two ops are always compatible; it just needs to
// adjust the state of the op to be the more general quad and aa types of the two ops and
// then concatenate the per-quad data.
fWideColor |= that->fWideColor;
fColorType = SkTMax(fColorType, that->fColorType);
// The helper stores the aa type, but isCompatible(with true arg) allows the two ops' aa
// types to be none and coverage, in which case this op's aa type must be lifted to coverage
@ -297,8 +296,8 @@ private:
}
// clear compatible won't need to be updated, since device quad type and paint is the same,
// but this quad has a new color, so maybe update wide color
fWideColor |= !SkPMColor4fFitsInBytes(color);
// but this quad has a new color, so maybe update color type
fColorType = SkTMax(fColorType, GrQuadPerEdgeAA::MinColorType(color));
// Update the bounds and add the quad to this op's storage
SkRect newBounds = this->bounds();
@ -328,7 +327,7 @@ private:
// No metadata attached to the local quads; this list is empty when local coords are not needed.
GrQuadList fLocalQuads;
unsigned fWideColor: 1;
ColorType fColorType;
typedef GrMeshDrawOp INHERITED;
};

View File

@ -624,9 +624,7 @@ static CoverageMode get_mode_for_spec(const GrQuadPerEdgeAA::VertexSpec& spec) {
// Writes four vertices in triangle strip order, including the additional data for local
// coordinates, domain, color, and coverage as needed to satisfy the vertex spec.
static void write_quad(GrVertexWriter* vb, const GrQuadPerEdgeAA::VertexSpec& spec,
CoverageMode mode, Sk4f coverage,
SkPMColor4f color4f, bool wideColor,
const SkRect& domain,
CoverageMode mode, Sk4f coverage, SkPMColor4f color4f, const SkRect& domain,
const Vertices& quad) {
static constexpr auto If = GrVertexWriter::If<float>;
@ -639,8 +637,9 @@ static void write_quad(GrVertexWriter* vb, const GrQuadPerEdgeAA::VertexSpec& sp
// save color
if (spec.hasVertexColors()) {
bool wide = spec.colorType() == GrQuadPerEdgeAA::ColorType::kHalf;
vb->write(GrVertexColor(
color4f * (mode == CoverageMode::kWithColor ? coverage[i] : 1.f), wideColor));
color4f * (mode == CoverageMode::kWithColor ? coverage[i] : 1.f), wide));
}
// save local position
@ -684,12 +683,21 @@ static sk_sp<const GrGpuBuffer> get_index_buffer(GrResourceProvider* resourcePro
namespace GrQuadPerEdgeAA {
// Returns the smallest ColorType able to represent `color`:
//   - kNone when the color is exactly SK_PMColor4fWHITE,
//   - kByte when color.fitsInBytes() is true,
//   - kHalf otherwise.
// NOTE(review): kNone presumably means "no per-vertex color is needed" (the commit description
// says the goal is to avoid vertex colors and the fragment shader color multiply) — confirm
// against VertexSpec::hasVertexColors().
ColorType MinColorType(SkPMColor4f color) {
if (color == SK_PMColor4fWHITE) {
return ColorType::kNone;
} else if (color.fitsInBytes()) {
return ColorType::kByte;
} else {
// Does not fit in bytes, so the wider representation is required.
return ColorType::kHalf;
}
}
////////////////// Tessellate Implementation
void* Tessellate(void* vertices, const VertexSpec& spec, const GrPerspQuad& deviceQuad,
const SkPMColor4f& color4f, const GrPerspQuad& localQuad, const SkRect& domain,
GrQuadAAFlags aaFlags) {
bool wideColor = GrQuadPerEdgeAA::ColorType::kHalf == spec.colorType();
CoverageMode mode = get_mode_for_spec(spec);
// Load position data into Sk4fs (always x, y, and load w to avoid branching down the road)
@ -732,12 +740,12 @@ void* Tessellate(void* vertices, const VertexSpec& spec, const GrPerspQuad& devi
// applied a mirror, etc. The current 2D case is already adequately fast.
// Write two quads for inner and outer, inner will use the max coverage and outer will use zero coverage
write_quad(&vb, spec, mode, maxCoverage, color4f, wideColor, domain, inner);
write_quad(&vb, spec, mode, 0.f, color4f, wideColor, domain, outer);
write_quad(&vb, spec, mode, maxCoverage, color4f, domain, inner);
write_quad(&vb, spec, mode, 0.f, color4f, domain, outer);
} else {
// No outsetting needed, just write a single quad with full coverage
SkASSERT(mode == CoverageMode::kNone);
write_quad(&vb, spec, mode, 1.f, color4f, wideColor, domain, outer);
write_quad(&vb, spec, mode, 1.f, color4f, domain, outer);
}
return vb.fPtr;

View File

@ -26,6 +26,9 @@ namespace GrQuadPerEdgeAA {
enum class ColorType { kNone, kByte, kHalf, kLast = kHalf };
static const int kColorTypeCount = static_cast<int>(ColorType::kLast) + 1;
// Gets the minimum ColorType that can represent a color.
ColorType MinColorType(SkPMColor4f);
// Specifies the vertex configuration for an op that renders per-edge AA quads. The vertex
// order (when enabled) is device position, color, local position, domain, aa edge equations.
// This order matches the constructor argument order of VertexSpec and is the order that

View File

@ -291,7 +291,7 @@ private:
auto bounds = dstQuad.bounds(dstQuadType);
this->setBounds(bounds, HasAABloat(aaType == GrAAType::kCoverage), IsZeroArea::kNo);
fDomain = static_cast<unsigned>(domain);
fWideColor = !SkPMColor4fFitsInBytes(color);
fColorType = static_cast<unsigned>(GrQuadPerEdgeAA::MinColorType(color));
fCanSkipAllocatorGather =
static_cast<unsigned>(fProxies[0].fProxy->canSkipResourceAllocator());
}
@ -311,7 +311,7 @@ private:
// identical, unless an entry provides a dstClip or additional transform that changes it.
// The quad list will automatically adapt to that.
fQuads.reserve(cnt, GrQuadTypeForTransformedRect(viewMatrix));
bool allOpaque = true;
for (unsigned p = 0; p < fProxyCnt; ++p) {
fProxies[p].fProxy = SkRef(set[p].fProxy.get());
fProxies[p].fQuadCnt = 1;
@ -351,6 +351,7 @@ private:
set[p].fDstRect);
}
float alpha = SkTPin(set[p].fAlpha, 0.f, 1.f);
allOpaque &= (1.f == alpha);
SkPMColor4f color{alpha, alpha, alpha, alpha};
int srcQuadIndex = -1;
if (set[p].fDstClipQuad) {
@ -371,7 +372,7 @@ private:
}
this->setBounds(bounds, HasAABloat(this->aaType() == GrAAType::kCoverage), IsZeroArea::kNo);
fDomain = static_cast<unsigned>(false);
fWideColor = static_cast<unsigned>(false);
fColorType = static_cast<unsigned>(allOpaque ? ColorType::kNone : ColorType::kByte);
}
void tess(void* v, const VertexSpec& spec, const GrTextureProxy* proxy, int start,
@ -408,7 +409,7 @@ private:
GrQuadType quadType = GrQuadType::kRect;
GrQuadType srcQuadType = GrQuadType::kRect;
Domain domain = Domain::kNo;
bool wideColor = false;
ColorType colorType = ColorType::kNone;
int numProxies = 0;
int numTotalQuads = 0;
auto textureType = fProxies[0].fProxy->textureType();
@ -426,7 +427,7 @@ private:
if (op.fDomain) {
domain = Domain::kYes;
}
wideColor |= op.fWideColor;
colorType = SkTMax(colorType, static_cast<ColorType>(op.fColorType));
numProxies += op.fProxyCnt;
for (unsigned p = 0; p < op.fProxyCnt; ++p) {
numTotalQuads += op.fProxies[p].fQuadCnt;
@ -443,8 +444,7 @@ private:
}
}
VertexSpec vertexSpec(quadType, wideColor ? ColorType::kHalf : ColorType::kByte,
srcQuadType, /* hasLocal */ true, domain, aaType,
VertexSpec vertexSpec(quadType, colorType, srcQuadType, /* hasLocal */ true, domain, aaType,
/* alpha as coverage */ true);
GrSamplerState samplerState = GrSamplerState(GrSamplerState::WrapMode::kClamp,
@ -562,7 +562,7 @@ private:
}
fDomain |= that->fDomain;
fWideColor |= that->fWideColor;
fColorType = SkTMax(fColorType, that->fColorType);
if (upgradeToCoverageAAOnMerge) {
fAAType = static_cast<unsigned>(GrAAType::kCoverage);
}
@ -644,11 +644,12 @@ private:
unsigned fFilter : 2;
unsigned fAAType : 2;
unsigned fDomain : 1;
unsigned fWideColor : 1;
unsigned fColorType : 2;
GR_STATIC_ASSERT(GrQuadPerEdgeAA::kColorTypeCount <= 4);
// Used to track whether fProxy is ref'ed or has a pending IO after finalize() is called.
unsigned fFinalized : 1;
unsigned fCanSkipAllocatorGather : 1;
unsigned fProxyCnt : 32 - 8;
unsigned fProxyCnt : 32 - 9;
Proxy fProxies[1];
static_assert(kGrQuadTypeCount <= 4, "GrQuadType does not fit in 2 bits");