From bdc0bad2e216eab790842912ad184191c2426ac1 Mon Sep 17 00:00:00 2001 From: Michael Ludwig Date: Tue, 14 Dec 2021 15:10:00 -0500 Subject: [PATCH] [graphite] Implement inverse fills Renderer::StencilAndFill() chooses between two instances based on fill type (more to come when we add stencil pass). The inverse fill uses different stencil settings and different geometry (hence why it must be a distinct renderer, since stencil is part of the pipeline). Also updates the command buffer asserts and types to support float3 attributes and has the fill bounds render step pre-transform vertices. This matches the intended plan of device-space control points to avoid matrix transform uniforms when no other coords are needed. Makes DepthStencilSettings constexpr so they can be declared constants. Cq-Include-Trybots: luci.skia.skia.primary:Test-Mac11-Clang-MacMini9.1-GPU-AppleM1-arm64-Release-All-Graphite,Test-Mac11-Clang-MacMini9.1-GPU-AppleM1-arm64-Debug-All-ASAN_Graphite,Build-Mac-Clang-arm64-Release-iOS_Graphite,Build-Mac-Clang-arm64-Release-Graphite,Build-Mac-Clang-arm64-Debug-iOS_Graphite,Build-Mac-Clang-arm64-Debug-Graphite_NoGpu,Build-Mac-Clang-arm64-Debug-Graphite,Build-Mac-Clang-arm64-Debug-ASAN_Graphite Bug: skia:12703 Change-Id: I5be4151f533e4cc5c560baf96c59193162b48dab Reviewed-on: https://skia-review.googlesource.com/c/skia/+/484559 Reviewed-by: Greg Daniel Commit-Queue: Michael Ludwig --- experimental/graphite/src/DrawPass.cpp | 9 +- experimental/graphite/src/DrawTypes.h | 34 ++++- experimental/graphite/src/Renderer.h | 8 +- experimental/graphite/src/geom/Transform.cpp | 22 +++ .../graphite/src/geom/Transform_graphite.h | 3 + .../graphite/src/mtl/MtlCommandBuffer.mm | 6 +- .../graphite/src/mtl/MtlGraphicsPipeline.mm | 3 + .../src/render/StencilAndFillPathRenderer.cpp | 144 +++++++++++++----- tests/graphite/CommandBufferTest.cpp | 37 ++++- 9 files changed, 211 insertions(+), 55 deletions(-) diff --git a/experimental/graphite/src/DrawPass.cpp b/experimental/graphite/src/DrawPass.cpp index 7f269eb496..24f7ce3a1e 100644 --- a/experimental/graphite/src/DrawPass.cpp +++ b/experimental/graphite/src/DrawPass.cpp @@ -306,8 +306,11 @@ std::unique_ptr DrawPass::Make(Recorder* recorder, uint32_t geometryIndex = UniformCache::kInvalidUniformID; if (step->numUniforms() > 0) { // TODO: Get layout from the GPU - geometryIndex = geometryUniformBindings.addUniforms( - step->writeUniforms(Layout::kMetal, draw.fTransform, draw.fShape)); + auto uniforms = step->writeUniforms(Layout::kMetal, + draw.fClip.scissor(), + draw.fTransform, + draw.fShape); + geometryIndex = geometryUniformBindings.addUniforms(std::move(uniforms)); } GraphicsPipelineDesc desc; @@ -401,7 +404,7 @@ std::unique_ptr DrawPass::Make(Recorder* recorder, } } - renderStep.writeVertices(&drawWriter, draw.fTransform, draw.fShape); + renderStep.writeVertices(&drawWriter, draw.fClip.scissor(), draw.fTransform, draw.fShape); } // Finish recording draw calls for any collected data at the end of the loop drawWriter.flush(); diff --git a/experimental/graphite/src/DrawTypes.h b/experimental/graphite/src/DrawTypes.h index 3c043b1ca4..ae9f615d45 100644 --- a/experimental/graphite/src/DrawTypes.h +++ b/experimental/graphite/src/DrawTypes.h @@ -260,6 +260,20 @@ static constexpr int kStencilOpCount = 1 + (int)StencilOp::kDecClamp; struct DepthStencilSettings { // Per-face settings for stencil struct Face { + constexpr Face() = default; + constexpr Face(StencilOp stencilFail, + StencilOp depthFail, + StencilOp dsPass, + CompareOp compare, + uint32_t readMask, + uint32_t writeMask) + : fStencilFailOp(stencilFail) + , fDepthFailOp(depthFail) + , fDepthStencilPassOp(dsPass) + , fCompareOp(compare) + , fReadMask(readMask) + , fWriteMask(writeMask) {} + StencilOp fStencilFailOp = StencilOp::kKeep; StencilOp fDepthFailOp = StencilOp::kKeep; StencilOp fDepthStencilPassOp = StencilOp::kKeep; @@ -267,7 +281,7 @@ struct DepthStencilSettings { uint32_t fReadMask = 0xffffffff; uint32_t fWriteMask = 0xffffffff; - bool operator==(const Face& that) const { + constexpr bool operator==(const Face& that) const { return this->fStencilFailOp == that.fStencilFailOp && this->fDepthFailOp == that.fDepthFailOp && this->fDepthStencilPassOp == that.fDepthStencilPassOp && @@ -277,7 +291,23 @@ struct DepthStencilSettings { } }; - bool operator==(const DepthStencilSettings& that) const { + constexpr DepthStencilSettings() = default; + constexpr DepthStencilSettings(Face front, + Face back, + uint32_t stencilRef, + bool stencilTest, + CompareOp depthCompare, + bool depthTest, + bool depthWrite) + : fFrontStencil(front) + , fBackStencil(back) + , fStencilReferenceValue(stencilRef) + , fDepthCompareOp(depthCompare) + , fStencilTestEnabled(stencilTest) + , fDepthTestEnabled(depthTest) + , fDepthWriteEnabled(depthWrite) {} + + constexpr bool operator==(const DepthStencilSettings& that) const { return this->fFrontStencil == that.fFrontStencil && this->fBackStencil == that.fBackStencil && this->fStencilReferenceValue == that.fStencilReferenceValue && diff --git a/experimental/graphite/src/Renderer.h b/experimental/graphite/src/Renderer.h index c916bd22e5..395aecbebe 100644 --- a/experimental/graphite/src/Renderer.h +++ b/experimental/graphite/src/Renderer.h @@ -22,7 +22,7 @@ #include #include - +struct SkIRect; enum class SkPathFillType; namespace skgpu { @@ -42,7 +42,10 @@ public: // The DrawWriter is configured with the vertex and instance strides of the RenderStep, and its // primitive type. The recorded draws will be executed with a graphics pipeline compatible with // this RenderStep. - virtual void writeVertices(DrawWriter*, const Transform&, const Shape&) const = 0; + virtual void writeVertices(DrawWriter*, + const SkIRect& bounds, + const Transform&, + const Shape&) const = 0; // Write out the uniform values (aligned for the layout). These values will be de-duplicated // across all draws using the RenderStep before uploading to the GPU, but it can be assumed the @@ -54,6 +57,7 @@ public: // Similarly, it would be nice if this could write into reusable storage and then DrawPass or // UniformCache handles making an sk_sp if we need to assign a new unique ID to the uniform data virtual sk_sp writeUniforms(Layout layout, + const SkIRect& bounds, const Transform&, const Shape&) const = 0; diff --git a/experimental/graphite/src/geom/Transform.cpp b/experimental/graphite/src/geom/Transform.cpp index cab0b1d0f1..27687d27c9 100644 --- a/experimental/graphite/src/geom/Transform.cpp +++ b/experimental/graphite/src/geom/Transform.cpp @@ -8,6 +8,7 @@ #include "experimental/graphite/src/geom/Transform_graphite.h" #include "experimental/graphite/src/geom/Rect.h" +#include "experimental/graphite/src/geom/VectorTypes.h" #include "src/core/SkMatrixPriv.h" namespace skgpu { @@ -45,4 +46,25 @@ bool Transform::operator==(const Transform& t) const { Rect Transform::mapRect(const Rect& rect) const { return map_rect(fM, rect); } Rect Transform::inverseMapRect(const Rect& rect) const { return map_rect(fInvM, rect); } +void Transform::mapPoints(const Rect& localRect, SkV4 deviceOut[4]) const { + SkV2 localCorners[4] = {{localRect.left(), localRect.top()}, + {localRect.right(), localRect.top()}, + {localRect.right(), localRect.bot()}, + {localRect.left(), localRect.bot()}}; + this->mapPoints(localCorners, deviceOut, 4); +} + +void Transform::mapPoints(const SkV2* localIn, SkV4* deviceOut, int count) const { + // TODO: These maybe should go into SkM44, since bulk point mapping seems generally useful + float4 c0 = float4::Load(SkMatrixPriv::M44ColMajor(fM) + 0); + float4 c1 = float4::Load(SkMatrixPriv::M44ColMajor(fM) + 4); + // skip c2 since localIn's z is assumed to be 0 + float4 c3 = float4::Load(SkMatrixPriv::M44ColMajor(fM) + 12); + + for (int i = 0; i < count; ++i) { + float4 p = c0 * localIn[i].x + c1 * localIn[i].y /* + c2*0.f */ + c3 /* *1.f */; + p.store(deviceOut + i); + } +} + } // namespace skgpu diff --git a/experimental/graphite/src/geom/Transform_graphite.h b/experimental/graphite/src/geom/Transform_graphite.h index 8b57681540..6cdc3acb3b 100644 --- a/experimental/graphite/src/geom/Transform_graphite.h +++ b/experimental/graphite/src/geom/Transform_graphite.h @@ -63,6 +63,9 @@ public: Rect mapRect(const Rect& rect) const; Rect inverseMapRect(const Rect& rect) const; + void mapPoints(const Rect& localRect, SkV4 deviceOut[4]) const; + void mapPoints(const SkV2* localIn, SkV4* deviceOut, int count) const; + private: SkM44 fM; SkM44 fInvM; // M^-1 diff --git a/experimental/graphite/src/mtl/MtlCommandBuffer.mm b/experimental/graphite/src/mtl/MtlCommandBuffer.mm index 1561122793..3197f28b5a 100644 --- a/experimental/graphite/src/mtl/MtlCommandBuffer.mm +++ b/experimental/graphite/src/mtl/MtlCommandBuffer.mm @@ -252,13 +252,15 @@ void CommandBuffer::onBindVertexBuffers(const skgpu::Buffer* vertexBuffer, if (vertexBuffer) { id mtlBuffer = static_cast(vertexBuffer)->mtlBuffer(); - SkASSERT((vertexOffset & 0xF) == 0); + // Metal requires buffer offsets to be aligned to the data type, which is at most 4 bytes + // since we use [[attribute]] to automatically unpack float components into SIMD arrays. + SkASSERT((vertexOffset & 0b11) == 0); fActiveRenderCommandEncoder->setVertexBuffer(mtlBuffer, vertexOffset, GraphicsPipeline::kVertexBufferIndex); } if (instanceBuffer) { id mtlBuffer = static_cast(instanceBuffer)->mtlBuffer(); - SkASSERT((instanceOffset & 0xF) == 0); + SkASSERT((instanceOffset & 0b11) == 0); fActiveRenderCommandEncoder->setVertexBuffer(mtlBuffer, instanceOffset, GraphicsPipeline::kInstanceBufferIndex); } diff --git a/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm b/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm index a0045d7e67..3096a99d14 100644 --- a/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm +++ b/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm @@ -87,6 +87,9 @@ SkSL::String emit_SkSL_attributes(SkSpan vertexAttrs, case SLType::kFloat2: result.append("float2"); break; + case SLType::kFloat3: + result.append("float3"); + break; case SLType::kFloat: result.append("float"); break; diff --git a/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp b/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp index 278170a05b..d70ee3f95c 100644 --- a/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp +++ b/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp @@ -13,12 +13,66 @@ #include "experimental/graphite/src/geom/Shape.h" #include "experimental/graphite/src/geom/Transform_graphite.h" #include "include/core/SkPathTypes.h" +#include "include/core/SkRect.h" #include "src/gpu/BufferWriter.h" namespace skgpu { namespace { +// TODO: These settings are actually shared by tessellating path renderers, so will be exposed later + +// Returns the stencil settings to use for a standard Redbook "fill" pass. Allows non-zero +// stencil values to pass and write a color, and resets the stencil value back to zero; discards +// immediately on stencil values of zero (or does the inverse of these operations when the path +// requires filling everything else). +constexpr DepthStencilSettings cover_settings(bool inverse) { + // Resets non-zero bits to 0, passes when not zero. We set depthFail to kZero because if we + // encounter that case, the kNotEqual=0 stencil test passed, so it does need to be set back to 0 + // and the dsPass op won't be run. In practice, since the stencil steps will fail the same depth + // test, the stencil value will likely not be non-zero, but best to be explicit. + constexpr DepthStencilSettings::Face kNormal = { + /*stencilFail=*/ StencilOp::kKeep, + /*depthFail=*/ StencilOp::kZero, + /*dsPass=*/ StencilOp::kZero, + /*stencilCompare=*/CompareOp::kEqual, // TODO: Actually kNotEqual once we have stencil steps + /*readMask=*/ 0xffffffff, + /*writeMask=*/ 0xffffffff + }; + + // Resets non-zero bits to 0, passes when zero + constexpr DepthStencilSettings::Face kInverted = { + /*stencilFail=*/ StencilOp::kZero, + /*depthFail=*/ StencilOp::kKeep, + /*dsPass=*/ StencilOp::kKeep, + /*stencilCompare=*/CompareOp::kEqual, + /*readMask=*/ 0xffffffff, + /*writeMask=*/ 0xffffffff + }; + + // Always use ref = 0, enabled depth writes, and greater depth test, both + // front and back use the same stencil settings. + constexpr DepthStencilSettings kNormalDSS = { + /*frontStencil=*/kNormal, + /*frontStencil=*/kNormal, + /*refValue=*/ 0, + /*stencilTest=*/ true, + /*depthCompare=*/CompareOp::kAlways, // kGreater once steps know the right depth value + /*depthTest=*/ true, + /*depthWrite=*/ true + }; + constexpr DepthStencilSettings kInvertedDSS = { + /*frontStencil=*/kInverted, + /*backStencil=*/ kInverted, + /*refValue=*/ 0, + /*stencilTest=*/ true, + /*depthCompare=*/CompareOp::kAlways, // kGreater once steps know the right depth value + /*depthTest=*/ true, + /*depthWrite=*/ true + }; + return inverse ? kInvertedDSS : kNormalDSS; +} + // TODO: Hand off to csmartdalton, this should roughly correspond to the fStencilFanProgram and // simple triangulator shader stage of the skgpu::v1::PathStencilCoverOp /* @@ -61,53 +115,58 @@ class FillBoundsRenderStep final : public RenderStep { public: // TODO: Will need to add kRequiresStencil when we support specifying stencil settings and // the Renderer includes the stenciling step first. - FillBoundsRenderStep() + FillBoundsRenderStep(bool inverseFill) : RenderStep(Flags::kPerformsShading, - /*uniforms=*/{{"localToDevice", SLType::kFloat4x4}}, - PrimitiveType::kTriangleStrip, - DepthStencilSettings(), - /*vertexAttrs=*/{{"position", VertexAttribType::kFloat2, SLType::kFloat2}}, - /*instanceAttrs=*/{}) {} + /*uniforms=*/{}, + PrimitiveType::kTriangles, + cover_settings(inverseFill), + /*vertexAttrs=*/{{"position", VertexAttribType::kFloat3, SLType::kFloat3}}, + /*instanceAttrs=*/{}) + , fInverseFill(inverseFill) {} ~FillBoundsRenderStep() override {} const char* name() const override { return "fill-bounds"; } const char* vertexSkSL() const override { - // TODO: RenderSteps should not worry about RTAdjust, but currently the mtl pipeline does - // account for it, so this geometry won't be in the right coordinate system yet. - return " float4 devPosition = localToDevice * float4(position, 0.0, 1.0);\n"; + return " float4 devPosition = float4(position.xy, 0.0, position.z);\n"; } - void writeVertices(DrawWriter* writer, const Transform&, const Shape& shape) const override { - // TODO: For now the transform is handled as a uniform so writeVertices ignores it, but - // for something as simple as the bounding box, CPU transformation might be best. - writer->appendVertices(4) - .writeQuad(VertexWriter::TriStripFromRect(shape.bounds().asSkRect())); - // Since we upload 4 dynamic verts as a triangle strip, we need to actually draw them - // otherwise the next writeVertices() call would get connected to our verts. - // TODO: Primitive restart? Just use indexed drawing? Just write 6 verts? - writer->flush(); + void writeVertices(DrawWriter* writer, + const SkIRect& bounds, + const Transform& localToDevice, + const Shape& shape) const override { + SkV4 devPoints[4]; // ordered TL, TR, BR, BL + + if (fInverseFill) { + // TODO: When we handle local coords, we'd need to map these corners by the inverse. + devPoints[0] = {(float) bounds.fLeft, (float) bounds.fTop, 0.f, 1.f}; + devPoints[1] = {(float) bounds.fRight, (float) bounds.fTop, 0.f, 1.f}; + devPoints[2] = {(float) bounds.fRight, (float) bounds.fBottom, 0.f, 1.f}; + devPoints[3] = {(float) bounds.fLeft, (float) bounds.fBottom, 0.f, 1.f}; + } else { + localToDevice.mapPoints(shape.bounds(), devPoints); + } + + auto vw = writer->appendVertices(6); + vw << devPoints[0].x << devPoints[0].y << devPoints[0].w // TL + << devPoints[3].x << devPoints[3].y << devPoints[3].w // BL + << devPoints[1].x << devPoints[1].y << devPoints[1].w // TR + << devPoints[1].x << devPoints[1].y << devPoints[1].w // TR + << devPoints[3].x << devPoints[3].y << devPoints[3].w // BL + << devPoints[2].x << devPoints[2].y << devPoints[2].w;// BR } sk_sp writeUniforms(Layout layout, + const SkIRect&, const Transform& localToDevice, const Shape&) const override { - // TODO: Given that a RenderStep has its own uniform binding slot, these offsets never - // change so we could cache them per layout. - UniformManager mgr(layout); - size_t dataSize = mgr.writeUniforms(this->uniforms(), nullptr, nullptr, nullptr); - sk_sp transformData = UniformData::Make((int) this->numUniforms(), - this->uniforms().data(), - dataSize); - - const void* transform[1] = {&localToDevice.matrix()}; - mgr.writeUniforms(this->uniforms(), - transform, - transformData->offsets(), - transformData->data()); - return transformData; + // Positions are pre-transformed on the CPU so no uniforms needed + return nullptr; } + +private: + const bool fInverseFill; }; } // anonymous namespace @@ -118,13 +177,22 @@ const Renderer& Renderer::StencilAndFillPath(SkPathFillType fillType) { // TODO: Uncomment and include in kRenderer to draw curved paths // static const StencilCurvesRenderStep kStencilCurves; // TODO: This could move into a header and be reused across renderers - static const FillBoundsRenderStep kCover; - static const Renderer kRenderer("stencil-and-fill", - /*&kStencilFan,*/ /*&kStencilCurves,*/ &kCover); + static const FillBoundsRenderStep kFill{false}; + static const FillBoundsRenderStep kInverseFill{true}; - // TODO: actually configure 4 different combinations of fill+stencil steps to match the - // path fill types. - return kRenderer; + // TODO: Combine these two with stencil steps for winding and even-odd. + static const Renderer kFillRenderer("stencil-and-fill[]", &kFill); + static const Renderer kInverseFillRenderer("stencil-and-fill[inverse]", &kInverseFill); + + switch (fillType) { + case SkPathFillType::kWinding: [[fallthrough]]; + case SkPathFillType::kEvenOdd: + return kFillRenderer; + case SkPathFillType::kInverseWinding: [[fallthrough]]; + case SkPathFillType::kInverseEvenOdd: + return kInverseFillRenderer; + } + SkUNREACHABLE; } } // namespace skgpu diff --git a/tests/graphite/CommandBufferTest.cpp b/tests/graphite/CommandBufferTest.cpp index da0f91b993..38822411db 100644 --- a/tests/graphite/CommandBufferTest.cpp +++ b/tests/graphite/CommandBufferTest.cpp @@ -37,11 +37,13 @@ using namespace skgpu; namespace { const DepthStencilSettings kTestDepthStencilSettings = { + // stencil {}, {}, 0, - CompareOp::kAlways, true, + // depth + CompareOp::kAlways, true, false, }; @@ -62,12 +64,16 @@ public: "float4 devPosition = float4(tmpPosition * scale + translate, 0.0, 1.0);\n"; } - void writeVertices(DrawWriter* writer, const Transform&, const Shape&) const override { + void writeVertices(DrawWriter* writer, + const SkIRect&, + const Transform&, + const Shape&) const override { // The shape is upload via uniforms, so this just needs to record 4 data-less vertices writer->draw({}, {}, 4); } sk_sp writeUniforms(Layout layout, + const SkIRect&, const Transform&, const Shape& shape) const override { SkASSERT(shape.isRect()); @@ -107,7 +113,10 @@ public: return "float4 devPosition = float4(position * scale + translate, 0.0, 1.0);\n"; } - void writeVertices(DrawWriter* writer, const Transform&, const Shape& shape) const override { + void writeVertices(DrawWriter* writer, + const SkIRect&, + const Transform&, + const Shape& shape) const override { DrawBufferManager* bufferMgr = writer->bufferManager(); auto [vertexWriter, vertices] = bufferMgr->getVertexWriter(4 * this->vertexStride()); vertexWriter << 0.5f * (shape.rect().left() + 1.f) << 0.5f * (shape.rect().top() + 1.f) @@ -123,7 +132,10 @@ public: writer->draw(vertices, indices, 6); } - sk_sp writeUniforms(Layout layout, const Transform&, const Shape&) const override { + sk_sp writeUniforms(Layout layout, + const SkIRect&, + const Transform&, + const Shape&) const override { auto uniforms = UniformData::Make(this->numUniforms(), this->uniforms().data(), sizeof(float) * 4); float data[4] = {2.f, 2.f, -1.f, -1.f}; @@ -158,7 +170,10 @@ public: "float4 devPosition = float4(tmpPosition * dims + position, 0.0, 1.0);\n"; } - void writeVertices(DrawWriter* writer, const Transform&, const Shape& shape) const override { + void writeVertices(DrawWriter* writer, + const SkIRect&, + const Transform&, + const Shape& shape) const override { SkASSERT(shape.isRect()); DrawBufferManager* bufferMgr = writer->bufferManager(); @@ -174,7 +189,10 @@ public: instanceWriter << shape.rect().topLeft() << shape.rect().size(); } - sk_sp writeUniforms(Layout, const Transform&, const Shape&) const override { + sk_sp writeUniforms(Layout, + const SkIRect&, + const Transform&, + const Shape&) const override { return nullptr; } @@ -269,12 +287,15 @@ DEF_GRAPHITE_TEST_FOR_CONTEXTS(CommandBufferTest, reporter, context) { // All of the test RenderSteps ignore the transform, so just use the identity static const Transform kIdentity{SkM44()}; + // No set scissor, so use entire render target dimensions + static const SkIRect kBounds = SkIRect::MakeWH(kTextureWidth, kTextureHeight); for (auto d : draws) { drawWriter.newDynamicState(); Shape shape(d.fRect); - auto renderStepUniforms = step->writeUniforms(Layout::kMetal, kIdentity, shape); + auto renderStepUniforms = + step->writeUniforms(Layout::kMetal, kBounds, kIdentity, shape); if (renderStepUniforms) { auto [writer, bindInfo] = bufferMgr.getUniformWriter(renderStepUniforms->dataSize()); @@ -291,7 +312,7 @@ DEF_GRAPHITE_TEST_FOR_CONTEXTS(CommandBufferTest, reporter, context) { sk_ref_sp(bindInfo.fBuffer), bindInfo.fOffset); - step->writeVertices(&drawWriter, kIdentity, shape); + step->writeVertices(&drawWriter, kBounds, kIdentity, shape); } };