// Source: skia2/tests/BulkRectTest.cpp (C++; listed as 236 lines, 10 KiB by the repository viewer)

/*
* Copyright 2019 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "include/gpu/GrDirectContext.h"
#include "src/core/SkBlendModePriv.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/GrProxyProvider.h"
#include "src/gpu/GrRenderTargetContext.h"
#include "src/gpu/ops/GrFillRectOp.h"
#include "src/gpu/ops/GrTextureOp.h"
#include "tests/Test.h"
// Builds the render target context the tests record their ops into: 128x128, RGBA_8888,
// exact-fit backing store.
static std::unique_ptr<GrRenderTargetContext> new_RTC(GrRecordingContext* rContext) {
    static constexpr SkISize kDimensions = {128, 128};

    return GrRenderTargetContext::Make(rContext,
                                       GrColorType::kRGBA_8888,
                                       nullptr,
                                       SkBackingFit::kExact,
                                       kDimensions);
}
// Creates a 128x128, renderable, non-mipmapped, unbudgeted and unprotected proxy whose format is
// the context's default renderable backend format for RGBA_8888.
sk_sp<GrSurfaceProxy> create_proxy(GrRecordingContext* rContext) {
    static constexpr SkISize kDimensions = {128, 128};

    const auto* caps = rContext->priv().caps();
    const GrBackendFormat format =
            caps->getDefaultBackendFormat(GrColorType::kRGBA_8888, GrRenderable::kYes);

    auto* proxyProvider = rContext->priv().proxyProvider();
    return proxyProvider->createProxy(format,
                                      kDimensions,
                                      GrRenderable::kYes,
                                      1,
                                      GrMipmapped::kNo,
                                      SkBackingFit::kExact,
                                      SkBudgeted::kNo,
                                      GrProtected::kNo,
                                      GrInternalSurfaceFlags::kNone);
}
typedef GrQuadAAFlags (*PerQuadAAFunc)(int i);
typedef void (*BulkRectTest)(skiatest::Reporter*,
GrDirectContext*,
PerQuadAAFunc perQuadAA,
GrAAType overallAA,
SkBlendMode,
int requestedTotNumQuads,
int expectedNumOps);
//-------------------------------------------------------------------------------------------------
// Submits 'requestedTotNumQuads' identical white quads through GrFillRectOp::AddFillRectOps and
// verifies what ended up in the render target context's last ops task:
//   - every op chain is a GrFillRectOp that is also its chain's tail,
//   - the number of op chains equals 'expectedNumOps',
//   - the quads held by those ops sum to 'requestedTotNumQuads'.
//
// 'perQuadAA' supplies the AA flags for each quad index, 'overallAA' is the AA type passed for
// the whole batch, and 'blendMode' selects the XP factory installed on the paint.
static void bulk_fill_rect_create_test(skiatest::Reporter* reporter, GrDirectContext* dContext,
                                       PerQuadAAFunc perQuadAA, GrAAType overallAA,
                                       SkBlendMode blendMode,
                                       int requestedTotNumQuads, int expectedNumOps) {
    std::unique_ptr<GrRenderTargetContext> rtc = new_RTC(dContext);

    // Held by a unique_ptr so the array is released on every path out of this function; it stays
    // alive until after the flush below, exactly as the manual delete[] used to guarantee.
    std::unique_ptr<GrRenderTargetContext::QuadSetEntry[]> quads(
            new GrRenderTargetContext::QuadSetEntry[requestedTotNumQuads]);

    for (int i = 0; i < requestedTotNumQuads; ++i) {
        quads[i].fRect = SkRect::MakeWH(100.5f, 100.5f);  // prevent the int non-AA optimization
        quads[i].fColor = SK_PMColor4fWHITE;
        quads[i].fLocalMatrix = SkMatrix::I();
        quads[i].fAAFlags = perQuadAA(i);
    }

    GrPaint paint;
    paint.setXPFactory(SkBlendMode_AsXPFactory(blendMode));

    GrFillRectOp::AddFillRectOps(rtc.get(), nullptr, dContext, std::move(paint), overallAA,
                                 SkMatrix::I(), quads.get(), requestedTotNumQuads);

    GrOpsTask* opsTask = rtc->testingOnly_PeekLastOpsTask();
    const int actualNumOps = opsTask->numOpChains();

    int actualTotNumQuads = 0;
    for (int i = 0; i < actualNumOps; ++i) {
        const GrOp* tmp = opsTask->getChain(i);
        REPORTER_ASSERT(reporter, tmp->classID() == GrFillRectOp::ClassID());
        REPORTER_ASSERT(reporter, tmp->isChainTail());
        // Named cast that keeps the pointee const instead of silently dropping it.
        actualTotNumQuads += static_cast<const GrDrawOp*>(tmp)->numQuads();
    }

    REPORTER_ASSERT(reporter, expectedNumOps == actualNumOps);
    REPORTER_ASSERT(reporter, requestedTotNumQuads == actualTotNumQuads);

    dContext->flushAndSubmit();
}
//-------------------------------------------------------------------------------------------------
static void bulk_texture_rect_create_test(skiatest::Reporter* reporter, GrDirectContext* dContext,
PerQuadAAFunc perQuadAA, GrAAType overallAA,
SkBlendMode blendMode,
int requestedTotNumQuads, int expectedNumOps) {
std::unique_ptr<GrRenderTargetContext> rtc = new_RTC(dContext);
sk_sp<GrSurfaceProxy> proxyA = create_proxy(dContext);
sk_sp<GrSurfaceProxy> proxyB = create_proxy(dContext);
GrSurfaceProxyView proxyViewA(std::move(proxyA), kTopLeft_GrSurfaceOrigin, GrSwizzle::RGBA());
GrSurfaceProxyView proxyViewB(std::move(proxyB), kTopLeft_GrSurfaceOrigin, GrSwizzle::RGBA());
auto set = new GrRenderTargetContext::TextureSetEntry[requestedTotNumQuads];
for (int i = 0; i < requestedTotNumQuads; ++i) {
// Alternate between two proxies to prevent op merging if the batch API was forced to submit
// one op at a time (to work, this does require that all fDstRects overlap).
set[i].fProxyView = i % 2 == 0 ? proxyViewA : proxyViewB;
set[i].fSrcAlphaType = kPremul_SkAlphaType;
set[i].fSrcRect = SkRect::MakeWH(100.0f, 100.0f);
set[i].fDstRect = SkRect::MakeWH(100.5f, 100.5f); // prevent the int non-AA optimization
set[i].fDstClipQuad = nullptr;
set[i].fPreViewMatrix = nullptr;
set[i].fColor = {1.f, 1.f, 1.f, 1.f};
set[i].fAAFlags = perQuadAA(i);
}
GrTextureOp::AddTextureSetOps(rtc.get(),
nullptr,
dContext,
set,
requestedTotNumQuads,
requestedTotNumQuads, // We alternate so proxyCnt == cnt
Merge consecutive entries that share proxy in bulk texture op Previously, a batch draw that reused the same proxy consecutively would create a ViewCountPair for each set entry, with its count == 1. This turned into 1 draw per entry, so although there'd still be a single pipeline, it didn't take advantage of merging those consecutive entries into a larger draw to reduce draw count as well. Initially, the thinking for the batch API was that it was for tilers that used unique images for each tile or render pass. However, Chrome's compositor is also responsible for rendering 9 patches as part of the UI. These appear as 9 consecutive entries in the image set that all refer to the same texture. With this CL the texture op will automatically merge such occurrences into one ViewCountPair with a count of 9. The bulkrect_1000_[grid|random]_sharedimage_batch leverages this case. Before this CL its op would hold 1000 view count pairs that each drew one quad. Now its op will hold 1 view count pair with a count of 1000. On my linux workstation, the bulkrect_1000_grid_sharedimage_batch time went from 377us to 206us. For reference, the _ref variant (which already was a 1 view count pair with ct == 1000 due to merging of each op) has a time of 497us. The difference between 497us and 206us represents the overhead of calling through SkCanvas, op creation, quad optimization analysis 1000x. Interestingly the bulkrect_1000_random_sharedimage_batch benchmark did not change on my workstation. My conjecture is that it is bottlenecked by overdraw of the many overlapping rectangles. Change-Id: Icc4195de0bcb2219f424fdaa79728281c0418558 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/258418 Commit-Queue: Michael Ludwig <michaelludwig@google.com> Reviewed-by: Brian Salomon <bsalomon@google.com>
2019-12-06 18:21:26 +00:00
GrSamplerState::Filter::kNearest,
GrSamplerState::MipmapMode::kNone,
Merge consecutive entries that share proxy in bulk texture op Previously, a batch draw that reused the same proxy consecutively would create a ViewCountPair for each set entry, with its count == 1. This turned into 1 draw per entry, so although there'd still be a single pipeline, it didn't take advantage of merging those consecutive entries into a larger draw to reduce draw count as well. Initially, the thinking for the batch API was that it was for tilers that used unique images for each tile or render pass. However, Chrome's compositor is also responsible for rendering 9 patches as part of the UI. These appear as 9 consecutive entries in the image set that all refer to the same texture. With this CL the texture op will automatically merge such occurrences into one ViewCountPair with a count of 9. The bulkrect_1000_[grid|random]_sharedimage_batch leverages this case. Before this CL its op would hold 1000 view count pairs that each drew one quad. Now its op will hold 1 view count pair with a count of 1000. On my linux workstation, the bulkrect_1000_grid_sharedimage_batch time went from 377us to 206us. For reference, the _ref variant (which already was a 1 view count pair with ct == 1000 due to merging of each op) has a time of 497us. The difference between 497us and 206us represents the overhead of calling through SkCanvas, op creation, quad optimization analysis 1000x. Interestingly the bulkrect_1000_random_sharedimage_batch benchmark did not change on my workstation. My conjecture is that it is bottlenecked by overdraw of the many overlapping rectangles. Change-Id: Icc4195de0bcb2219f424fdaa79728281c0418558 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/258418 Commit-Queue: Michael Ludwig <michaelludwig@google.com> Reviewed-by: Brian Salomon <bsalomon@google.com>
2019-12-06 18:21:26 +00:00
GrTextureOp::Saturate::kYes,
blendMode,
overallAA,
SkCanvas::kStrict_SrcRectConstraint,
SkMatrix::I(),
nullptr);
GrOpsTask* opsTask = rtc->testingOnly_PeekLastOpsTask();
int actualNumOps = opsTask->numOpChains();
int actualTotNumQuads = 0;
if (blendMode != SkBlendMode::kSrcOver ||
!dContext->priv().caps()->dynamicStateArrayGeometryProcessorTextureSupport()) {
// In either of these two cases, GrTextureOp creates one op per quad instead. Since
// each entry alternates proxies but overlaps geometrically, this will prevent the ops
// from being merged back into fewer ops.
expectedNumOps = requestedTotNumQuads;
}
uint32_t expectedOpID = blendMode == SkBlendMode::kSrcOver ? GrTextureOp::ClassID()
: GrFillRectOp::ClassID();
for (int i = 0; i < actualNumOps; ++i) {
const GrOp* tmp = opsTask->getChain(i);
REPORTER_ASSERT(reporter, tmp->classID() == expectedOpID);
REPORTER_ASSERT(reporter, tmp->isChainTail());
actualTotNumQuads += ((GrDrawOp*) tmp)->numQuads();
}
REPORTER_ASSERT(reporter, expectedNumOps == actualNumOps);
REPORTER_ASSERT(reporter, requestedTotNumQuads == actualTotNumQuads);
dContext->flushAndSubmit();
delete[] set;
}
//-------------------------------------------------------------------------------------------------
// Drives 'test' (either the fill-rect or the texture-rect variant) through five scenarios that
// differ in per-quad AA flags, overall AA type, blend mode, quad count and expected op count.
static void run_test(GrDirectContext* dContext, skiatest::Reporter* reporter, BulkRectTest test) {
    const auto maxNonAAQuads = GrResourceProvider::MaxNumNonAAQuads();
    const auto maxAAQuads = GrResourceProvider::MaxNumAAQuads();

    // Per-quad AA selectors used by the scenarios below.
    const PerQuadAAFunc neverAA = [](int) -> GrQuadAAFlags { return GrQuadAAFlags::kNone; };
    const PerQuadAAFunc alwaysAA = [](int) -> GrQuadAAFlags { return GrQuadAAFlags::kAll; };
    const PerQuadAAFunc oddIndicesAA = [](int index) -> GrQuadAAFlags {
        if (index % 2) {
            return GrQuadAAFlags::kAll;
        }
        return GrQuadAAFlags::kNone;
    };
    const PerQuadAAFunc nonAAThenAA = [](int index) -> GrQuadAAFlags {
        if (index < GrResourceProvider::MaxNumAAQuads()) {
            return GrQuadAAFlags::kNone;
        }
        return GrQuadAAFlags::kAll;
    };

    // Scenario 1: no AA anywhere. Two non-AA clumps of quads are expected.
    test(reporter, dContext, neverAA, GrAAType::kNone, SkBlendMode::kSrcOver,
         2*maxNonAAQuads, 2);

    // Scenario 2: like scenario 1 but with an overall AA of kCoverage. The per-quad flags are
    // still none, so every quad should be downgraded to non-AA.
    test(reporter, dContext, neverAA, GrAAType::kCoverage, SkBlendMode::kSrcOver,
         2*maxNonAAQuads, 2);

    // Scenario 3: overall AA of kCoverage with per-quad AA alternating by index. This should
    // produce several AA-sized clumps.
    {
        const int expectedOps = 2*maxNonAAQuads / maxAAQuads;
        test(reporter, dContext, oddIndicesAA, GrAAType::kCoverage, SkBlendMode::kSrcOver,
             2*maxNonAAQuads, expectedOps);
    }

    // Scenario 4: a run of MaxNumAAQuads non-AA quads followed by AA quads. This covers a clump
    // that cannot be upgraded to AA because of its size; one non-AA clump followed by one AA
    // clump is expected.
    test(reporter, dContext, nonAAThenAA, GrAAType::kCoverage, SkBlendMode::kSrcOver,
         2*maxAAQuads, 2);

    // Scenario 5: a blend mode other than src-over, which hits the GrFillRectOp fallback path of
    // GrTextureOp. The op count passed in is the one expected when batching succeeds, so that
    // bulk_fill_rect_create_test is checked for batching on all modes;
    // bulk_texture_rect_create_test is responsible for revising its own expectation.
    test(reporter, dContext, alwaysAA, GrAAType::kCoverage, SkBlendMode::kSrcATop,
         2*maxAAQuads, 2);
}
// Runs the shared scenarios in run_test against bulk_fill_rect_create_test.
DEF_GPUTEST_FOR_RENDERING_CONTEXTS(BulkFillRectTest, reporter, ctxInfo) {
    auto* dContext = ctxInfo.directContext();
    run_test(dContext, reporter, bulk_fill_rect_create_test);
}
// Runs the shared scenarios in run_test against bulk_texture_rect_create_test.
DEF_GPUTEST_FOR_RENDERING_CONTEXTS(BulkTextureRectTest, reporter, ctxInfo) {
    auto* dContext = ctxInfo.directContext();
    run_test(dContext, reporter, bulk_texture_rect_create_test);
}