Make program key descriptions available in release builds

Whenever we need the description (as we're putting something in the
persistent cache), go back and re-compute the key to get the
description. This pattern means that we pay a (very) small overhead for
virtual dispatch in the common case, and only pay for the expensive
string work when we're already doing expensive work (compiling).

Bug: skia:11372
Change-Id: I3d4dc19f2d8883f8117f5f6489fc852cf9503eb3
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/380359
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
This commit is contained in:
Brian Osman 2021-03-05 14:58:25 -05:00 committed by Skia Commit-Bot
parent 6fb520fcd5
commit e23c03d337
13 changed files with 130 additions and 157 deletions

View File

@ -203,8 +203,7 @@ public:
auto add_attributes = [=](const Attribute* attrs, int attrCount) {
for (int i = 0; i < attrCount; ++i) {
const Attribute& attr = attrs[i];
b->appendComment(
[&attr]() { return attr.isInitialized() ? attr.name() : "unusedAttr"; });
b->appendComment(attr.isInitialized() ? attr.name() : "unusedAttr");
b->addBits(8, attr.isInitialized() ? attr.cpuType() : 0xff, "attrType");
b->addBits(8, attr.isInitialized() ? attr.gpuType() : 0xff, "attrGpuType");
}

View File

@ -81,7 +81,7 @@ static constexpr uint32_t kClassIDBits = 8;
static void gen_pp_key(const GrPrimitiveProcessor& pp,
const GrCaps& caps,
GrProcessorKeyBuilder* b) {
b->appendComment([&]() { return pp.name(); });
b->appendComment(pp.name());
b->addBits(kClassIDBits, pp.classID(), "ppClassID");
pp.getGLSLProcessorKey(*caps.shaderCaps(), b);
@ -94,7 +94,7 @@ static void gen_xp_key(const GrXferProcessor& xp,
const GrCaps& caps,
const GrPipeline& pipeline,
GrProcessorKeyBuilder* b) {
b->appendComment([&](){ return xp.name(); });
b->appendComment(xp.name());
b->addBits(kClassIDBits, xp.classID(), "xpClassID");
const GrSurfaceOrigin* originIfDstTexture = nullptr;
@ -109,7 +109,7 @@ static void gen_xp_key(const GrXferProcessor& xp,
static void gen_fp_key(const GrFragmentProcessor& fp,
const GrCaps& caps,
GrProcessorKeyBuilder* b) {
b->appendComment([&]() { return fp.name(); });
b->appendComment(fp.name());
b->addBits(kClassIDBits, fp.classID(), "fpClassID");
b->addBits(GrPrimitiveProcessor::kCoordTransformKeyBits,
GrPrimitiveProcessor::ComputeCoordTransformsKey(fp), "fpTransforms");
@ -129,30 +129,16 @@ static void gen_fp_key(const GrFragmentProcessor& fp,
gen_fp_key(*child, caps, b);
} else {
// Fold in a sentinel value as the "class ID" for any null children
b->appendComment([&](){ return "Null"; });
b->appendComment("Null");
b->addBits(kClassIDBits, GrProcessor::ClassID::kNull_ClassID, "fpClassID");
}
}
}
bool GrProgramDesc::Build(GrProgramDesc* desc,
GrRenderTarget* renderTarget,
const GrProgramInfo& programInfo,
const GrCaps& caps) {
#ifdef SK_DEBUG
if (renderTarget) {
SkASSERT(programInfo.backendFormat() == renderTarget->backendFormat());
}
#endif
// The descriptor is used as a cache key. Thus when a field of the
// descriptor will not affect program generation (because of the attribute
// bindings in use or other descriptor field settings) it should be set
// to a canonical value to avoid duplicate programs with different keys.
GrProcessorKeyBuilder* b = desc->key();
b->reset();
static void gen_key(GrProcessorKeyBuilder* b,
GrRenderTarget* renderTarget,
const GrProgramInfo& programInfo,
const GrCaps& caps) {
gen_pp_key(programInfo.primProc(), caps, b);
const GrPipeline& pipeline = programInfo.pipeline();
@ -182,7 +168,28 @@ bool GrProgramDesc::Build(GrProgramDesc* desc,
// Put a clean break between the "common" data written by this function, and any backend data
// appended later. The initial key length will just be this portion (rounded to 4 bytes).
b->flush();
desc->fInitialKeyLength = desc->keyLength();
return true;
}
void GrProgramDesc::Build(GrProgramDesc* desc,
GrRenderTarget* renderTarget,
const GrProgramInfo& programInfo,
const GrCaps& caps) {
SkASSERT(!renderTarget || programInfo.backendFormat() == renderTarget->backendFormat());
desc->reset();
GrProcessorKeyBuilder b(desc->key());
gen_key(&b, renderTarget, programInfo, caps);
desc->fInitialKeyLength = desc->keyLength();
}
SkString GrProgramDesc::Describe(GrRenderTarget* renderTarget,
const GrProgramInfo& programInfo,
const GrCaps& caps) {
SkASSERT(!renderTarget || programInfo.backendFormat() == renderTarget->backendFormat());
GrProgramDesc desc;
GrProcessorStringKeyBuilder b(desc.key());
gen_key(&b, renderTarget, programInfo, caps);
b.flush();
return b.description();
}

View File

@ -22,23 +22,23 @@ class GrShaderCaps;
class GrProcessorKeyBuilder {
public:
GrProcessorKeyBuilder() = default;
GrProcessorKeyBuilder(const GrProcessorKeyBuilder& other) = default;
GrProcessorKeyBuilder(SkTArray<uint32_t, true>* data) : fData(data) {}
void reset() { *this = GrProcessorKeyBuilder{}; }
virtual ~GrProcessorKeyBuilder() {
// Ensure that flush was called before we went out of scope
SkASSERT(fBitsUsed == 0);
}
void addBits(uint32_t numBits, uint32_t val, const char* label) {
virtual void addBits(uint32_t numBits, uint32_t val, const char* label) {
SkASSERT(numBits > 0 && numBits <= 32);
SkASSERT(numBits == 32 || (val < (1u << numBits)));
SkDEBUGCODE(fDescription.appendf("%s: %u\n", label, val);)
fCurValue |= (val << fBitsUsed);
fBitsUsed += numBits;
if (fBitsUsed >= 32) {
// Overflow, start a new working value
fData.push_back(fCurValue);
fData->push_back(fCurValue);
uint32_t excess = fBitsUsed - 32;
fCurValue = excess ? (val >> (numBits - excess)) : 0;
fBitsUsed = excess;
@ -48,7 +48,6 @@ public:
}
void addBytes(uint32_t numBytes, const void* data, const char* label) {
// TODO: Make this smarter/faster?
const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
for (; numBytes --> 0; bytes++) {
this->addBits(8, *bytes, label);
@ -63,75 +62,42 @@ public:
this->addBits(32, v, label);
}
template <typename StringFunc>
void appendComment(StringFunc&& sf) {
#ifdef SK_DEBUG
fDescription.append(sf());
fDescription.append("\n");
#endif
}
virtual void appendComment(const char* comment) {}
// Introduces a word-boundary in the key. Must be called before using the key with any cache,
// but can also be called to create a break between generic data and backend-specific data.
void flush() {
if (fBitsUsed) {
fData.push_back(fCurValue);
fData->push_back(fCurValue);
fCurValue = 0;
fBitsUsed = 0;
}
}
bool empty() const { return fData.empty() && !fBitsUsed; }
const uint32_t* data() const {
SkASSERT(fBitsUsed == 0); // flush() must be called when construction is complete
return fData.begin();
}
size_t size() const {
return (fData.count() + (fBitsUsed ? 1 : 0)) * sizeof(uint32_t);
}
GrProcessorKeyBuilder& operator=(const GrProcessorKeyBuilder& other) = default;
bool operator==(const GrProcessorKeyBuilder& that) const {
return fBitsUsed == that.fBitsUsed &&
fCurValue == that.fCurValue &&
fData == that.fData;
}
bool operator!= (const GrProcessorKeyBuilder& other) const {
return !(*this == other);
}
void setData(const void* data, size_t length) {
SkASSERT(SkIsAlign4(length));
fData.reset(length / 4);
memcpy(fData.begin(), data, length);
}
SkString description() const {
#ifdef SK_DEBUG
return fDescription;
#else
return SkString{};
#endif
}
private:
enum {
kHeaderSize = 1, // "header" in ::Build
kMaxPreallocProcessors = 8,
kIntsPerProcessor = 4, // This is an overestimate of the average effect key size.
kPreAllocSize = kHeaderSize +
kMaxPreallocProcessors * kIntsPerProcessor,
};
SkSTArray<kPreAllocSize, uint32_t, true> fData;
SkTArray<uint32_t, true>* fData;
uint32_t fCurValue = 0;
uint32_t fBitsUsed = 0; // ... in current value
};
SkDEBUGCODE(SkString fDescription;)
class GrProcessorStringKeyBuilder : public GrProcessorKeyBuilder {
public:
GrProcessorStringKeyBuilder(SkTArray<uint32_t, true>* data) : INHERITED(data) {}
void addBits(uint32_t numBits, uint32_t val, const char* label) override {
INHERITED::addBits(numBits, val, label);
fDescription.appendf("%s: %u\n", label, val);
}
void appendComment(const char* comment) override {
fDescription.appendf("%s\n", comment);
}
SkString description() const { return fDescription; }
private:
using INHERITED = GrProcessorKeyBuilder;
SkString fDescription;
};
/** This class is used to generate a generic program cache key. The Dawn, Metal and Vulkan
@ -142,6 +108,7 @@ public:
GrProgramDesc(const GrProgramDesc& other) = default;
bool isValid() const { return !fKey.empty(); }
void reset() { *this = GrProgramDesc{}; }
// Returns this as a uint32_t array to be used as a key in the program cache.
const uint32_t* asKey() const {
@ -150,14 +117,9 @@ public:
// Gets the number of bytes in asKey(). It will be a 4-byte aligned value.
uint32_t keyLength() const {
SkASSERT(0 == (fKey.size() % 4));
return fKey.size();
return fKey.size() * sizeof(uint32_t);
}
SkString description() const { return fKey.description(); }
GrProgramDesc& operator= (const GrProgramDesc& other) = default;
bool operator== (const GrProgramDesc& that) const {
return this->fKey == that.fKey;
}
@ -168,6 +130,10 @@ public:
uint32_t initialKeyLength() const { return fInitialKeyLength; }
// TODO(skia:11372): Incorporate this into caps interface (part of makeDesc, or a parallel
// function), so other backends can include their information in the description.
static SkString Describe(GrRenderTarget*, const GrProgramInfo&, const GrCaps&);
protected:
friend class GrDawnCaps;
friend class GrD3DCaps;
@ -189,21 +155,32 @@ protected:
* @param programInfo Program information need to build the key
* @param caps the caps
**/
static bool Build(GrProgramDesc*, GrRenderTarget*, const GrProgramInfo&, const GrCaps&);
static void Build(GrProgramDesc*, GrRenderTarget*, const GrProgramInfo&, const GrCaps&);
// This is strictly an OpenGL call since the other backends have additional data in their keys.
static bool BuildFromData(GrProgramDesc* desc, const void* keyData, size_t keyLength) {
if (!SkTFitsIn<int>(keyLength)) {
if (!SkTFitsIn<int>(keyLength) || !SkIsAlign4(keyLength)) {
return false;
}
desc->fKey.setData(keyData, keyLength);
desc->fKey.reset(keyLength / 4);
memcpy(desc->fKey.begin(), keyData, keyLength);
return true;
}
GrProcessorKeyBuilder* key() { return &fKey; }
enum {
kHeaderSize = 1, // "header" in ::Build
kMaxPreallocProcessors = 8,
kIntsPerProcessor = 4, // This is an overestimate of the average effect key size.
kPreAllocSize = kHeaderSize +
kMaxPreallocProcessors * kIntsPerProcessor,
};
using KeyType = SkSTArray<kPreAllocSize, uint32_t, true>;
KeyType* key() { return &fKey; }
private:
GrProcessorKeyBuilder fKey;
SkSTArray<kPreAllocSize, uint32_t, true> fKey;
uint32_t fInitialKeyLength = 0;
};

View File

@ -1036,29 +1036,26 @@ GrProgramDesc GrD3DCaps::makeDesc(GrRenderTarget* rt,
ProgramDescOverrideFlags overrideFlags) const {
SkASSERT(overrideFlags == ProgramDescOverrideFlags::kNone);
GrProgramDesc desc;
if (!GrProgramDesc::Build(&desc, rt, programInfo, *this)) {
SkASSERT(!desc.isValid());
return desc;
}
GrProgramDesc::Build(&desc, rt, programInfo, *this);
GrProcessorKeyBuilder* b = desc.key();
GrProcessorKeyBuilder b(desc.key());
GrD3DRenderTarget* d3dRT = (GrD3DRenderTarget*) rt;
d3dRT->genKey(b);
d3dRT->genKey(&b);
GrStencilSettings stencil = programInfo.nonGLStencilSettings();
stencil.genKey(b, false);
stencil.genKey(&b, false);
programInfo.pipeline().genKey(b, *this);
programInfo.pipeline().genKey(&b, *this);
// The num samples is already added in the render target key so we don't need to add it here.
SkASSERT(programInfo.numRasterSamples() == rt->numSamples());
// D3D requires the full primitive type as part of its key
b->add32(programInfo.primitiveTypeKey());
b.add32(programInfo.primitiveTypeKey());
SkASSERT(!this->mixedSamplesSupport());
b->flush();
b.flush();
return desc;
}

View File

@ -630,7 +630,8 @@ sk_sp<GrD3DPipelineState> GrD3DPipelineStateBuilder::finalize() {
}
sk_sp<SkData> key =
SkData::MakeWithoutCopy(this->desc().asKey(), this->desc().initialKeyLength());
const SkString& description = this->desc().description();
SkString description =
GrProgramDesc::Describe(fRenderTarget, fProgramInfo, *this->caps());
sk_sp<SkData> data = GrPersistentCacheUtils::PackCachedShaders(
cacheSkSL ? kSKSL_Tag : kHLSL_Tag, hlsl, inputs, kGrShaderTypeCount);
persistentCache->store(*key, *data, description);

View File

@ -169,31 +169,28 @@ GrProgramDesc GrDawnCaps::makeDesc(GrRenderTarget* rt,
ProgramDescOverrideFlags overrideFlags) const {
SkASSERT(overrideFlags == ProgramDescOverrideFlags::kNone);
GrProgramDesc desc;
if (!GrProgramDesc::Build(&desc, rt, programInfo, *this)) {
SkASSERT(!desc.isValid());
return desc;
}
GrProgramDesc::Build(&desc, rt, programInfo, *this);
GrProcessorKeyBuilder* b = desc.key();
wgpu::TextureFormat format;
if (!programInfo.backendFormat().asDawnFormat(&format)) {
b->reset();
desc.reset();
SkASSERT(!desc.isValid());
return desc;
}
GrProcessorKeyBuilder b(desc.key());
GrStencilSettings stencil = programInfo.nonGLStencilSettings();
stencil.genKey(b, true);
stencil.genKey(&b, true);
// TODO: remove this reliance on the renderTarget
bool hasDepthStencil = rt->getStencilAttachment() != nullptr;
b->add32(static_cast<uint32_t>(format));
b->add32(static_cast<int32_t>(hasDepthStencil));
b->add32(get_blend_info_key(programInfo.pipeline()));
b->add32(programInfo.primitiveTypeKey());
b.add32(static_cast<uint32_t>(format));
b.add32(static_cast<int32_t>(hasDepthStencil));
b.add32(get_blend_info_key(programInfo.pipeline()));
b.add32(programInfo.primitiveTypeKey());
b->flush();
b.flush();
return desc;
}

View File

@ -4590,8 +4590,7 @@ GrProgramDesc GrGLCaps::makeDesc(GrRenderTarget* rt,
ProgramDescOverrideFlags overrideFlags) const {
SkASSERT(overrideFlags == ProgramDescOverrideFlags::kNone);
GrProgramDesc desc;
SkDEBUGCODE(bool result =) GrProgramDesc::Build(&desc, rt, programInfo, *this);
SkASSERT(result == desc.isValid());
GrProgramDesc::Build(&desc, rt, programInfo, *this);
return desc;
}

View File

@ -168,7 +168,7 @@ void GrGLProgramBuilder::storeShaderInCache(const SkSL::Program::Inputs& inputs,
return;
}
sk_sp<SkData> key = SkData::MakeWithoutCopy(this->desc().asKey(), this->desc().keyLength());
const SkString& description = this->desc().description();
SkString description = GrProgramDesc::Describe(fRenderTarget, fProgramInfo, *fGpu->caps());
if (fGpu->glCaps().programBinarySupport()) {
// binary cache
GrGLsizei length = 0;

View File

@ -14,8 +14,7 @@ GrProgramDesc GrMockCaps::makeDesc(GrRenderTarget* rt,
ProgramDescOverrideFlags overrideFlags) const {
SkASSERT(overrideFlags == ProgramDescOverrideFlags::kNone);
GrProgramDesc desc;
SkDEBUGCODE(bool result =) GrProgramDesc::Build(&desc, rt, programInfo, *this);
SkASSERT(result == desc.isValid());
GrProgramDesc::Build(&desc, rt, programInfo, *this);
return desc;
}

View File

@ -1081,16 +1081,13 @@ GrProgramDesc GrMtlCaps::makeDesc(GrRenderTarget* rt,
ProgramDescOverrideFlags overrideFlags) const {
SkASSERT(overrideFlags == ProgramDescOverrideFlags::kNone);
GrProgramDesc desc;
if (!GrProgramDesc::Build(&desc, rt, programInfo, *this)) {
SkASSERT(!desc.isValid());
return desc;
}
GrProgramDesc::Build(&desc, rt, programInfo, *this);
GrProcessorKeyBuilder* b = desc.key();
GrProcessorKeyBuilder b(desc.key());
b->add32(programInfo.backendFormat().asMtlFormat());
b.add32(programInfo.backendFormat().asMtlFormat());
b->add32(programInfo.numRasterSamples());
b.add32(programInfo.numRasterSamples());
#ifdef SK_DEBUG
if (rt && programInfo.isStencilEnabled()) {
@ -1098,16 +1095,16 @@ GrProgramDesc GrMtlCaps::makeDesc(GrRenderTarget* rt,
}
#endif
b->add32(rt && rt->getStencilAttachment() ? this->preferredStencilFormat()
b.add32(rt && rt->getStencilAttachment() ? this->preferredStencilFormat()
: MTLPixelFormatInvalid);
b->add32((uint32_t)programInfo.isStencilEnabled());
b.add32((uint32_t)programInfo.isStencilEnabled());
// Stencil samples don't seem to be tracked in the MTLRenderPipeline
programInfo.pipeline().genKey(b, *this);
programInfo.pipeline().genKey(&b, *this);
b->add32(programInfo.primitiveTypeKey());
b.add32(programInfo.primitiveTypeKey());
b->flush();
b.flush();
return desc;
}

View File

@ -77,7 +77,10 @@ void GrMtlPipelineStateBuilder::storeShadersInCache(const SkSL::String shaders[]
// program, and that only depends on the base GrProgramDesc data.
sk_sp<SkData> key = SkData::MakeWithoutCopy(this->desc().asKey(),
this->desc().initialKeyLength());
const SkString& description = this->desc().description();
// TODO(skia:11372): We don't have the render target here, so pass null. This just means that
// any render-target state won't be accurately included in the description.
SkString description =
GrProgramDesc::Describe(/*renderTarget=*/nullptr, fProgramInfo, *this->caps());
sk_sp<SkData> data;
if (isSkSL) {
// source cache, plus metadata to allow for a complete precompile

View File

@ -1774,19 +1774,16 @@ GrProgramDesc GrVkCaps::makeDesc(GrRenderTarget* rt,
const GrProgramInfo& programInfo,
ProgramDescOverrideFlags overrideFlags) const {
GrProgramDesc desc;
if (!GrProgramDesc::Build(&desc, rt, programInfo, *this)) {
SkASSERT(!desc.isValid());
return desc;
}
GrProgramDesc::Build(&desc, rt, programInfo, *this);
GrProcessorKeyBuilder* b = desc.key();
GrProcessorKeyBuilder b(desc.key());
// This will become part of the sheared off key used to persistently cache
// the SPIRV code. It needs to be added right after the base key so that,
// when the base-key is sheared off, the shearing code can include it in the
// reduced key (c.f. the +4s in the SkData::MakeWithCopy calls in
// GrVkPipelineStateBuilder.cpp).
b->add32(GrVkGpu::kShader_PersistentCacheKeyType);
b.add32(GrVkGpu::kShader_PersistentCacheKeyType);
GrVkRenderPass::SelfDependencyFlags selfDepFlags = GrVkRenderPass::SelfDependencyFlags::kNone;
if (programInfo.renderPassBarriers() & GrXferBarrierFlags::kBlend) {
@ -1819,7 +1816,7 @@ GrProgramDesc GrVkCaps::makeDesc(GrRenderTarget* rt,
const GrVkRenderPass* rp = vkRT->getSimpleRenderPass(needsResolve, needsStencil,
selfDepFlags, loadFromResolve);
SkASSERT(rp);
rp->genKey(b);
rp->genKey(&b);
#ifdef SK_DEBUG
if (!rp->isExternal()) {
@ -1844,26 +1841,26 @@ GrProgramDesc GrVkCaps::makeDesc(GrRenderTarget* rt,
// kExternal_AttachmentFlag is only set for wrapped secondary command buffers - which
// will always go through the above 'rt' path (i.e., we can always pass 0 as the final
// parameter to GenKey).
GrVkRenderPass::GenKey(b, attachmentFlags, attachmentsDescriptor, selfDepFlags,
GrVkRenderPass::GenKey(&b, attachmentFlags, attachmentsDescriptor, selfDepFlags,
loadFromResolve, 0);
}
GrStencilSettings stencil = programInfo.nonGLStencilSettings();
stencil.genKey(b, true);
stencil.genKey(&b, true);
programInfo.pipeline().genKey(b, *this);
b->add32(programInfo.numRasterSamples());
programInfo.pipeline().genKey(&b, *this);
b.add32(programInfo.numRasterSamples());
// Vulkan requires the full primitive type as part of its key
b->add32(programInfo.primitiveTypeKey());
b.add32(programInfo.primitiveTypeKey());
if (this->mixedSamplesSupport()) {
// Add "0" to indicate that coverage modulation will not be enabled, or the (non-zero)
// raster sample count if it will.
b->add32(!programInfo.isMixedSampled() ? 0 : programInfo.numRasterSamples());
b.add32(!programInfo.isMixedSampled() ? 0 : programInfo.numRasterSamples());
}
b->flush();
b.flush();
return desc;
}

View File

@ -162,7 +162,7 @@ void GrVkPipelineStateBuilder::storeShadersInCache(const SkSL::String shaders[],
// to the key right after the base key.
sk_sp<SkData> key = SkData::MakeWithoutCopy(this->desc().asKey(),
this->desc().initialKeyLength()+4);
const SkString& description = this->desc().description();
SkString description = GrProgramDesc::Describe(fRenderTarget, fProgramInfo, *this->caps());
sk_sp<SkData> data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kSPIRV_Tag,
shaders,