skia2/include/gpu/GrResourceKey.h
kkinnunen 54b8511189 Make GrResourceCache perf less sensitive to key length change
Make GrResourceCache performance less sensitive to key length change.
The memcmp in GrResourceKey is called when SkTDynamicHash jumps between
slots to find the entry by an index. Avoid most of the memcmps by
comparing the hash first.

This is important because small changes in key data length can cause
big performance regressions. The theory is that key length change causes
different hash values. These hash values might trigger memcmps that
originally weren't there, causing the regression.

Adds a few specialized benches to grresourcecache_add to test different
key lengths. The tests are run only on release, because on debug the
SkTDynamicHash validation takes too long, and adding many such delays
to development test runs would be unproductive. On release the tests
are quite fast.

Effect of this patch on the added tests on amd64:
grresourcecache_find_10  738us ->  768us        1.04x
 grresourcecache_find_2  472us ->  476us        1.01x
grresourcecache_find_25  841us ->  845us        1x
 grresourcecache_find_4  565us ->  531us        0.94x
grresourcecache_find_54 1.18ms ->  1.1ms        0.93x
 grresourcecache_find_5  834us ->  749us        0.9x
 grresourcecache_find_3  620us ->  542us        0.87x
 grresourcecache_add_25 2.74ms -> 2.24ms        0.82x
 grresourcecache_add_56 3.23ms -> 2.56ms        0.79x
 grresourcecache_add_54 3.34ms -> 2.62ms        0.78x
  grresourcecache_add_5 2.68ms ->  2.1ms        0.78x
 grresourcecache_add_10  2.7ms -> 2.11ms        0.78x
  grresourcecache_add_2 1.85ms -> 1.41ms        0.76x
    grresourcecache_add 1.84ms ->  1.4ms        0.76x
  grresourcecache_add_4 1.99ms -> 1.49ms        0.75x
  grresourcecache_add_3 2.11ms -> 1.55ms        0.73x
 grresourcecache_add_55   39ms -> 13.9ms        0.36x
grresourcecache_find_55 23.2ms -> 6.21ms        0.27x

On arm64 the results are similar.

On arm_v7_neon, the results lack the discontinuity at 55:
    grresourcecache_add 4.06ms -> 4.26ms        1.05x
  grresourcecache_add_2 4.05ms -> 4.23ms        1.05x
   grresourcecache_find 1.28ms ->  1.3ms        1.02x
grresourcecache_find_56 3.35ms -> 3.32ms        0.99x
 grresourcecache_find_2 1.31ms -> 1.29ms        0.99x
grresourcecache_find_54 3.28ms -> 3.24ms        0.99x
  grresourcecache_add_5 6.38ms -> 6.26ms        0.98x
 grresourcecache_add_55 8.44ms -> 8.24ms        0.98x
 grresourcecache_add_25 7.03ms -> 6.86ms        0.98x
grresourcecache_find_25  2.7ms -> 2.59ms        0.96x
 grresourcecache_find_4 1.45ms -> 1.38ms        0.95x
grresourcecache_find_10 2.52ms -> 2.39ms        0.95x
grresourcecache_find_55 3.54ms -> 3.33ms        0.94x
 grresourcecache_find_5  2.5ms -> 2.32ms        0.93x
 grresourcecache_find_3 1.57ms -> 1.43ms        0.91x

The extremely slow case, 55, is postulated to be due to the index jump
collisions running the memcmp. This is not visible on arm_v7_neon, probably due
to the hash function producing different results on 32-bit architectures.

This change is needed for extending the path cache key in the Gr
NV_path_rendering codepath. Extending is needed in order to add dashed
paths to the path cache.

Review URL: https://codereview.chromium.org/1132723003
2015-05-18 22:47:33 -07:00

309 lines
11 KiB
C++

/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef GrResourceKey_DEFINED
#define GrResourceKey_DEFINED
#include "GrTypes.h"
#include "SkOnce.h"
#include "SkTemplates.h"
/**
 * Hash function used for resource keys. Only declared here; the definition lives elsewhere.
 *
 * The callers in this header pass a pointer to the key word that follows the hash slot and a
 * length in bytes (the key's total size minus the size of the hash word), and store the result
 * in the key's hash slot.
 *
 * @param data  pointer to the 32-bit words to hash.
 * @param size  length of the input; a byte count at the call sites in this header.
 * @return      the 32-bit hash value.
 */
uint32_t GrResourceKeyHash(const uint32_t* data, size_t size);
/**
* Base class for all GrGpuResource cache keys. There are two types of cache keys. Refer to the
* comments for each key type below.
*/
class GrResourceKey {
public:
uint32_t hash() const {
this->validate();
return fKey[kHash_MetaDataIdx];
}
size_t size() const {
this->validate();
SkASSERT(this->isValid());
return this->internalSize();
}
protected:
static const uint32_t kInvalidDomain = 0;
GrResourceKey() { this->reset(); }
/** Reset to an invalid key. */
void reset() {
GR_STATIC_ASSERT((uint16_t)kInvalidDomain == kInvalidDomain);
fKey.reset(kMetaDataCnt);
fKey[kHash_MetaDataIdx] = 0;
fKey[kDomainAndSize_MetaDataIdx] = kInvalidDomain;
}
bool operator==(const GrResourceKey& that) const {
return this->hash() == that.hash() &&
0 == memcmp(&fKey[kHash_MetaDataIdx + 1],
&that.fKey[kHash_MetaDataIdx + 1],
this->internalSize() - sizeof(uint32_t));
}
GrResourceKey& operator=(const GrResourceKey& that) {
SkASSERT(that.isValid());
if (this != &that) {
size_t bytes = that.size();
SkASSERT(SkIsAlign4(bytes));
fKey.reset(SkToInt(bytes / sizeof(uint32_t)));
memcpy(fKey.get(), that.fKey.get(), bytes);
this->validate();
}
return *this;
}
bool isValid() const { return kInvalidDomain != this->domain(); }
uint32_t domain() const { return fKey[kDomainAndSize_MetaDataIdx] & 0xffff; }
/** size of the key data, excluding meta-data (hash, domain, etc). */
size_t dataSize() const { return this->size() - 4 * kMetaDataCnt; }
/** ptr to the key data, excluding meta-data (hash, domain, etc). */
const uint32_t* data() const {
this->validate();
return &fKey[kMetaDataCnt];
}
/** Used to initialize a key. */
class Builder {
public:
Builder(GrResourceKey* key, uint32_t domain, int data32Count) : fKey(key) {
SkASSERT(data32Count >= 0);
SkASSERT(domain != kInvalidDomain);
key->fKey.reset(kMetaDataCnt + data32Count);
int size = (data32Count + kMetaDataCnt) * sizeof(uint32_t);
SkASSERT(SkToU16(size) == size);
SkASSERT(SkToU16(domain) == domain);
key->fKey[kDomainAndSize_MetaDataIdx] = domain | (size << 16);
}
~Builder() { this->finish(); }
void finish() {
if (NULL == fKey) {
return;
}
GR_STATIC_ASSERT(0 == kHash_MetaDataIdx);
uint32_t* hash = &fKey->fKey[kHash_MetaDataIdx];
*hash = GrResourceKeyHash(hash + 1, fKey->internalSize() - sizeof(uint32_t));
fKey->validate();
fKey = NULL;
}
uint32_t& operator[](int dataIdx) {
SkASSERT(fKey);
SkDEBUGCODE(size_t dataCount = fKey->internalSize() / sizeof(uint32_t) - kMetaDataCnt;)
SkASSERT(SkToU32(dataIdx) < dataCount);
return fKey->fKey[kMetaDataCnt + dataIdx];
}
private:
GrResourceKey* fKey;
};
private:
enum MetaDataIdx {
kHash_MetaDataIdx,
// The key domain and size are packed into a single uint32_t.
kDomainAndSize_MetaDataIdx,
kLastMetaDataIdx = kDomainAndSize_MetaDataIdx
};
static const uint32_t kMetaDataCnt = kLastMetaDataIdx + 1;
size_t internalSize() const {
return fKey[kDomainAndSize_MetaDataIdx] >> 16;
}
void validate() const {
SkASSERT(fKey[kHash_MetaDataIdx] ==
GrResourceKeyHash(&fKey[kHash_MetaDataIdx] + 1,
this->internalSize() - sizeof(uint32_t)));
SkASSERT(SkIsAlign4(this->internalSize()));
}
friend class TestResource; // For unit test to access kMetaDataCnt.
// bmp textures require 4 uint32_t values.
SkAutoSTMalloc<kMetaDataCnt + 4, uint32_t> fKey;
};
/**
* A key used for scratch resources. There are three important rules about scratch keys:
* * Multiple resources can share the same scratch key. Therefore resources assigned the same
* scratch key should be interchangeable with respect to the code that uses them.
* * A resource can have at most one scratch key and it is set at resource creation by the
* resource itself.
* * When a scratch resource is ref'ed it will not be returned from the
* cache for a subsequent cache request until all refs are released. This facilitates using
* a scratch key for multiple render-to-texture scenarios. An example is a separable blur:
*
* GrTexture* texture[2];
* texture[0] = get_scratch_texture(scratchKey);
* texture[1] = get_scratch_texture(scratchKey); // texture[0] is already owned so we will get a
* // different one for texture[1]
* draw_mask(texture[0], path); // draws path mask to texture[0]
* blur_x(texture[0], texture[1]); // blurs texture[0] in x and stores result in texture[1]
* blur_y(texture[1], texture[0]); // blurs texture[1] in y and stores result in texture[0]
* texture[1]->unref(); // texture 1 can now be recycled for the next request with scratchKey
* consume_blur(texture[0]);
* texture[0]->unref(); // texture 0 can now be recycled for the next request with scratchKey
*/
class GrScratchKey : public GrResourceKey {
private:
typedef GrResourceKey INHERITED;
public:
/** Uniquely identifies the type of resource that is cached as scratch. */
typedef uint32_t ResourceType;
/** Generate a unique ResourceType. */
static ResourceType GenerateResourceType();
/** Creates an invalid scratch key. It must be initialized using a Builder object before use. */
GrScratchKey() {}
GrScratchKey(const GrScratchKey& that) { *this = that; }
/** reset() returns the key to the invalid state. */
using INHERITED::reset;
using INHERITED::isValid;
ResourceType resourceType() const { return this->domain(); }
GrScratchKey& operator=(const GrScratchKey& that) {
this->INHERITED::operator=(that);
return *this;
}
bool operator==(const GrScratchKey& that) const {
return this->INHERITED::operator==(that);
}
bool operator!=(const GrScratchKey& that) const { return !(*this == that); }
class Builder : public INHERITED::Builder {
public:
Builder(GrScratchKey* key, ResourceType type, int data32Count)
: INHERITED::Builder(key, type, data32Count) {}
};
};
/**
* A key that allows for exclusive use of a resource for a use case (AKA "domain"). There are three
* rules governing the use of unique keys:
* * Only one resource can have a given unique key at a time. Hence, "unique".
* * A resource can have at most one unique key at a time.
* * Unlike scratch keys, multiple requests for a unique key will return the same
* resource even if the resource already has refs.
* This key type allows a code path to create cached resources for which it is the exclusive user.
* The code path creates a domain which it sets on its keys. This guarantees that there are no
* cross-domain collisions.
*
* Unique keys preempt scratch keys. While a resource has a unique key it is inaccessible via its
* scratch key. It can become scratch again if the unique key is removed.
*/
class GrUniqueKey : public GrResourceKey {
private:
typedef GrResourceKey INHERITED;
public:
typedef uint32_t Domain;
/** Generate a Domain for unique keys. */
static Domain GenerateDomain();
/** Creates an invalid unique key. It must be initialized using a Builder object before use. */
GrUniqueKey() {}
GrUniqueKey(const GrUniqueKey& that) { *this = that; }
/** reset() returns the key to the invalid state. */
using INHERITED::reset;
using INHERITED::isValid;
GrUniqueKey& operator=(const GrUniqueKey& that) {
this->INHERITED::operator=(that);
return *this;
}
bool operator==(const GrUniqueKey& that) const {
return this->INHERITED::operator==(that);
}
bool operator!=(const GrUniqueKey& that) const { return !(*this == that); }
class Builder : public INHERITED::Builder {
public:
Builder(GrUniqueKey* key, Domain domain, int data32Count)
: INHERITED::Builder(key, domain, data32Count) {}
/** Used to build a key that wraps another key and adds additional data. */
Builder(GrUniqueKey* key, const GrUniqueKey& innerKey, Domain domain,
int extraData32Cnt)
: INHERITED::Builder(key, domain, Data32CntForInnerKey(innerKey) + extraData32Cnt) {
SkASSERT(&innerKey != key);
// add the inner key to the end of the key so that op[] can be indexed normally.
uint32_t* innerKeyData = &this->operator[](extraData32Cnt);
const uint32_t* srcData = innerKey.data();
(*innerKeyData++) = innerKey.domain();
memcpy(innerKeyData, srcData, innerKey.dataSize());
}
private:
static int Data32CntForInnerKey(const GrUniqueKey& innerKey) {
// key data + domain
return SkToInt((innerKey.dataSize() >> 2) + 1);
}
};
};
/**
* It is common to need a frequently reused GrUniqueKey where the only requirement is that the key
* is unique. These macros create such a key in a thread safe manner so the key can be truly global
* and only constructed once.
*/
/**
 * Place outside of function/class definitions.
 * Expands to SK_DECLARE_STATIC_ONCE for a once-flag named <name>_once, which
 * GR_DEFINE_STATIC_UNIQUE_KEY(name) refers to.
 */
#define GR_DECLARE_STATIC_UNIQUE_KEY(name) SK_DECLARE_STATIC_ONCE(name##_once)
/**
 * Place inside function where the key is used.
 * Declares a function-local 'static GrUniqueKey name' and passes it, together with the
 * <name>_once flag, to SkOnce with gr_init_static_unique_key_once as the initializer
 * (presumably so the key is built only once -- see SkOnce.h to confirm the exact guarantee).
 * The expansion has no trailing semicolon; the user of the macro supplies it.
 */
#define GR_DEFINE_STATIC_UNIQUE_KEY(name) \
static GrUniqueKey name; \
SkOnce(&name##_once, gr_init_static_unique_key_once, &name)
/**
 * Initializer handed to SkOnce by GR_DEFINE_STATIC_UNIQUE_KEY: turns 'key' into a key with a
 * freshly generated domain and zero data words.
 */
static inline void gr_init_static_unique_key_once(GrUniqueKey* key) {
    const GrUniqueKey::Domain freshDomain = GrUniqueKey::GenerateDomain();
    GrUniqueKey::Builder keyBuilder(key, freshDomain, 0);
    // Write the hash now; the builder's destructor then has nothing left to do.
    keyBuilder.finish();
}
// The cache listens for these messages to purge junk resources proactively.
class GrUniqueKeyInvalidatedMessage {
public:
explicit GrUniqueKeyInvalidatedMessage(const GrUniqueKey& key) : fKey(key) {}
GrUniqueKeyInvalidatedMessage(const GrUniqueKeyInvalidatedMessage& that) : fKey(that.fKey) {}
GrUniqueKeyInvalidatedMessage& operator=(const GrUniqueKeyInvalidatedMessage& that) {
fKey = that.fKey;
return *this;
}
const GrUniqueKey& key() const { return fKey; }
private:
GrUniqueKey fKey;
};
#endif