Make GrMemoryPool play nice with bucketing allocators.

Some memory allocators have very coarse size buckets; for example, on
Android (jemalloc) an attempt to allocate 32 KiB + 1 byte ends up allocating
40 KiB, wasting 8 KiB.

The GrMemoryPool ctor takes two arguments that specify the prealloc / block
sizes, and then inflates them to accommodate some bookkeeping structures.
Since most places create GrMemoryPools with power-of-two sizes (which have
exact buckets in most allocators), the inflation causes the allocator to
select the next size bucket, wasting memory.

This CL makes GrMemoryPool stop inflating the sizes it was created with, and
instead allocate the specified amounts exactly; part of the allocated memory
is then used for the bookkeeping structures. Additionally, a GrObjectMemoryPool
template is provided, which takes prealloc / block object counts (instead of
sizes) and guarantees that the specified number of objects will fit in the
prealloc / block spaces.
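
To make the bucket interaction concrete, here is a minimal sketch of the
arithmetic (illustrative only, not code from this CL; the header / pad sizes
and the jemalloc bucket layout are assumed values):

    #include <cstddef>
    #include <cstdio>

    // Assumed bookkeeping sizes, 8-byte aligned as in GrMemoryPool.
    constexpr size_t kHeaderSize  = 32;  // aligned sizeof(BlockHeader), assumed
    constexpr size_t kPerAllocPad = 8;   // aligned sizeof(AllocHeader), assumed

    int main() {
        size_t requested = 32 * 1024;  // pow2 prealloc size, as most call sites use

        // Old behavior: the pool inflated the request before calling malloc,
        // pushing it just past jemalloc's 32 KiB bucket into the 40 KiB one.
        std::printf("old: malloc(%zu) -> 40 KiB bucket, ~8 KiB wasted\n",
                    requested + kPerAllocPad + kHeaderSize);

        // New behavior: malloc exactly what the caller asked for; bookkeeping
        // is carved out of the requested bytes instead of being added on top.
        std::printf("new: malloc(%zu) -> fits the 32 KiB bucket exactly\n",
                    requested);
        return 0;
    }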

BUG=651872
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2525773002

Review-Url: https://codereview.chromium.org/2525773002
dskiba 2016-11-29 06:50:35 -08:00 committed by Commit bot
parent c51c18fd78
commit e4cd006991
5 changed files with 284 additions and 30 deletions

src/gpu/GrMemoryPool.cpp

@@ -13,17 +13,19 @@
 #define VALIDATE
 #endif
+constexpr size_t GrMemoryPool::kSmallestMinAllocSize;
 GrMemoryPool::GrMemoryPool(size_t preallocSize, size_t minAllocSize) {
     SkDEBUGCODE(fAllocationCnt = 0);
     SkDEBUGCODE(fAllocBlockCnt = 0);
-    minAllocSize = SkTMax<size_t>(minAllocSize, 1 << 10);
-    fMinAllocSize = GrSizeAlignUp(minAllocSize + kPerAllocPad, kAlignment);
-    fPreallocSize = GrSizeAlignUp(preallocSize + kPerAllocPad, kAlignment);
-    fPreallocSize = SkTMax(fPreallocSize, fMinAllocSize);
+    minAllocSize = SkTMax<size_t>(GrSizeAlignUp(minAllocSize, kAlignment), kSmallestMinAllocSize);
+    preallocSize = SkTMax<size_t>(GrSizeAlignUp(preallocSize, kAlignment), minAllocSize);
+    fMinAllocSize = minAllocSize;
     fSize = 0;
-    fHead = CreateBlock(fPreallocSize);
+    fHead = CreateBlock(preallocSize);
     fTail = fHead;
     fHead->fNext = nullptr;
     fHead->fPrev = nullptr;
@@ -43,7 +45,7 @@ void* GrMemoryPool::allocate(size_t size) {
     size += kPerAllocPad;
     size = GrSizeAlignUp(size, kAlignment);
     if (fTail->fFreeSize < size) {
-        size_t blockSize = size;
+        size_t blockSize = size + kHeaderSize;
         blockSize = SkTMax<size_t>(blockSize, fMinAllocSize);
         BlockHeader* block = CreateBlock(blockSize);
@@ -87,7 +89,7 @@ void GrMemoryPool::release(void* p) {
     if (fHead == block) {
         fHead->fCurrPtr = reinterpret_cast<intptr_t>(fHead) + kHeaderSize;
         fHead->fLiveCount = 0;
-        fHead->fFreeSize = fPreallocSize;
+        fHead->fFreeSize = fHead->fSize - kHeaderSize;
     } else {
         BlockHeader* prev = block->fPrev;
         BlockHeader* next = block->fNext;
@@ -115,18 +117,18 @@ void GrMemoryPool::release(void* p) {
     VALIDATE;
 }
 
-GrMemoryPool::BlockHeader* GrMemoryPool::CreateBlock(size_t size) {
-    size_t paddedSize = size + kHeaderSize;
+GrMemoryPool::BlockHeader* GrMemoryPool::CreateBlock(size_t blockSize) {
+    blockSize = SkTMax<size_t>(blockSize, kHeaderSize);
     BlockHeader* block =
-        reinterpret_cast<BlockHeader*>(sk_malloc_throw(paddedSize));
+        reinterpret_cast<BlockHeader*>(sk_malloc_throw(blockSize));
     // we assume malloc gives us aligned memory
     SkASSERT(!(reinterpret_cast<intptr_t>(block) % kAlignment));
     SkDEBUGCODE(block->fBlockSentinal = kAssignedMarker);
     block->fLiveCount = 0;
-    block->fFreeSize = size;
+    block->fFreeSize = blockSize - kHeaderSize;
     block->fCurrPtr = reinterpret_cast<intptr_t>(block) + kHeaderSize;
     block->fPrevPtr = 0; // gcc warns on assigning nullptr to an intptr_t.
-    block->fSize = paddedSize;
+    block->fSize = blockSize;
     return block;
 }
@@ -153,18 +155,16 @@ void GrMemoryPool::validate() {
         intptr_t b = reinterpret_cast<intptr_t>(block);
         size_t ptrOffset = block->fCurrPtr - b;
         size_t totalSize = ptrOffset + block->fFreeSize;
-        size_t userSize = totalSize - kHeaderSize;
         intptr_t userStart = b + kHeaderSize;
         SkASSERT(!(b % kAlignment));
         SkASSERT(!(totalSize % kAlignment));
-        SkASSERT(!(userSize % kAlignment));
         SkASSERT(!(block->fCurrPtr % kAlignment));
         if (fHead != block) {
             SkASSERT(block->fLiveCount);
-            SkASSERT(userSize >= fMinAllocSize);
+            SkASSERT(totalSize >= fMinAllocSize);
         } else {
-            SkASSERT(userSize == fPreallocSize);
+            SkASSERT(totalSize == block->fSize);
         }
         if (!block->fLiveCount) {
             SkASSERT(ptrOffset == kHeaderSize);
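
Taken together, the changes above mean the pool now passes caller-driven sizes
to malloc unchanged. A sketch of the new block-sizing math (the constant values
here are assumed for illustration, not code from this CL):

    #include <cstddef>

    constexpr size_t kAlignment   = 8;
    constexpr size_t kHeaderSize  = 32;  // aligned sizeof(BlockHeader), assumed
    constexpr size_t kPerAllocPad = 8;   // aligned sizeof(AllocHeader), assumed

    constexpr size_t alignUp(size_t x, size_t a) { return (x + a - 1) & ~(a - 1); }

    // Mirrors the new allocate() -> CreateBlock() path: the returned value goes
    // to malloc unchanged, and kHeaderSize bytes of it hold the BlockHeader.
    constexpr size_t newBlockMallocSize(size_t size, size_t minAllocSize) {
        size_t padded = alignUp(size + kPerAllocPad, kAlignment);
        return padded + kHeaderSize > minAllocSize ? padded + kHeaderSize
                                                   : minAllocSize;
    }

    static_assert(newBlockMallocSize(0, 1 << 10) == 1 << 10,
                  "small requests come from an exactly minAllocSize-sized block");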

src/gpu/GrMemoryPool.h

@@ -13,16 +13,23 @@
 /**
  * Allocates memory in blocks and parcels out space in the blocks for allocation
  * requests. It is optimized for allocate / release speed over memory
- * effeciency. The interface is designed to be used to implement operator new
+ * efficiency. The interface is designed to be used to implement operator new
  * and delete overrides. All allocations are expected to be released before the
  * pool's destructor is called. Allocations will be 8-byte aligned.
  */
 class GrMemoryPool {
 public:
     /**
-     * Prealloc size is the amount of space to make available at pool creation
-     * time and keep around until pool destruction. The min alloc size is the
-     * smallest allowed size of additional allocations.
+     * Prealloc size is the amount of space to allocate at pool creation
+     * time and keep around until pool destruction. The min alloc size is
+     * the smallest allowed size of additional allocations. Both sizes are
+     * adjusted to ensure that:
+     *   1. they are 8-byte aligned
+     *   2. minAllocSize >= kSmallestMinAllocSize
+     *   3. preallocSize >= minAllocSize
+     *
+     * Both sizes are what the pool will end up allocating from the system,
+     * and portions of the allocated memory are used for internal bookkeeping.
      */
     GrMemoryPool(size_t preallocSize, size_t minAllocSize);
@@ -48,6 +55,16 @@ public:
      */
     size_t size() const { return fSize; }
 
+    /**
+     * Returns the preallocated size of the GrMemoryPool
+     */
+    size_t preallocSize() const { return fHead->fSize; }
+
+    /**
+     * Minimum value of minAllocSize constructor argument.
+     */
+    constexpr static size_t kSmallestMinAllocSize = 1 << 10;
+
 private:
     struct BlockHeader;
@@ -81,14 +98,7 @@ private:
         BlockHeader* fHeader; ///< pointer back to the block header in which an alloc resides
     };
 
-    enum {
-        // We assume this alignment is good enough for everybody.
-        kAlignment = 8,
-        kHeaderSize = GR_CT_ALIGN_UP(sizeof(BlockHeader), kAlignment),
-        kPerAllocPad = GR_CT_ALIGN_UP(sizeof(AllocHeader), kAlignment),
-    };
     size_t fSize;
-    size_t fPreallocSize;
     size_t fMinAllocSize;
     BlockHeader* fHead;
     BlockHeader* fTail;
@@ -96,6 +106,79 @@ private:
     int fAllocationCnt;
     int fAllocBlockCnt;
 #endif
+
+protected:
+    enum {
+        // We assume this alignment is good enough for everybody.
+        kAlignment = 8,
+        kHeaderSize = GR_CT_ALIGN_UP(sizeof(BlockHeader), kAlignment),
+        kPerAllocPad = GR_CT_ALIGN_UP(sizeof(AllocHeader), kAlignment),
+    };
 };
+
+/**
+ * A variant of GrMemoryPool that can only allocate objects of a single type. It
+ * is not as flexible as GrMemoryPool, but it has a more convenient allocate()
+ * method, and more importantly, it guarantees the number of objects that are
+ * preallocated at construction or when adding a new memory block. For example,
+ *
+ *   GrMemoryPool pool(3 * sizeof(T), 1000 * sizeof(T));
+ *   pool.allocate(sizeof(T));
+ *   pool.allocate(sizeof(T));
+ *   pool.allocate(sizeof(T));
+ *
+ * will preallocate 3 * sizeof(T) bytes and use some of those bytes for internal
+ * structures. Because of that, the last allocate() call will end up allocating
+ * a new block of 1000 * sizeof(T) bytes. In contrast,
+ *
+ *   GrObjectMemoryPool<T> pool(3, 1000);
+ *   pool.allocate();
+ *   pool.allocate();
+ *   pool.allocate();
+ *
+ * guarantees to preallocate enough memory for 3 objects of size sizeof(T), so
+ * the last allocate() will use preallocated memory and won't cause allocation
+ * of a new block.
+ *
+ * The same is true for the second (minAlloc) ctor argument: this class
+ * guarantees that a newly added block will have enough space for 1000 objects
+ * of size sizeof(T), while GrMemoryPool does not.
+ */
+template <class T>
+class GrObjectMemoryPool: public GrMemoryPool {
+public:
+    /**
+     * Preallocates memory for preallocCount objects, and sets the new block
+     * size to be enough to hold minAllocCount objects.
+     */
+    GrObjectMemoryPool(size_t preallocCount, size_t minAllocCount)
+        : GrMemoryPool(CountToSize(preallocCount),
+                       CountToSize(SkTMax(minAllocCount, kSmallestMinAllocCount))) {
+    }
+
+    /**
+     * Allocates memory for an object, but doesn't construct or otherwise
+     * initialize it. The memory must be freed with release().
+     */
+    T* allocate() { return static_cast<T*>(GrMemoryPool::allocate(sizeof(T))); }
+
+private:
+    constexpr static size_t kTotalObjectSize =
+            kPerAllocPad + GR_CT_ALIGN_UP(sizeof(T), kAlignment);
+
+    constexpr static size_t CountToSize(size_t count) {
+        return kHeaderSize + count * kTotalObjectSize;
+    }
+
+public:
+    /**
+     * Minimum value of minAllocCount constructor argument.
+     */
+    constexpr static size_t kSmallestMinAllocCount =
+            (GrMemoryPool::kSmallestMinAllocSize - kHeaderSize + kTotalObjectSize - 1) /
+                    kTotalObjectSize;
+};
+
+template <class T>
+constexpr size_t GrObjectMemoryPool<T>::kSmallestMinAllocCount;
+
 #endif
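
The kSmallestMinAllocCount expression above is a ceiling division: the smallest
object count whose CountToSize() result reaches kSmallestMinAllocSize. A quick
compile-time check of that property (the sizes here are assumed for
illustration, not the real sizeof values):

    #include <cstddef>

    // Assumed sizes; the real ones depend on sizeof(BlockHeader),
    // sizeof(AllocHeader), and sizeof(T).
    constexpr size_t kSmallestMinAllocSize = 1 << 10;
    constexpr size_t kHeaderSize      = 32;
    constexpr size_t kTotalObjectSize = 8 + 24;  // kPerAllocPad + aligned sizeof(T)

    // Smallest count such that
    // kHeaderSize + count * kTotalObjectSize >= kSmallestMinAllocSize.
    constexpr size_t kSmallestMinAllocCount =
            (kSmallestMinAllocSize - kHeaderSize + kTotalObjectSize - 1) /
            kTotalObjectSize;

    static_assert(kHeaderSize + kSmallestMinAllocCount * kTotalObjectSize
                          >= kSmallestMinAllocSize,
                  "enough objects to fill a smallest-allowed block");
    static_assert(kHeaderSize + (kSmallestMinAllocCount - 1) * kTotalObjectSize
                          < kSmallestMinAllocSize,
                  "one fewer object would fall short");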

src/gpu/instanced/InstancedRendering.cpp

@@ -18,7 +18,7 @@ namespace gr_instanced {
 InstancedRendering::InstancedRendering(GrGpu* gpu)
     : fGpu(SkRef(gpu)),
       fState(State::kRecordingDraws),
-      fDrawPool(1024 * sizeof(Batch::Draw), 1024 * sizeof(Batch::Draw)) {
+      fDrawPool(1024, 1024) {
 }
 
 GrDrawBatch* InstancedRendering::recordRect(const SkRect& rect, const SkMatrix& viewMatrix,
@@ -241,7 +241,7 @@ InstancedRendering::Batch::Batch(uint32_t classID, InstancedRendering* ir)
       fIsTracked(false),
       fNumDraws(1),
       fNumChangesInGeometry(0) {
-    fHeadDraw = fTailDraw = (Draw*)fInstancedRendering->fDrawPool.allocate(sizeof(Draw));
+    fHeadDraw = fTailDraw = fInstancedRendering->fDrawPool.allocate();
 #ifdef SK_DEBUG
     fHeadDraw->fGeometry = {-1, 0};
 #endif

src/gpu/instanced/InstancedRendering.h

@@ -173,7 +173,7 @@ private:
     const sk_sp<GrGpu> fGpu;
     State fState;
-    GrMemoryPool fDrawPool;
+    GrObjectMemoryPool<Batch::Draw> fDrawPool;
     SkSTArray<1024, ParamsTexel, true> fParams;
     BatchList fTrackedBatches;
     sk_sp<const GrBuffer> fVertexBuffer;

tests/GrMemoryPoolTest.cpp

@@ -10,6 +10,7 @@
 #if SK_SUPPORT_GPU
 #include "GrMemoryPool.h"
 #include "SkRandom.h"
+#include "SkTArray.h"
 #include "SkTDArray.h"
 #include "SkTemplates.h"
@@ -227,4 +228,174 @@ DEF_TEST(GrMemoryPool, reporter) {
     }
 }
+
+// GrMemoryPool requires that it's empty at the point of destruction. This helps
+// achieve that by releasing all added memory in the destructor.
+class AutoPoolReleaser {
+public:
+    AutoPoolReleaser(GrMemoryPool& pool): fPool(pool) {
+    }
+    ~AutoPoolReleaser() {
+        for (void* ptr: fAllocated) {
+            fPool.release(ptr);
+        }
+    }
+    void add(void* ptr) {
+        fAllocated.push_back(ptr);
+    }
+private:
+    GrMemoryPool& fPool;
+    SkTArray<void*> fAllocated;
+};
+
+DEF_TEST(GrMemoryPoolAPI, reporter) {
+    constexpr size_t kSmallestMinAllocSize = GrMemoryPool::kSmallestMinAllocSize;
+
+    // Allocates memory until the pool adds a new block (pool.size() changes).
+    auto allocateMemory = [](GrMemoryPool& pool, AutoPoolReleaser& r) {
+        size_t origPoolSize = pool.size();
+        while (pool.size() == origPoolSize) {
+            r.add(pool.allocate(31));
+        }
+    };
+
+    // Effective prealloc space capacity is >= kSmallestMinAllocSize.
+    {
+        GrMemoryPool pool(0, 0);
+        REPORTER_ASSERT(reporter, pool.preallocSize() == kSmallestMinAllocSize);
+    }
+
+    // Effective prealloc space capacity is >= minAllocSize.
+    {
+        constexpr size_t kMinAllocSize = kSmallestMinAllocSize * 2;
+        GrMemoryPool pool(kSmallestMinAllocSize, kMinAllocSize);
+        REPORTER_ASSERT(reporter, pool.preallocSize() == kMinAllocSize);
+    }
+
+    // Effective block size capacity is >= kSmallestMinAllocSize.
+    {
+        GrMemoryPool pool(kSmallestMinAllocSize, kSmallestMinAllocSize / 2);
+        AutoPoolReleaser r(pool);
+        allocateMemory(pool, r);
+        REPORTER_ASSERT(reporter, pool.size() == kSmallestMinAllocSize);
+    }
+
+    // Pool allocates exactly preallocSize on creation.
+    {
+        constexpr size_t kPreallocSize = kSmallestMinAllocSize * 5;
+        GrMemoryPool pool(kPreallocSize, 0);
+        REPORTER_ASSERT(reporter, pool.preallocSize() == kPreallocSize);
+    }
+
+    // Pool allocates exactly minAllocSize when it expands.
+    {
+        constexpr size_t kMinAllocSize = kSmallestMinAllocSize * 7;
+        GrMemoryPool pool(0, kMinAllocSize);
+        AutoPoolReleaser r(pool);
+
+        allocateMemory(pool, r);
+        REPORTER_ASSERT(reporter, pool.size() == kMinAllocSize);
+
+        allocateMemory(pool, r);
+        REPORTER_ASSERT(reporter, pool.size() == 2 * kMinAllocSize);
+    }
+
+    // When asked to allocate an amount > minAllocSize, the pool allocates a
+    // larger block to accommodate all internal structures.
+    {
+        constexpr size_t kMinAllocSize = kSmallestMinAllocSize * 2;
+        GrMemoryPool pool(kSmallestMinAllocSize, kMinAllocSize);
+        AutoPoolReleaser r(pool);
+
+        REPORTER_ASSERT(reporter, pool.size() == 0);
+
+        constexpr size_t hugeSize = 10 * kMinAllocSize;
+        r.add(pool.allocate(hugeSize));
+        REPORTER_ASSERT(reporter, pool.size() > hugeSize);
+
+        // The block size allocated to accommodate the huge request doesn't
+        // include any extra space, so the next allocation request allocates
+        // a new block.
+        size_t hugeBlockSize = pool.size();
+        r.add(pool.allocate(0));
+        REPORTER_ASSERT(reporter, pool.size() == hugeBlockSize + kMinAllocSize);
+    }
+}
+
+DEF_TEST(GrObjectMemoryPoolAPI, reporter) {
+    struct Data {
+        int value[5];
+    };
+    using DataObjectPool = GrObjectMemoryPool<Data>;
+    constexpr size_t kSmallestMinAllocCount = DataObjectPool::kSmallestMinAllocCount;
+
+    // Allocates objects until the pool adds a new block (pool.size() changes).
+    // Returns the number of objects that fit into the current block (i.e. before
+    // pool.size() changed; a newly allocated block always ends up with one
+    // object allocated from it).
+    auto allocateObjects = [](DataObjectPool& pool, AutoPoolReleaser& r) -> size_t {
+        size_t count = 0;
+        size_t origPoolSize = pool.size();
+        while (pool.size() == origPoolSize) {
+            r.add(pool.allocate());
+            count++;
+        }
+        return count - 1;
+    };
+
+    // Effective prealloc space capacity is >= kSmallestMinAllocCount.
+    {
+        DataObjectPool pool(kSmallestMinAllocCount / 3, 0);
+        AutoPoolReleaser r(pool);
+        size_t preallocCount = allocateObjects(pool, r);
+        REPORTER_ASSERT(reporter, preallocCount == kSmallestMinAllocCount);
+    }
+
+    // Effective prealloc space capacity is >= minAllocCount.
+    {
+        DataObjectPool pool(kSmallestMinAllocCount, 2 * kSmallestMinAllocCount);
+        AutoPoolReleaser r(pool);
+        size_t preallocCount = allocateObjects(pool, r);
+        REPORTER_ASSERT(reporter, preallocCount == 2 * kSmallestMinAllocCount);
+    }
+
+    // Effective block capacity is >= kSmallestMinAllocCount.
+    {
+        DataObjectPool pool(kSmallestMinAllocCount, kSmallestMinAllocCount / 2);
+        AutoPoolReleaser r(pool);
+        // Fill the prealloc space.
+        allocateObjects(pool, r);
+        size_t minAllocCount = 1 + allocateObjects(pool, r);
+        REPORTER_ASSERT(reporter, minAllocCount == kSmallestMinAllocCount);
+    }
+
+    // Pool allocates space for exactly preallocCount objects on creation.
+    {
+        constexpr size_t kPreallocCount = kSmallestMinAllocCount * 7 / 3;
+        DataObjectPool pool(kPreallocCount, 0);
+        AutoPoolReleaser r(pool);
+        size_t preallocCount = allocateObjects(pool, r);
+        REPORTER_ASSERT(reporter, preallocCount == kPreallocCount);
+    }
+
+    // Pool allocates space for minAllocCount objects when it adds a new block.
+    {
+        constexpr size_t kMinAllocCount = kSmallestMinAllocCount * 11 / 3;
+        DataObjectPool pool(0, kMinAllocCount);
+        AutoPoolReleaser r(pool);
+
+        // Fill the prealloc space.
+        allocateObjects(pool, r);
+
+        size_t firstBlockCount = 1 + allocateObjects(pool, r);
+        REPORTER_ASSERT(reporter, firstBlockCount == kMinAllocCount);
+
+        size_t secondBlockCount = 1 + allocateObjects(pool, r);
+        REPORTER_ASSERT(reporter, secondBlockCount == kMinAllocCount);
+    }
+}
+
 #endif