From 6986c6539e8814fdc56e42a604fe8740238a0ee1 Mon Sep 17 00:00:00 2001 From: Brian Salomon Date: Thu, 12 Dec 2019 10:58:47 -0500 Subject: [PATCH] Make Gr[Op]MemoryPool allocate itself into its initial block. Saves one heap allocation per DDL recorded. Change-Id: I9393aedc3b48031cd2ea5f0160b107915077099a Reviewed-on: https://skia-review.googlesource.com/c/skia/+/259419 Commit-Queue: Brian Salomon Reviewed-by: Michael Ludwig --- bench/GrMemoryPoolBench.cpp | 36 +++++++++++------ src/gpu/GrMemoryPool.cpp | 71 +++++++++++++++++++++++++--------- src/gpu/GrMemoryPool.h | 55 +++++++++++++------------- src/gpu/GrProcessor.cpp | 2 +- src/gpu/GrRecordingContext.cpp | 2 +- tests/GrMemoryPoolTest.cpp | 70 ++++++++++++++------------------- 6 files changed, 137 insertions(+), 99 deletions(-) diff --git a/bench/GrMemoryPoolBench.cpp b/bench/GrMemoryPoolBench.cpp index 4d52662abd..f2486a5843 100644 --- a/bench/GrMemoryPoolBench.cpp +++ b/bench/GrMemoryPoolBench.cpp @@ -21,12 +21,16 @@ struct A { int gStuff[10]; #if OVERRIDE_NEW - void* operator new (size_t size) { return gBenchPool.allocate(size); } - void operator delete (void* mem) { if (mem) { return gBenchPool.release(mem); } } + void* operator new(size_t size) { return gBenchPool->allocate(size); } + void operator delete(void* mem) { + if (mem) { + return gBenchPool->release(mem); + } + } #endif - static GrMemoryPool gBenchPool; + static std::unique_ptr gBenchPool; }; -GrMemoryPool A::gBenchPool(10 * (1 << 10), 10 * (1 << 10)); +std::unique_ptr A::gBenchPool = GrMemoryPool::Make(10 * (1 << 10), 10 * (1 << 10)); /** * This benchmark creates and deletes objects in stack order @@ -83,12 +87,16 @@ private: struct B { int gStuff[10]; #if OVERRIDE_NEW - void* operator new (size_t size) { return gBenchPool.allocate(size); } - void operator delete (void* mem) { if (mem) { return gBenchPool.release(mem); } } + void* operator new(size_t size) { return gBenchPool->allocate(size); } + void operator delete(void* mem) { + if (mem) { + 
return gBenchPool->release(mem); + } + } #endif - static GrMemoryPool gBenchPool; + static std::unique_ptr gBenchPool; }; -GrMemoryPool B::gBenchPool(10 * (1 << 10), 10 * (1 << 10)); +std::unique_ptr B::gBenchPool = GrMemoryPool::Make(10 * (1 << 10), 10 * (1 << 10)); /** * This benchmark creates objects and deletes them in random order @@ -128,12 +136,16 @@ private: struct C { int gStuff[10]; #if OVERRIDE_NEW - void* operator new (size_t size) { return gBenchPool.allocate(size); } - void operator delete (void* mem) { if (mem) { return gBenchPool.release(mem); } } + void* operator new(size_t size) { return gBenchPool->allocate(size); } + void operator delete(void* mem) { + if (mem) { + return gBenchPool->release(mem); + } + } #endif - static GrMemoryPool gBenchPool; + static std::unique_ptr gBenchPool; }; -GrMemoryPool C::gBenchPool(10 * (1 << 10), 10 * (1 << 10)); +std::unique_ptr C::gBenchPool = GrMemoryPool::Make(10 * (1 << 10), 10 * (1 << 10)); /** * This benchmark creates objects and deletes them in queue order diff --git a/src/gpu/GrMemoryPool.cpp b/src/gpu/GrMemoryPool.cpp index 8a92a5e384..a474469e21 100644 --- a/src/gpu/GrMemoryPool.cpp +++ b/src/gpu/GrMemoryPool.cpp @@ -18,26 +18,26 @@ #define VALIDATE #endif -void GrOpMemoryPool::release(std::unique_ptr op) { - GrOp* tmp = op.release(); - SkASSERT(tmp); - tmp->~GrOp(); - fMemoryPool.release(tmp); +std::unique_ptr GrMemoryPool::Make(size_t preallocSize, size_t minAllocSize) { + preallocSize = std::max(preallocSize, kMinAllocationSize); + static constexpr size_t kPoolSize = GrSizeAlignUp(sizeof(GrMemoryPool), kAlignment); + size_t size = kPoolSize + preallocSize; + void* mem = operator new(size); + void* preallocStart = static_cast(mem) + kPoolSize; + return std::unique_ptr( + new (mem) GrMemoryPool(preallocStart, preallocSize, minAllocSize)); } -constexpr size_t GrMemoryPool::kSmallestMinAllocSize; - -GrMemoryPool::GrMemoryPool(size_t preallocSize, size_t minAllocSize) { +GrMemoryPool::GrMemoryPool(void* 
preallocStart, size_t preallocSize, size_t minAllocSize) { SkDEBUGCODE(fAllocationCnt = 0); SkDEBUGCODE(fAllocBlockCnt = 0); - minAllocSize = SkTMax(GrSizeAlignUp(minAllocSize, kAlignment), kSmallestMinAllocSize); - preallocSize = SkTMax(GrSizeAlignUp(preallocSize, kAlignment), minAllocSize); + minAllocSize = std::max(minAllocSize, kMinAllocationSize); fMinAllocSize = minAllocSize; fSize = 0; - fHead = CreateBlock(preallocSize); + fHead = InitBlock(preallocStart, preallocSize); fTail = fHead; fHead->fNext = nullptr; fHead->fPrev = nullptr; @@ -62,7 +62,7 @@ GrMemoryPool::~GrMemoryPool() { SkASSERT(0 == fAllocationCnt); SkASSERT(fHead == fTail); SkASSERT(0 == fHead->fLiveCount); - DeleteBlock(fHead); + SkASSERT(kAssignedMarker == fHead->fBlockSentinal); }; void* GrMemoryPool::allocate(size_t size) { @@ -71,7 +71,7 @@ void* GrMemoryPool::allocate(size_t size) { size = GrSizeAlignUp(size, kAlignment); if (fTail->fFreeSize < size) { size_t blockSize = size + kHeaderSize; - blockSize = SkTMax(blockSize, fMinAllocSize); + blockSize = std::max(blockSize, fMinAllocSize); BlockHeader* block = CreateBlock(blockSize); block->fPrev = fTail; @@ -149,11 +149,13 @@ void GrMemoryPool::release(void* p) { } GrMemoryPool::BlockHeader* GrMemoryPool::CreateBlock(size_t blockSize) { - blockSize = SkTMax(blockSize, kHeaderSize); - BlockHeader* block = - reinterpret_cast(sk_malloc_throw(blockSize)); - // we assume malloc gives us aligned memory - SkASSERT(!(reinterpret_cast(block) % kAlignment)); + blockSize = std::max(blockSize, kHeaderSize); + return InitBlock(sk_malloc_throw(blockSize), blockSize); +} + +auto GrMemoryPool::InitBlock(void* mem, size_t blockSize) -> BlockHeader* { + SkASSERT(!(reinterpret_cast(mem) % kAlignment)); + auto block = reinterpret_cast(mem); SkDEBUGCODE(block->fBlockSentinal = kAssignedMarker); block->fLiveCount = 0; block->fFreeSize = blockSize - kHeaderSize; @@ -215,3 +217,36 @@ void GrMemoryPool::validate() { SkASSERT(fAllocBlockCnt != 0 || fSize == 0); 
#endif } + +//////////////////////////////////////////////////////////////////////////////////////// + +static constexpr size_t kOpPoolSize = + GrSizeAlignUp(sizeof(GrOpMemoryPool), GrMemoryPool::kAlignment); + +GrOpMemoryPool::~GrOpMemoryPool() { this->pool()->~GrMemoryPool(); } + +std::unique_ptr GrOpMemoryPool::Make(size_t preallocSize, size_t minAllocSize) { + preallocSize = std::max(preallocSize, GrMemoryPool::kMinAllocationSize); + static constexpr size_t kOpPoolSize = + GrSizeAlignUp(sizeof(GrOpMemoryPool), GrMemoryPool::kAlignment); + static constexpr size_t kPoolSize = + GrSizeAlignUp(sizeof(GrMemoryPool), GrMemoryPool::kAlignment); + size_t size = kOpPoolSize + kPoolSize + preallocSize; + void* mem = operator new(size); + void* memPoolPtr = static_cast(mem) + kOpPoolSize; + void* preallocStart = static_cast(mem) + kOpPoolSize + kPoolSize; + new (memPoolPtr) GrMemoryPool(preallocStart, preallocSize, minAllocSize); + return std::unique_ptr(new (mem) GrOpMemoryPool()); +} + +void GrOpMemoryPool::release(std::unique_ptr op) { + GrOp* tmp = op.release(); + SkASSERT(tmp); + tmp->~GrOp(); + this->pool()->release(tmp); +} + +GrMemoryPool* GrOpMemoryPool::pool() const { + auto addr = reinterpret_cast(this) + kOpPoolSize; + return reinterpret_cast(const_cast(addr)); +} diff --git a/src/gpu/GrMemoryPool.h b/src/gpu/GrMemoryPool.h index 535ad9bb75..364f58e32e 100644 --- a/src/gpu/GrMemoryPool.h +++ b/src/gpu/GrMemoryPool.h @@ -21,23 +21,27 @@ * requests. It is optimized for allocate / release speed over memory * efficiency. The interface is designed to be used to implement operator new * and delete overrides. All allocations are expected to be released before the - * pool's destructor is called. Allocations will be 8-byte aligned. + * pool's destructor is called. Allocations will be aligned to + * alignof(std::max_align_t). */ class GrMemoryPool { public: + // Guaranteed alignment of pointer returned by allocate(). 
+ static constexpr size_t kAlignment = alignof(std::max_align_t); + // Minimum size this class will allocate at once. + static constexpr size_t kMinAllocationSize = 1 << 10; + /** * Prealloc size is the amount of space to allocate at pool creation * time and keep around until pool destruction. The min alloc size is * the smallest allowed size of additional allocations. Both sizes are - * adjusted to ensure that: - * 1. they are are 8-byte aligned - * 2. minAllocSize >= kSmallestMinAllocSize - * 3. preallocSize >= minAllocSize + * adjusted to ensure that they are at least as large as kMinAllocationSize. * - * Both sizes is what the pool will end up allocating from the system, and + * Both sizes are what the pool will end up allocating from the system, and * portions of the allocated memory is used for internal bookkeeping. */ - GrMemoryPool(size_t preallocSize, size_t minAllocSize); + static std::unique_ptr Make(size_t preallocSize, size_t minAllocSize); + void operator delete(void* p) { ::operator delete(p); } ~GrMemoryPool(); @@ -66,15 +70,14 @@ public: */ size_t preallocSize() const { return fHead->fSize; } - /** - * Minimum value of minAllocSize constructor argument. - */ - constexpr static size_t kSmallestMinAllocSize = 1 << 10; private: + GrMemoryPool(void* preallocStart, size_t preallocSize, size_t minAllocSize); + struct BlockHeader; static BlockHeader* CreateBlock(size_t size); + static BlockHeader* InitBlock(void* mem, size_t blockSize); static void DeleteBlock(BlockHeader* block); @@ -115,39 +118,37 @@ private: SkTHashSet fAllocatedIDs; #endif -protected: - enum { - // We assume this alignment is good enough for everybody. 
- kAlignment = 8, - kHeaderSize = GrSizeAlignUp(sizeof(BlockHeader), kAlignment), - kPerAllocPad = GrSizeAlignUp(sizeof(AllocHeader), kAlignment), - }; + friend class GrOpMemoryPool; + + static constexpr size_t kHeaderSize = GrSizeAlignUp(sizeof(BlockHeader), kAlignment); + static constexpr size_t kPerAllocPad = GrSizeAlignUp(sizeof(AllocHeader), kAlignment); }; class GrOp; class GrOpMemoryPool { public: - GrOpMemoryPool(size_t preallocSize, size_t minAllocSize) - : fMemoryPool(preallocSize, minAllocSize) { - } + static std::unique_ptr Make(size_t preallocSize, size_t minAllocSize); + void operator delete(void* p) { ::operator delete(p); } + + ~GrOpMemoryPool(); template std::unique_ptr allocate(OpArgs&&... opArgs) { - char* mem = (char*) fMemoryPool.allocate(sizeof(Op)); + auto mem = this->pool()->allocate(sizeof(Op)); return std::unique_ptr(new (mem) Op(std::forward(opArgs)...)); } - void* allocate(size_t size) { - return fMemoryPool.allocate(size); - } + void* allocate(size_t size) { return this->pool()->allocate(size); } void release(std::unique_ptr op); - bool isEmpty() const { return fMemoryPool.isEmpty(); } + bool isEmpty() const { return this->pool()->isEmpty(); } private: - GrMemoryPool fMemoryPool; + GrMemoryPool* pool() const; + + GrOpMemoryPool() = default; }; #endif diff --git a/src/gpu/GrProcessor.cpp b/src/gpu/GrProcessor.cpp index aae1029750..4b7b2b1ec9 100644 --- a/src/gpu/GrProcessor.cpp +++ b/src/gpu/GrProcessor.cpp @@ -116,7 +116,7 @@ public: #endif GrMemoryPool* pool() const { - static GrMemoryPool* gPool = new GrMemoryPool(4096, 4096); + static GrMemoryPool* gPool = GrMemoryPool::Make(4096, 4096).release(); return gPool; } }; diff --git a/src/gpu/GrRecordingContext.cpp b/src/gpu/GrRecordingContext.cpp index aa7d1a1a6b..cf38a21669 100644 --- a/src/gpu/GrRecordingContext.cpp +++ b/src/gpu/GrRecordingContext.cpp @@ -122,7 +122,7 @@ GrOpMemoryPool* GrRecordingContext::opMemoryPool() { // DDL TODO: should the size of the memory pool be decreased in 
DDL mode? CPU-side memory // consumed in DDL mode vs. normal mode for a single skp might be a good metric of wasted // memory. - fOpMemoryPool = std::make_unique(16384, 16384); + fOpMemoryPool = GrOpMemoryPool::Make(16384, 16384); } return fOpMemoryPool.get(); diff --git a/tests/GrMemoryPoolTest.cpp b/tests/GrMemoryPoolTest.cpp index 3eb10ce12c..f5b341eb08 100644 --- a/tests/GrMemoryPoolTest.cpp +++ b/tests/GrMemoryPoolTest.cpp @@ -27,7 +27,7 @@ public: virtual ~A() {} void* operator new(size_t size) { - if (!gPool.get()) { + if (!gPool) { return ::operator new(size); } else { return gPool->allocate(size); @@ -35,7 +35,7 @@ public: } void operator delete(void* p) { - if (!gPool.get()) { + if (!gPool) { ::operator delete(p); } else { return gPool->release(p); @@ -45,13 +45,10 @@ public: static A* Create(SkRandom* r); static void SetAllocator(size_t preallocSize, size_t minAllocSize) { - GrMemoryPool* pool = new GrMemoryPool(preallocSize, minAllocSize); - gPool.reset(pool); + gPool = GrMemoryPool::Make(preallocSize, minAllocSize); } - static void ResetAllocator() { - gPool.reset(nullptr); - } + static void ResetAllocator() { gPool.reset(); } private: static std::unique_ptr gPool; @@ -246,9 +243,9 @@ private: }; DEF_TEST(GrMemoryPoolAPI, reporter) { - constexpr size_t kSmallestMinAllocSize = GrMemoryPool::kSmallestMinAllocSize; + constexpr size_t kSmallestMinAllocSize = GrMemoryPool::kMinAllocationSize; - // Allocates memory until pool adds a new block (pool.size() changes). + // Allocates memory until pool adds a new block (pool->size() changes). auto allocateMemory = [](GrMemoryPool& pool, AutoPoolReleaser& r) { size_t origPoolSize = pool.size(); while (pool.size() == origPoolSize) { @@ -256,65 +253,58 @@ DEF_TEST(GrMemoryPoolAPI, reporter) { } }; - // Effective prealloc space capacity is >= kSmallestMinAllocSize. + // Effective prealloc space capacity is >= kMinAllocationSize. 
{ - GrMemoryPool pool(0, 0); - REPORTER_ASSERT(reporter, pool.preallocSize() == kSmallestMinAllocSize); + auto pool = GrMemoryPool::Make(0, 0); + REPORTER_ASSERT(reporter, pool->preallocSize() == kSmallestMinAllocSize); } - // Effective prealloc space capacity is >= minAllocSize. + // Effective block size capacity >= kMinAllocationSize. { - constexpr size_t kMinAllocSize = kSmallestMinAllocSize * 2; - GrMemoryPool pool(kSmallestMinAllocSize, kMinAllocSize); - REPORTER_ASSERT(reporter, pool.preallocSize() == kMinAllocSize); - } + auto pool = GrMemoryPool::Make(kSmallestMinAllocSize, kSmallestMinAllocSize / 2); + AutoPoolReleaser r(*pool); - // Effective block size capacity >= kSmallestMinAllocSize. - { - GrMemoryPool pool(kSmallestMinAllocSize, kSmallestMinAllocSize / 2); - AutoPoolReleaser r(pool); - - allocateMemory(pool, r); - REPORTER_ASSERT(reporter, pool.size() == kSmallestMinAllocSize); + allocateMemory(*pool, r); + REPORTER_ASSERT(reporter, pool->size() == kSmallestMinAllocSize); } // Pool allocates exactly preallocSize on creation. { constexpr size_t kPreallocSize = kSmallestMinAllocSize * 5; - GrMemoryPool pool(kPreallocSize, 0); - REPORTER_ASSERT(reporter, pool.preallocSize() == kPreallocSize); + auto pool = GrMemoryPool::Make(kPreallocSize, 0); + REPORTER_ASSERT(reporter, pool->preallocSize() == kPreallocSize); } // Pool allocates exactly minAllocSize when it expands. 
{ constexpr size_t kMinAllocSize = kSmallestMinAllocSize * 7; - GrMemoryPool pool(0, kMinAllocSize); - AutoPoolReleaser r(pool); + auto pool = GrMemoryPool::Make(0, kMinAllocSize); + AutoPoolReleaser r(*pool); - allocateMemory(pool, r); - REPORTER_ASSERT(reporter, pool.size() == kMinAllocSize); + allocateMemory(*pool, r); + REPORTER_ASSERT(reporter, pool->size() == kMinAllocSize); - allocateMemory(pool, r); - REPORTER_ASSERT(reporter, pool.size() == 2 * kMinAllocSize); + allocateMemory(*pool, r); + REPORTER_ASSERT(reporter, pool->size() == 2 * kMinAllocSize); } // When asked to allocate amount > minAllocSize, pool allocates larger block // to accommodate all internal structures. { constexpr size_t kMinAllocSize = kSmallestMinAllocSize * 2; - GrMemoryPool pool(kSmallestMinAllocSize, kMinAllocSize); - AutoPoolReleaser r(pool); + auto pool = GrMemoryPool::Make(kSmallestMinAllocSize, kMinAllocSize); + AutoPoolReleaser r(*pool); - REPORTER_ASSERT(reporter, pool.size() == 0); + REPORTER_ASSERT(reporter, pool->size() == 0); constexpr size_t hugeSize = 10 * kMinAllocSize; - r.add(pool.allocate(hugeSize)); - REPORTER_ASSERT(reporter, pool.size() > hugeSize); + r.add(pool->allocate(hugeSize)); + REPORTER_ASSERT(reporter, pool->size() > hugeSize); // Block size allocated to accommodate huge request doesn't include any extra // space, so next allocation request allocates a new block. - size_t hugeBlockSize = pool.size(); - r.add(pool.allocate(0)); - REPORTER_ASSERT(reporter, pool.size() == hugeBlockSize + kMinAllocSize); + size_t hugeBlockSize = pool->size(); + r.add(pool->allocate(0)); + REPORTER_ASSERT(reporter, pool->size() == hugeBlockSize + kMinAllocSize); } }