From 24d377eedfe5d90e726d08270916939231838858 Mon Sep 17 00:00:00 2001
From: Brian Salomon <bsalomon@google.com>
Date: Tue, 23 Apr 2019 15:24:31 -0400
Subject: [PATCH] Store tasks to execute before VK secondary command buffers
 generically.

Repurpose GrTRecorder for storing these tasks. It's currently unused.
Reimplement on top of SkArenaAlloc and using emplace methods now that we
have C++14. Currently it stores copy and upload tasks. In the future it
will store transfer-out commands.

Removes the optimization that reset copy-ins on clear/discard. However,
none of our existing tests exercised it.

Change-Id: I0474f77cc2d368461d542de50a7a0c5609312001
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/209643
Reviewed-by: Chris Dalton <csmartdalton@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
---
 include/gpu/GrTypes.h               |  26 +-
 src/gpu/GrMemoryPool.h              |   4 +-
 src/gpu/GrTRecorder.h               | 429 +++++++---------------------
 src/gpu/ccpr/GrCCFiller.cpp         |   2 +-
 src/gpu/ccpr/GrCCStroker.cpp        |  12 +-
 src/gpu/vk/GrVkGpuCommandBuffer.cpp |  72 +++--
 src/gpu/vk/GrVkGpuCommandBuffer.h   |  54 ++--
 tests/GrTRecorderTest.cpp           | 185 ++++++------
 8 files changed, 284 insertions(+), 500 deletions(-)

diff --git a/include/gpu/GrTypes.h b/include/gpu/GrTypes.h
index 4bda9a99cf..7084d92e32 100644
--- a/include/gpu/GrTypes.h
+++ b/include/gpu/GrTypes.h
@@ -134,50 +134,42 @@ template <typename TFlags> inline TFlags& operator&=(TFlags& a, GrTFlagsMask<TFlags> b) {
     SkASSERT(y > 0);
     return (x + (y-1)) / y;
 }
-static inline uint32_t GrUIDivRoundUp(uint32_t x, uint32_t y) {
+static inline constexpr uint32_t GrUIDivRoundUp(uint32_t x, uint32_t y) {
     return (x + (y-1)) / y;
 }
-static inline size_t GrSizeDivRoundUp(size_t x, size_t y) {
-    return (x + (y-1)) / y;
-}
-
-// compile time, evaluates Y multiple times
-#define GR_CT_DIV_ROUND_UP(X, Y) (((X) + ((Y)-1)) / (Y))
+static inline constexpr size_t GrSizeDivRoundUp(size_t x, size_t y) { return (x + (y - 1)) / y; }
 
 /**
  *  align up
  */
-static inline uint32_t GrUIAlignUp(uint32_t x, uint32_t alignment) {
+static inline constexpr uint32_t GrUIAlignUp(uint32_t x, uint32_t alignment) {
     return GrUIDivRoundUp(x, alignment) * alignment;
 }
-static inline size_t GrSizeAlignUp(size_t x, size_t alignment) {
+static inline constexpr size_t GrSizeAlignUp(size_t x, size_t alignment) {
     return GrSizeDivRoundUp(x, alignment) * alignment;
 }
 
-// compile time, evaluates A multiple times
-#define GR_CT_ALIGN_UP(X, A) (GR_CT_DIV_ROUND_UP((X),(A)) * (A))
-
 /**
  * amount of pad needed to align up
  */
-static inline uint32_t GrUIAlignUpPad(uint32_t x, uint32_t alignment) {
+static inline constexpr uint32_t GrUIAlignUpPad(uint32_t x, uint32_t alignment) {
     return (alignment - x % alignment) % alignment;
 }
-static inline size_t GrSizeAlignUpPad(size_t x, size_t alignment) {
+static inline constexpr size_t GrSizeAlignUpPad(size_t x, size_t alignment) {
     return (alignment - x % alignment) % alignment;
 }
 
 /**
  *  align down
  */
-static inline uint32_t GrUIAlignDown(uint32_t x, uint32_t alignment) {
+static inline constexpr uint32_t GrUIAlignDown(uint32_t x, uint32_t alignment) {
     return (x / alignment) * alignment;
 }
-static inline size_t GrSizeAlignDown(size_t x, uint32_t alignment) {
+static inline constexpr size_t GrSizeAlignDown(size_t x, uint32_t alignment) {
     return (x / alignment) * alignment;
 }
 
diff --git a/src/gpu/GrMemoryPool.h b/src/gpu/GrMemoryPool.h
index 15399b6852..7000739abb 100644
--- a/src/gpu/GrMemoryPool.h
+++ b/src/gpu/GrMemoryPool.h
@@ -119,8 +119,8 @@ protected:
     enum {
         // We assume this alignment is good enough for everybody.
         kAlignment    = 8,
-        kHeaderSize   = GR_CT_ALIGN_UP(sizeof(BlockHeader), kAlignment),
-        kPerAllocPad  = GR_CT_ALIGN_UP(sizeof(AllocHeader), kAlignment),
+        kHeaderSize   = GrSizeAlignUp(sizeof(BlockHeader), kAlignment),
+        kPerAllocPad  = GrSizeAlignUp(sizeof(AllocHeader), kAlignment),
     };
 };
 
diff --git a/src/gpu/GrTRecorder.h b/src/gpu/GrTRecorder.h
index 74dd1380b0..5ec47aae7b 100644
--- a/src/gpu/GrTRecorder.h
+++ b/src/gpu/GrTRecorder.h
@@ -8,45 +8,31 @@
 #ifndef GrTRecorder_DEFINED
 #define GrTRecorder_DEFINED
 
-#include "SkTypes.h"
-#include <new>
-
-template <typename TBase, typename TAlign> class GrTRecorder;
-template <typename TItem> struct GrTRecorderAllocWrapper;
+#include "GrTypes.h"
+#include "SkArenaAlloc.h"
+#include "SkTLogic.h"
 
 /**
  * Records a list of items with a common base type, optional associated data, and
- * permanent memory addresses.
+ * permanent memory addresses. It supports forward iteration.
  *
- * This class preallocates its own chunks of memory for hosting objects, so new items can
- * be created without excessive calls to malloc().
- *
- * To create a new item and append it to the back of the list, use the following macros:
- *
- *     GrNEW_APPEND_TO_RECORDER(recorder, SubclassName, (args))
- *     GrNEW_APPEND_WITH_DATA_TO_RECORDER(recorder, SubclassName, (args), sizeOfData)
+ * This class allocates space for the stored items and associated data in a SkArenaAlloc.
+ * There is an overhead of 1 pointer for each stored item.
  *
  * Upon reset or delete, the items are destructed in the same order they were received,
  * not reverse (stack) order.
 *
- * @param TBase   Common base type of items in the list. If TBase is not a class with a
- *                virtual destructor, the client is responsible for invoking any necessary
- *                destructors.
- *
- *                For now, any subclass used in the list must have the same start address
- *                as TBase (or in other words, the types must be convertible via
- *                reinterpret_cast<>). Classes with multiple inheritance (or any subclass
- *                on an obscure compiler) may not be compatible. This is runtime asserted
- *                in debug builds.
- *
- * @param TAlign  A type whose size is the desired memory alignment for object allocations.
- *                This should be the largest known alignment requirement for all objects
- *                that may be stored in the list.
+ * @param TBase   Common base type of items in the list. It is assumed that the items are
+ *                trivially destructable or that TBase has a virtual destructor as ~TBase()
+ *                is called to destroy the items.
 */
-template <typename TBase, typename TAlign> class GrTRecorder : SkNoncopyable {
+template <typename TBase> class GrTRecorder {
+private:
+    template <bool IsConst> class IterImpl;
+
 public:
-    class Iter;
-    class ReverseIter;
+    using iterator = IterImpl<false>;
+    using const_iterator = IterImpl<true>;
 
     /**
      * Create a recorder.
     *
      * @param initialSizeInBytes  The amount of memory reserved by the recorder initially,
                                   and after calls to reset().
     */
-    GrTRecorder(int initialSizeInBytes)
-        : fHeadBlock(MemBlock::Alloc(LengthOf(initialSizeInBytes), nullptr)),
-          fTailBlock(fHeadBlock),
-          fLastItem(nullptr) {}
+    explicit GrTRecorder(size_t initialSizeInBytes) : fArena(initialSizeInBytes) {}
+    GrTRecorder(const GrTRecorder&) = delete;
+    GrTRecorder& operator=(const GrTRecorder&) = delete;
 
-    ~GrTRecorder() {
-        this->reset();
-        MemBlock::Free(fHeadBlock);
-    }
+    ~GrTRecorder() { this->reset(); }
 
-    bool empty() { return !fLastItem; }
+    bool empty() { return !SkToBool(fTail); }
 
+    /** The last item. Must not be empty. */
     TBase& back() {
         SkASSERT(!this->empty());
-        return *reinterpret_cast<TBase*>(fLastItem);
+        return *fTail->get();
     }
 
-    /**
-     * Removes and destroys the last block added to the recorder. It may not be called when the
-     * recorder is empty.
-     */
-    void pop_back();
+    /** Forward mutable iteration */
+    iterator begin() { return iterator(fHead); }
+    iterator end() { return iterator(nullptr); }
 
-    /**
-     * Destruct all items in the list and reset to empty.
-     */
+    /** Forward const iteration */
+    const_iterator begin() const { return const_iterator(fHead); }
+    const_iterator end() const { return const_iterator(nullptr); }
+
+    /** Destruct all items in the list and reset to empty. Frees memory allocated from arena. */
     void reset();
 
     /**
-     * Retrieve the extra data associated with an item that was allocated using
-     * GrNEW_APPEND_WITH_DATA_TO_RECORDER().
-     *
-     * @param item  The item whose data to retrieve. The pointer must be of the same type
-     *              that was allocated initally; it can't be a pointer to a base class.
-     *
-     * @return The item's associated data.
+     * Emplace a new TItem (which derives from TBase) in the recorder. This requires equivalence
+     * between reinterpret_cast<TBase*> and static_cast<TBase*> when operating on TItem*.
+     * Multiple inheritance may make this not true. It is runtime asserted.
     */
-    template <typename TItem> static const void* GetDataForItem(const TItem* item) {
-        const TAlign* ptr = reinterpret_cast<const TAlign*>(item);
-        return &ptr[length_of<TItem>::kValue];
-    }
-    template <typename TItem> static void* GetDataForItem(TItem* item) {
-        TAlign* ptr = reinterpret_cast<TAlign*>(item);
-        return &ptr[length_of<TItem>::kValue];
+    template <typename TItem, typename... Args> TItem& emplace(Args... args) {
+        return this->emplaceWithData<TItem, Args...>(0, std::forward<Args>(args)...);
     }
 
+    /**
+     * Emplace a new TItem (which derives from TBase) in the recorder with extra data space. The
+     * extra data immediately follows the stored item with no extra alignment. E.g.,
+     *     void* extraData = &recorder->emplaceWithData<TItem>(dataSize, ...) + 1;
+     *
+     * This requires equivalence between reinterpret_cast<TBase*> and static_cast<TBase*> when
+     * operating on TItem*. Multiple inheritance may make this not true. It is runtime asserted.
+     */
+    template <typename TItem, typename... Args>
+    SK_WHEN((std::is_base_of<TBase, TItem>::value), TItem&)
+    emplaceWithData(size_t extraDataSize, Args... args);
+
 private:
-    template <typename TItem> struct length_of {
-        enum { kValue = (sizeof(TItem) + sizeof(TAlign) - 1) / sizeof(TAlign) };
-    };
-    static int LengthOf(int bytes) { return (bytes + sizeof(TAlign) - 1) / sizeof(TAlign); }
-
     struct Header {
-        int fTotalLength; // The length of an entry including header, item, and data in TAligns.
-        int fPrevLength;  // Same but for the previous entry. Used for iterating backwards.
+        Header* fNext = nullptr;
+        // We always store the T immediately after the header (and ensure proper alignment). See
+        // emplaceWithData() implementation.
+        TBase* get() const { return reinterpret_cast<TBase*>(const_cast<Header*>(this) + 1); }
     };
 
-    template <typename TItem> void* alloc_back(int dataLength);
-
-    struct MemBlock : SkNoncopyable {
-        /** Allocates a new block and appends it to prev if not nullptr. The length param is in units
-            of TAlign. */
-        static MemBlock* Alloc(int length, MemBlock* prev) {
-            MemBlock* block = reinterpret_cast<MemBlock*>(
-                sk_malloc_throw(sizeof(TAlign) * (length_of<MemBlock>::kValue + length)));
-            block->fLength = length;
-            block->fBack = 0;
-            block->fNext = nullptr;
-            block->fPrev = prev;
-            if (prev) {
-                SkASSERT(nullptr == prev->fNext);
-                prev->fNext = block;
-            }
-            return block;
-        }
-
-        // Frees from this block forward. Also adjusts prev block's next ptr.
-        static void Free(MemBlock* block) {
-            if (block && block->fPrev) {
-                SkASSERT(block->fPrev->fNext == block);
-                block->fPrev->fNext = nullptr;
-            }
-            while (block) {
-                MemBlock* next = block->fNext;
-                sk_free(block);
-                block = next;
-            }
-        }
-
-        TAlign& operator [](int i) {
-            return reinterpret_cast<TAlign*>(this)[length_of<MemBlock>::kValue + i];
-        }
-
-        int       fLength; // Length in units of TAlign of the block.
-        int       fBack;   // Offset, in TAligns, to unused portion of the memory block.
-        MemBlock* fNext;
-        MemBlock* fPrev;
-    };
-    MemBlock* const fHeadBlock;
-    MemBlock*       fTailBlock;
-
-    void* fLastItem; // really a ptr to TBase
-
-    template <typename TItem> friend struct GrTRecorderAllocWrapper;
-
-    template <typename TBase2, typename TAlign2, typename TItem>
-    friend void* operator new(size_t, GrTRecorder<TBase2, TAlign2>&,
-                              const GrTRecorderAllocWrapper<TItem>&);
-
-    friend class Iter;
-    friend class ReverseIter;
+    SkArenaAlloc fArena;
+    Header* fHead = nullptr;
+    Header* fTail = nullptr;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
 
-template <typename TBase, typename TAlign>
-void GrTRecorder<TBase, TAlign>::pop_back() {
-    SkASSERT(fLastItem);
-    Header* header = reinterpret_cast<Header*>(
-        reinterpret_cast<TAlign*>(fLastItem) - length_of<Header>::kValue);
-    fTailBlock->fBack -= header->fTotalLength;
-    reinterpret_cast<TBase*>(fLastItem)->~TBase();
-
-    int lastItemLength = header->fPrevLength;
-
-    if (!header->fPrevLength) {
-        // We popped the first entry in the recorder.
-        SkASSERT(0 == fTailBlock->fBack);
-        fLastItem = nullptr;
-        return;
+template <typename TBase>
+template <typename TItem, typename... Args>
+inline SK_WHEN((std::is_base_of<TBase, TItem>::value), TItem&)
+GrTRecorder<TBase>::emplaceWithData(size_t extraDataSize, Args... args) {
+    static constexpr size_t kTAlign = alignof(TItem);
+    static constexpr size_t kHeaderAlign = alignof(Header);
+    static constexpr size_t kAllocAlign = kTAlign > kHeaderAlign ? kTAlign : kHeaderAlign;
+    static constexpr size_t kTItemOffset = GrSizeAlignUp(sizeof(Header), kAllocAlign);
+    // We're assuming if we back up from kItemOffset by sizeof(Header) we will still be aligned.
+    GR_STATIC_ASSERT(sizeof(Header) % alignof(Header) == 0);
+    const size_t totalSize = kTItemOffset + sizeof(TItem) + extraDataSize;
+    auto alloc = reinterpret_cast<char*>(fArena.makeBytesAlignedTo(totalSize, kAllocAlign));
+    Header* header = new (alloc + kTItemOffset - sizeof(Header)) Header();
+    if (fTail) {
+        fTail->fNext = header;
     }
-    while (!fTailBlock->fBack) {
-        // We popped the last entry in a block that isn't the head block. Move back a block but
-        // don't free it since we'll probably grow into it shortly.
-        fTailBlock = fTailBlock->fPrev;
-        SkASSERT(fTailBlock);
+    fTail = header;
+    if (!fHead) {
+        fHead = header;
     }
-    fLastItem = &(*fTailBlock)[fTailBlock->fBack - lastItemLength + length_of<Header>::kValue];
+    auto* item = new (alloc + kTItemOffset) TItem(std::forward<Args>(args)...);
+    // We require that we can reinterpret_cast between TBase* and TItem*. Could not figure out how
+    // to statically assert this. See proposal for std::is_initial_base_of here:
+    // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0466r0.pdf
+    SkASSERT(reinterpret_cast<uintptr_t>(item) ==
+             reinterpret_cast<uintptr_t>(static_cast<TBase*>(item)));
+    return *item;
 }
 
-template <typename TBase, typename TAlign>
-template <typename TItem>
-void* GrTRecorder<TBase, TAlign>::alloc_back(int dataLength) {
-    // Find the header of the previous entry and get its length. We need to store that in the new
-    // header for backwards iteration (pop_back()).
-    int prevLength = 0;
-    if (fLastItem) {
-        Header* lastHeader = reinterpret_cast<Header*>(
-            reinterpret_cast<TAlign*>(fLastItem) - length_of<Header>::kValue);
-        prevLength = lastHeader->fTotalLength;
+template <typename TBase> inline void GrTRecorder<TBase>::reset() {
+    for (auto& i : *this) {
+        i.~TBase();
     }
-
-    const int totalLength = length_of<Header>::kValue + length_of<TItem>::kValue + dataLength;
-
-    // Check if there is room in the current block and if not walk to next (allocating if
-    // necessary). Note that pop_back() and reset() can leave the recorder in a state where it
-    // has preallocated blocks hanging off the tail that are currently unused.
-    while (fTailBlock->fBack + totalLength > fTailBlock->fLength) {
-        if (!fTailBlock->fNext) {
-            fTailBlock = MemBlock::Alloc(SkTMax(2 * fTailBlock->fLength, totalLength), fTailBlock);
-        } else {
-            fTailBlock = fTailBlock->fNext;
-        }
-        SkASSERT(0 == fTailBlock->fBack);
-    }
-
-    Header* header = reinterpret_cast<Header*>(&(*fTailBlock)[fTailBlock->fBack]);
-    void* rawPtr = &(*fTailBlock)[fTailBlock->fBack + length_of<Header>::kValue];
-
-    header->fTotalLength = totalLength;
-    header->fPrevLength = prevLength;
-    fLastItem = rawPtr;
-    fTailBlock->fBack += totalLength;
-
-    // FIXME: We currently require that the base and subclass share the same start address.
-    // This is not required by the C++ spec, and is likely to not be true in the case of
-    // multiple inheritance or a base class that doesn't have virtual methods (when the
-    // subclass does). It would be ideal to find a more robust solution that comes at no
-    // extra cost to performance or code generality.
-    SkDEBUGCODE(void* baseAddr = fLastItem;
-                void* subclassAddr = rawPtr);
-    SkASSERT(baseAddr == subclassAddr);
-
-    return rawPtr;
+    GR_STATIC_ASSERT(std::is_trivially_destructible<Header>::value);
+    fHead = fTail = nullptr;
+    fArena.reset();
 }
 
 /**
- * Iterates through a recorder from front to back. The initial state of the iterator is
- * to not have the front item loaded yet; next() must be called first. Usage model:
- *
- *    GrTRecorder<TBase, TAlign>::Iter iter(recorder);
- *    while (iter.next()) {
- *        iter->doSomething();
- *    }
+ * Iterates through a recorder front-to-back, const or not.
 */
-template <typename TBase, typename TAlign>
-class GrTRecorder<TBase, TAlign>::Iter {
+template <typename TBase> template <bool IsConst> class GrTRecorder<TBase>::IterImpl {
+private:
+    using T = typename std::conditional<IsConst, const TBase, TBase>::type;
+
 public:
-    Iter(GrTRecorder& recorder) : fBlock(recorder.fHeadBlock), fPosition(0), fItem(nullptr) {}
+    IterImpl() = default;
 
-    bool next() {
-        while (fPosition >= fBlock->fBack) {
-            SkASSERT(fPosition == fBlock->fBack);
-            if (!fBlock->fNext) {
-                return false;
-            }
-            fBlock = fBlock->fNext;
-            fPosition = 0;
-        }
-
-        Header* header = reinterpret_cast<Header*>(&(*fBlock)[fPosition]);
-        fItem = reinterpret_cast<TBase*>(&(*fBlock)[fPosition + length_of<Header>::kValue]);
-        fPosition += header->fTotalLength;
-        return true;
+    IterImpl operator++() {
+        fCurr = fCurr->fNext;
+        return *this;
     }
 
-    TBase* get() const {
-        SkASSERT(fItem);
-        return fItem;
+    IterImpl operator++(int) {
+        auto old = fCurr;
+        fCurr = fCurr->fNext;
+        return {old};
     }
-    TBase* operator->() const { return this->get(); }
+
+    T& operator*() const { return *fCurr->get(); }
+    T* operator->() const { return fCurr->get(); }
+
+    bool operator==(const IterImpl& that) const { return fCurr == that.fCurr; }
+    bool operator!=(const IterImpl& that) const { return !(*this == that); }
 
 private:
-    MemBlock* fBlock;
-    int       fPosition;
-    TBase*    fItem;
+    IterImpl(Header* curr) : fCurr(curr) {}
+
+    Header* fCurr = nullptr;
+
+    friend class GrTRecorder<TBase>; // To construct from Header.
 };
 
-/**
- * Iterates through a recorder in reverse, from back to front. This version mirrors "Iter",
- * so the initial state is to have recorder.back() loaded already. (Note that this will
- * assert if the recorder is empty.) Usage model:
- *
- *    GrTRecorder<TBase, TAlign>::ReverseIter reverseIter(recorder);
- *    do {
- *        reverseIter->doSomething();
- *    } while (reverseIter.previous());
- */
-template <typename TBase, typename TAlign>
-class GrTRecorder<TBase, TAlign>::ReverseIter {
-public:
-    ReverseIter(GrTRecorder& recorder)
-        : fBlock(recorder.fTailBlock),
-          fItem(&recorder.back()) {
-        Header* lastHeader = reinterpret_cast<Header*>(
-            reinterpret_cast<TAlign*>(fItem) - length_of<Header>::kValue);
-        fPosition = fBlock->fBack - lastHeader->fTotalLength;
-    }
-
-    bool previous() {
-        Header* header = reinterpret_cast<Header*>(&(*fBlock)[fPosition]);
-
-        while (0 == fPosition) {
-            if (!fBlock->fPrev) {
-                // We've reached the front of the recorder.
-                return false;
-            }
-            fBlock = fBlock->fPrev;
-            fPosition = fBlock->fBack;
-        }
-
-        fPosition -= header->fPrevLength;
-        SkASSERT(fPosition >= 0);
-
-        fItem = reinterpret_cast<TBase*>(&(*fBlock)[fPosition + length_of<Header>::kValue]);
-        return true;
-    }
-
-    TBase* get() const { return fItem; }
-    TBase* operator->() const { return this->get(); }
-
-private:
-    MemBlock* fBlock;
-    int       fPosition;
-    TBase*    fItem;
-};
-
-template <typename TBase, typename TAlign>
-void GrTRecorder<TBase, TAlign>::reset() {
-    Iter iter(*this);
-    while (iter.next()) {
-        iter->~TBase();
-    }
-
-    // Assume the next time this recorder fills up it will use approximately the same
-    // amount of space as last time. Leave enough space for up to ~50% growth; free
-    // everything else.
-    if (fTailBlock->fBack <= fTailBlock->fLength / 2) {
-        MemBlock::Free(fTailBlock->fNext);
-    } else if (fTailBlock->fNext) {
-        MemBlock::Free(fTailBlock->fNext->fNext);
-        fTailBlock->fNext->fNext = nullptr;
-    }
-
-    for (MemBlock* block = fHeadBlock; block; block = block->fNext) {
-        block->fBack = 0;
-    }
-
-    fTailBlock = fHeadBlock;
-    fLastItem = nullptr;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-template <typename TItem> struct GrTRecorderAllocWrapper {
-    GrTRecorderAllocWrapper() : fDataLength(0) {}
-
-    template <typename TBase, typename TAlign>
-    GrTRecorderAllocWrapper(const GrTRecorder<TBase, TAlign>&, int sizeOfData)
-        : fDataLength(GrTRecorder<TBase, TAlign>::LengthOf(sizeOfData)) {}
-
-    const int fDataLength;
-};
-
-template <typename TBase, typename TAlign, typename TItem>
-void* operator new(size_t size, GrTRecorder<TBase, TAlign>& recorder,
-                   const GrTRecorderAllocWrapper<TItem>& wrapper) {
-    SkASSERT(size == sizeof(TItem));
-    return recorder.template alloc_back<TItem>(wrapper.fDataLength);
-}
-
-template <typename TBase, typename TAlign, typename TItem>
-void operator delete(void*, GrTRecorder<TBase, TAlign>&, const GrTRecorderAllocWrapper<TItem>&) {
-    // We only provide an operator delete to work around compiler warnings that can come
-    // up for an unmatched operator new when compiling with exceptions.
-    SK_ABORT("Invalid Operation");
-}
-
-#define GrNEW_APPEND_TO_RECORDER(recorder, type_name, args) \
-    (new (recorder, GrTRecorderAllocWrapper<type_name>()) type_name args)
-
-#define GrNEW_APPEND_WITH_DATA_TO_RECORDER(recorder, type_name, args, size_of_data) \
-    (new (recorder, GrTRecorderAllocWrapper<type_name>(recorder, size_of_data)) type_name args)
-
 #endif
diff --git a/src/gpu/ccpr/GrCCFiller.cpp b/src/gpu/ccpr/GrCCFiller.cpp
index 8e26c2f1bd..7facd5965f 100644
--- a/src/gpu/ccpr/GrCCFiller.cpp
+++ b/src/gpu/ccpr/GrCCFiller.cpp
@@ -315,7 +315,7 @@ bool GrCCFiller::prepareToDraw(GrOnFlushResourceProvider* onFlushRP) {
     // QuadPointInstance[]. So, reinterpreting the instance data as QuadPointInstance[], we start
     // them on the first index that will not overwrite previous TriPointInstance data.
     int quadBaseIdx =
-            GR_CT_DIV_ROUND_UP(triEndIdx * sizeof(TriPointInstance), sizeof(QuadPointInstance));
+            GrSizeDivRoundUp(triEndIdx * sizeof(TriPointInstance), sizeof(QuadPointInstance));
     fBaseInstances[0].fWeightedTriangles = quadBaseIdx;
     fBaseInstances[1].fWeightedTriangles = fBaseInstances[0].fWeightedTriangles +
                                            fTotalPrimitiveCounts[0].fWeightedTriangles;
diff --git a/src/gpu/ccpr/GrCCStroker.cpp b/src/gpu/ccpr/GrCCStroker.cpp
index 65f1c4169a..97ef8d243b 100644
--- a/src/gpu/ccpr/GrCCStroker.cpp
+++ b/src/gpu/ccpr/GrCCStroker.cpp
@@ -573,8 +573,8 @@ bool GrCCStroker::prepareToDraw(GrOnFlushResourceProvider* onFlushRP) {
     fBaseInstances[1].fStrokes[0] = fInstanceCounts[0]->fStrokes[0];
     int endLinearStrokesIdx = fBaseInstances[1].fStrokes[0] + fInstanceCounts[1]->fStrokes[0];
 
-    int cubicStrokesIdx = GR_CT_DIV_ROUND_UP(endLinearStrokesIdx * sizeof(LinearStrokeInstance),
-                                             sizeof(CubicStrokeInstance));
+    int cubicStrokesIdx = GrSizeDivRoundUp(endLinearStrokesIdx * sizeof(LinearStrokeInstance),
+                                           sizeof(CubicStrokeInstance));
     for (int i = 1; i <= kMaxNumLinearSegmentsLog2; ++i) {
         for (int j = 0; j < kNumScissorModes; ++j) {
             fBaseInstances[j].fStrokes[i] = cubicStrokesIdx;
@@ -582,16 +582,16 @@ bool GrCCStroker::prepareToDraw(GrOnFlushResourceProvider* onFlushRP) {
         }
     }
 
-    int trianglesIdx = GR_CT_DIV_ROUND_UP(cubicStrokesIdx * sizeof(CubicStrokeInstance),
-                                          sizeof(TriangleInstance));
+    int trianglesIdx = GrSizeDivRoundUp(cubicStrokesIdx * sizeof(CubicStrokeInstance),
+                                        sizeof(TriangleInstance));
     fBaseInstances[0].fTriangles = trianglesIdx;
     fBaseInstances[1].fTriangles = fBaseInstances[0].fTriangles +
                                    fInstanceCounts[0]->fTriangles;
     int endTrianglesIdx = fBaseInstances[1].fTriangles + fInstanceCounts[1]->fTriangles;
 
-    int conicsIdx = GR_CT_DIV_ROUND_UP(endTrianglesIdx * sizeof(TriangleInstance),
-                                       sizeof(ConicInstance));
+    int conicsIdx =
+            GrSizeDivRoundUp(endTrianglesIdx * sizeof(TriangleInstance), sizeof(ConicInstance));
     fBaseInstances[0].fConics = conicsIdx;
     fBaseInstances[1].fConics = fBaseInstances[0].fConics + fInstanceCounts[0]->fConics;
 
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
index 515aea31cc..b38ae18b64 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
@@ -78,11 +78,7 @@ void get_vk_load_store_ops(GrLoadOp loadOpIn, GrStoreOp storeOpIn,
     }
 }
 
-GrVkGpuRTCommandBuffer::GrVkGpuRTCommandBuffer(GrVkGpu* gpu)
-        : fCurrentCmdInfo(-1)
-        , fGpu(gpu)
-        , fLastPipelineState(nullptr) {
-}
+GrVkGpuRTCommandBuffer::GrVkGpuRTCommandBuffer(GrVkGpu* gpu) : fGpu(gpu) {}
 
 void GrVkGpuRTCommandBuffer::init() {
     GrVkRenderPass::LoadStoreOps vkColorOps(fVkColorLoadOp, fVkColorStoreOp);
@@ -164,22 +160,15 @@ void GrVkGpuRTCommandBuffer::submit() {
     GrVkRenderTarget* vkRT = static_cast<GrVkRenderTarget*>(fRenderTarget);
     GrVkImage* targetImage = vkRT->msaaImage() ? vkRT->msaaImage() : vkRT;
 
     GrStencilAttachment* stencil = fRenderTarget->renderTargetPriv().getStencilAttachment();
+    auto currPreCmd = fPreCommandBufferTasks.begin();
 
     for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
         CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
 
-        for (int j = 0; j < cbInfo.fPreDrawUploads.count(); ++j) {
-            InlineUploadInfo& iuInfo = cbInfo.fPreDrawUploads[j];
-            iuInfo.fFlushState->doUpload(iuInfo.fUpload);
+        for (int c = 0; c < cbInfo.fNumPreCmds; ++c, ++currPreCmd) {
+            currPreCmd->execute(this);
         }
 
-        for (int j = 0; j < cbInfo.fPreCopies.count(); ++j) {
-            CopyInfo& copyInfo = cbInfo.fPreCopies[j];
-            fGpu->copySurface(fRenderTarget, fOrigin, copyInfo.fSrc.get(), copyInfo.fSrcOrigin,
-                              copyInfo.fSrcRect, copyInfo.fDstPoint, copyInfo.fShouldDiscardDst);
-        }
-
         // TODO: Many things create a scratch texture which adds the discard immediately, but then
         // don't draw to it right away. This causes the discard to be ignored and we get yelled at
         // for loading uninitialized data. However, once MDB lands with reordering, the discard will
@@ -260,6 +249,7 @@ void GrVkGpuRTCommandBuffer::submit() {
                                               &cbInfo.fColorClearValue, vkRT, fOrigin, iBounds);
         }
     }
+    SkASSERT(currPreCmd == fPreCommandBufferTasks.end());
 }
 
 void GrVkGpuRTCommandBuffer::set(GrRenderTarget* rt, GrSurfaceOrigin origin,
@@ -298,6 +288,7 @@ void GrVkGpuRTCommandBuffer::reset() {
         cbInfo.fRenderPass->unref(fGpu);
     }
     fCommandBufferInfos.reset();
+    fPreCommandBufferTasks.reset();
 
     fCurrentCmdInfo = -1;
 
@@ -341,9 +332,6 @@ void GrVkGpuRTCommandBuffer::discard() {
             oldRP->unref(fGpu);
 
         cbInfo.fBounds.join(fRenderTarget->getBoundsRect());
         cbInfo.fLoadStoreState = LoadStoreState::kStartsWithDiscard;
-        // If we are going to discard the whole render target then the results of any copies we did
-        // immediately before to the target won't matter, so just drop them.
-        cbInfo.fPreCopies.reset();
     }
 }
 
@@ -447,10 +435,6 @@ void GrVkGpuRTCommandBuffer::onClear(const GrFixedClip& clip, const SkPMColor4f&
             cbInfo.fColorClearValue.color = {{color.fR, color.fG, color.fB, color.fA}};
             cbInfo.fLoadStoreState = LoadStoreState::kStartsWithClear;
 
-            // If we are going to clear the whole render target then the results of any copies we did
-            // immediately before to the target won't matter, so just drop them.
-            cbInfo.fPreCopies.reset();
-
             // Update command buffer bounds
             cbInfo.fBounds.join(fRenderTarget->getBoundsRect());
             return;
@@ -545,7 +529,21 @@ void GrVkGpuRTCommandBuffer::inlineUpload(GrOpFlushState* state,
     if (!fCommandBufferInfos[fCurrentCmdInfo].fIsEmpty) {
         this->addAdditionalRenderPass();
     }
-    fCommandBufferInfos[fCurrentCmdInfo].fPreDrawUploads.emplace_back(state, upload);
+
+    class InlineUpload : public PreCommandBufferTask {
+    public:
+        InlineUpload(GrOpFlushState* state, const GrDeferredTextureUploadFn& upload)
+                : fFlushState(state), fUpload(upload) {}
+        ~InlineUpload() override = default;
+
+        void execute(GrVkGpuRTCommandBuffer*) override { fFlushState->doUpload(fUpload); }
+
+    private:
+        GrOpFlushState* fFlushState;
+        GrDeferredTextureUploadFn fUpload;
+    };
+
+    fPreCommandBufferTasks.emplace<InlineUpload>(state, upload);
+    ++fCommandBufferInfos[fCurrentCmdInfo].fNumPreCmds;
 }
 
 void GrVkGpuRTCommandBuffer::copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
@@ -555,9 +553,35 @@ void GrVkGpuRTCommandBuffer::copy(GrSurface* src, GrSurfaceOrigin srcOrigin, con
         this->addAdditionalRenderPass();
     }
 
-    fCommandBufferInfos[fCurrentCmdInfo].fPreCopies.emplace_back(
+    class Copy : public PreCommandBufferTask {
+    public:
+        Copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+             const SkIPoint& dstPoint, bool shouldDiscardDst)
+                : fSrc(src)
+                , fSrcOrigin(srcOrigin)
+                , fSrcRect(srcRect)
+                , fDstPoint(dstPoint)
+                , fShouldDiscardDst(shouldDiscardDst) {}
+        ~Copy() override = default;
+
+        void execute(GrVkGpuRTCommandBuffer* cb) override {
+            cb->fGpu->copySurface(cb->fRenderTarget, cb->fOrigin, fSrc.get(), fSrcOrigin, fSrcRect,
+                                  fDstPoint, fShouldDiscardDst);
+        }
+
+    private:
+        using Src = GrPendingIOResource<GrSurface, kRead_GrIOType>;
+        Src fSrc;
+        GrSurfaceOrigin fSrcOrigin;
+        SkIRect fSrcRect;
+        SkIPoint fDstPoint;
+        bool fShouldDiscardDst;
+    };
+
+    fPreCommandBufferTasks.emplace<Copy>(
             src, srcOrigin, srcRect, dstPoint,
             LoadStoreState::kStartsWithDiscard == cbInfo.fLoadStoreState);
+    ++fCommandBufferInfos[fCurrentCmdInfo].fNumPreCmds;
 
     if (LoadStoreState::kLoadAndStore != cbInfo.fLoadStoreState) {
         // Change the render pass to do a load and store so we don't lose the results of our copy
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h
index 4ff887574b..f4b03485f4 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.h
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.h
@@ -12,6 +12,7 @@
 
 #include "GrColor.h"
 #include "GrMesh.h"
+#include "GrTRecorder.h"
 #include "GrTypes.h"
 #include "GrVkPipelineState.h"
 #include "vk/GrVkTypes.h"
@@ -148,28 +149,16 @@ private:
     void addAdditionalCommandBuffer();
     void addAdditionalRenderPass();
 
-    struct InlineUploadInfo {
-        InlineUploadInfo(GrOpFlushState* state, const GrDeferredTextureUploadFn& upload)
-                : fFlushState(state), fUpload(upload) {}
+    class PreCommandBufferTask {
+    public:
+        virtual ~PreCommandBufferTask() = default;
 
-        GrOpFlushState* fFlushState;
-        GrDeferredTextureUploadFn fUpload;
-    };
+        virtual void execute(GrVkGpuRTCommandBuffer*) = 0;
 
-    struct CopyInfo {
-        CopyInfo(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
-                 const SkIPoint& dstPoint, bool shouldDiscardDst)
-                : fSrc(src)
-                , fSrcOrigin(srcOrigin)
-                , fSrcRect(srcRect)
-                , fDstPoint(dstPoint)
-                , fShouldDiscardDst(shouldDiscardDst) {}
-        using Src = GrPendingIOResource<GrSurface, kRead_GrIOType>;
-        Src fSrc;
-        GrSurfaceOrigin fSrcOrigin;
-        SkIRect fSrcRect;
-        SkIPoint fDstPoint;
-        bool fShouldDiscardDst;
+    protected:
+        PreCommandBufferTask() = default;
+        PreCommandBufferTask(const PreCommandBufferTask&) = delete;
+        PreCommandBufferTask& operator=(const PreCommandBufferTask&) = delete;
     };
 
     enum class LoadStoreState {
@@ -183,14 +172,11 @@ private:
         using SampledTexture = GrPendingIOResource<GrVkTexture, kRead_GrIOType>;
         const GrVkRenderPass*                 fRenderPass;
         SkTArray<GrVkSecondaryCommandBuffer*> fCommandBuffers;
+        int                                   fNumPreCmds = 0;
         VkClearValue                          fColorClearValue;
        SkRect                                fBounds;
         bool                                  fIsEmpty = true;
         LoadStoreState                        fLoadStoreState = LoadStoreState::kUnknown;
-        // The PreDrawUploads and PreCopies are sent to the GPU before submitting the secondary
-        // command buffer.
-        SkTArray<InlineUploadInfo>            fPreDrawUploads;
-        SkTArray<CopyInfo>                    fPreCopies;
         // Array of images that will be sampled and thus need to be transferred to sampled layout
         // before submitting the secondary command buffers. This must happen after we do any predraw
         // uploads or copies.
@@ -201,16 +187,16 @@ private:
         }
     };
 
-    SkTArray<CommandBufferInfo> fCommandBufferInfos;
-    int                         fCurrentCmdInfo;
-
-    GrVkGpu*                    fGpu;
-    VkAttachmentLoadOp          fVkColorLoadOp;
-    VkAttachmentStoreOp         fVkColorStoreOp;
-    VkAttachmentLoadOp          fVkStencilLoadOp;
-    VkAttachmentStoreOp         fVkStencilStoreOp;
-    SkPMColor4f                 fClearColor;
-    GrVkPipelineState*          fLastPipelineState;
+    SkTArray<CommandBufferInfo>       fCommandBufferInfos;
+    GrTRecorder<PreCommandBufferTask> fPreCommandBufferTasks{1024};
+    GrVkGpu*                          fGpu;
+    GrVkPipelineState*                fLastPipelineState = nullptr;
+    SkPMColor4f                       fClearColor;
+    VkAttachmentLoadOp                fVkColorLoadOp;
+    VkAttachmentStoreOp               fVkColorStoreOp;
+    VkAttachmentLoadOp                fVkStencilLoadOp;
+    VkAttachmentStoreOp               fVkStencilStoreOp;
+    int                               fCurrentCmdInfo = -1;
 
     typedef GrGpuRTCommandBuffer INHERITED;
 };
diff --git a/tests/GrTRecorderTest.cpp b/tests/GrTRecorderTest.cpp
index 47658750f7..fcdd4b6e83 100644
--- a/tests/GrTRecorderTest.cpp
+++ b/tests/GrTRecorderTest.cpp
@@ -14,7 +14,7 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 
-static int activeRecorderItems = 0;
+static int gActiveRecorderItems = 0;
 
 class IntWrapper {
 public:
@@ -25,92 +25,43 @@ private:
     int fValue;
 };
 
-static void test_empty_back_and_pop(skiatest::Reporter* reporter) {
-    SkRandom rand;
-    for (int data = 0; data < 2; ++data) {
-        // Do this with different starting sizes to have different alignment between blocks and
-        // pops. We want to test poping the first guy off, guys in the middle of the block, and the
-        // first guy on a non-head block.
-        for (int j = 0; j < 8; ++j) {
-            GrTRecorder<IntWrapper, int> recorder(j);
-
-            REPORTER_ASSERT(reporter, recorder.empty());
-
-            for (int i = 0; i < 100; ++i) {
-                if (data) {
-                    REPORTER_ASSERT(reporter, i == *GrNEW_APPEND_TO_RECORDER(recorder,
-                                                                             IntWrapper, (i)));
-                } else {
-                    REPORTER_ASSERT(reporter, i ==
-                                    *GrNEW_APPEND_WITH_DATA_TO_RECORDER(recorder,
-                                                                        IntWrapper, (i),
-                                                                        rand.nextULessThan(10)));
-                }
-                REPORTER_ASSERT(reporter, !recorder.empty());
-                REPORTER_ASSERT(reporter, i == recorder.back());
-                if (0 == (i % 7)) {
-                    recorder.pop_back();
-                    if (i > 0) {
-                        REPORTER_ASSERT(reporter, !recorder.empty());
-                        REPORTER_ASSERT(reporter, i-1 == recorder.back());
-                    }
-                }
-            }
-
-            REPORTER_ASSERT(reporter, !recorder.empty());
-            recorder.reset();
-            REPORTER_ASSERT(reporter, recorder.empty());
-        }
-    }
-}
-
 struct ExtraData {
-    typedef GrTRecorder<ExtraData, int> Recorder;
+    typedef GrTRecorder<ExtraData> Recorder;
 
     ExtraData(int i) : fData(i) {
         int* extraData = this->extraData();
         for (int j = 0; j < i; j++) {
             extraData[j] = i;
         }
-        ++activeRecorderItems;
-    }
-    ~ExtraData() {
-        --activeRecorderItems;
-    }
-    int* extraData() {
-        return reinterpret_cast<int*>(Recorder::GetDataForItem(this));
+        ++gActiveRecorderItems;
     }
+    ~ExtraData() { --gActiveRecorderItems; }
+    int* extraData() { return reinterpret_cast<int*>(this + 1); }
     int fData;
 };
 
 static void test_extra_data(skiatest::Reporter* reporter) {
     ExtraData::Recorder recorder(0);
+    REPORTER_ASSERT(reporter, recorder.empty());
     for (int i = 0; i < 100; ++i) {
-        GrNEW_APPEND_WITH_DATA_TO_RECORDER(recorder, ExtraData, (i), i * sizeof(int));
+        recorder.emplaceWithData<ExtraData>(i * sizeof(int), i);
+        REPORTER_ASSERT(reporter, !recorder.empty());
     }
-    REPORTER_ASSERT(reporter, 100 == activeRecorderItems);
+    REPORTER_ASSERT(reporter, 100 == gActiveRecorderItems);
 
-    ExtraData::Recorder::Iter iter(recorder);
-    for (int i = 0; i < 100; ++i) {
-        REPORTER_ASSERT(reporter, iter.next());
+    auto iter = recorder.begin();
+    for (int i = 0; i < 100; ++i, ++iter) {
         REPORTER_ASSERT(reporter, i == iter->fData);
         for (int j = 0; j < i; j++) {
             REPORTER_ASSERT(reporter, i == iter->extraData()[j]);
         }
     }
-    REPORTER_ASSERT(reporter, !iter.next());
-
-    ExtraData::Recorder::ReverseIter reverseIter(recorder);
-    for (int i = 99; i >= 0; --i) {
-        REPORTER_ASSERT(reporter, i == reverseIter->fData);
-        for (int j = 0; j < i; j++) {
-            REPORTER_ASSERT(reporter, i == reverseIter->extraData()[j]);
-        }
-        REPORTER_ASSERT(reporter, reverseIter.previous() == !!i);
-    }
+    REPORTER_ASSERT(reporter, iter == recorder.end());
 
     recorder.reset();
-    REPORTER_ASSERT(reporter, 0 == activeRecorderItems);
+    REPORTER_ASSERT(reporter, 0 == gActiveRecorderItems);
+    REPORTER_ASSERT(reporter, recorder.begin() == recorder.end());
+    REPORTER_ASSERT(reporter, recorder.empty());
 }
 
 enum ClassType {
@@ -125,14 +76,14 @@ enum ClassType {
 
 class Base {
 public:
-    typedef GrTRecorder<Base, void*> Recorder;
+    typedef GrTRecorder<Base> Recorder;
 
     Base() {
         fMatrix.reset();
-        ++activeRecorderItems;
+        ++gActiveRecorderItems;
     }
 
-    virtual ~Base() { --activeRecorderItems; }
+    virtual ~Base() { --gActiveRecorderItems; }
 
     virtual ClassType getType() { return kBase_ClassType; }
 
@@ -179,7 +130,7 @@ private:
 class SubclassExtraData : public Base {
 public:
     SubclassExtraData(int length) : fLength(length) {
-        int* data = reinterpret_cast<int*>(Recorder::GetDataForItem(this));
+        int* data = reinterpret_cast<int*>(this + 1);
         for (int i = 0; i < fLength; ++i) {
             data[i] = ValueAt(i);
         }
@@ -189,7 +140,7 @@ public:
     virtual void validate(skiatest::Reporter* reporter) const {
         Base::validate(reporter);
 
-        const int* data = reinterpret_cast<const int*>(Recorder::GetDataForItem(this));
+        const int* data = reinterpret_cast<const int*>(this + 1);
         for (int i = 0; i < fLength; ++i) {
             REPORTER_ASSERT(reporter, ValueAt(i) == data[i]);
         }
@@ -218,8 +169,9 @@ public:
 private:
     uint32_t fCurrent;
 };
-static void test_subclasses_iters(skiatest::Reporter*, Order&, Base::Recorder::Iter&,
-                                  Base::Recorder::ReverseIter&, int = 0);
+
+static void test_subclasses_iter(skiatest::Reporter*, Order&, Base::Recorder::iterator&, int = 0);
+
 static void test_subclasses(skiatest::Reporter* reporter) {
     Base::Recorder recorder(1024);
 
@@ -227,23 +179,23 @@ static void test_subclasses(skiatest::Reporter* reporter) {
     for (int i = 0; i < 1000; i++) {
         switch (order.next()) {
             case kBase_ClassType:
-                GrNEW_APPEND_TO_RECORDER(recorder, Base, ());
+                recorder.emplace<Base>();
                 break;
 
             case kSubclass_ClassType:
-                GrNEW_APPEND_TO_RECORDER(recorder, Subclass, ());
+                recorder.emplace<Subclass>();
                 break;
 
             case kSubSubclass_ClassType:
-                GrNEW_APPEND_TO_RECORDER(recorder, SubSubclass, ());
+                recorder.emplace<SubSubclass>();
                 break;
 
             case kSubclassExtraData_ClassType:
-                GrNEW_APPEND_WITH_DATA_TO_RECORDER(recorder, SubclassExtraData, (i), sizeof(int) * i);
+                recorder.emplaceWithData<SubclassExtraData>(sizeof(int) * i, i);
                 break;
 
             case kSubclassEmpty_ClassType:
-                GrNEW_APPEND_TO_RECORDER(recorder, SubclassEmpty, ());
+                recorder.emplace<SubclassEmpty>();
                 break;
 
             default:
@@ -251,44 +203,89 @@ static void test_subclasses(skiatest::Reporter* reporter) {
                 break;
         }
     }
-    REPORTER_ASSERT(reporter, 1000 == activeRecorderItems);
+    REPORTER_ASSERT(reporter, 1000 == gActiveRecorderItems);
 
     order.reset();
-    Base::Recorder::Iter iter(recorder);
-    Base::Recorder::ReverseIter reverseIter(recorder);
+    auto iter = recorder.begin();
 
-    test_subclasses_iters(reporter, order, iter, reverseIter);
-
-    REPORTER_ASSERT(reporter, !iter.next());
+    test_subclasses_iter(reporter, order, iter);
+    REPORTER_ASSERT(reporter, iter == recorder.end());
 
     // Don't reset the recorder. It should automatically destruct all its items.
 }
 
-static void test_subclasses_iters(skiatest::Reporter* reporter, Order& order,
-                                  Base::Recorder::Iter& iter,
-                                  Base::Recorder::ReverseIter& reverseIter, int i) {
+static void test_subclasses_iter(skiatest::Reporter* reporter, Order& order,
+                                 Base::Recorder::iterator& iter, int i) {
     if (i >= 1000) {
         return;
     }
 
     ClassType classType = order.next();
 
-    REPORTER_ASSERT(reporter, iter.next());
     REPORTER_ASSERT(reporter, classType == iter->getType());
     iter->validate(reporter);
 
-    test_subclasses_iters(reporter, order, iter, reverseIter, i + 1);
+    ++iter;
+    test_subclasses_iter(reporter, order, iter, i + 1);
+}
 
-    REPORTER_ASSERT(reporter, classType == reverseIter->getType());
-    reverseIter->validate(reporter);
-    REPORTER_ASSERT(reporter, reverseIter.previous() == !!i);
+struct AlignBase {
+    AlignBase() { ++gActiveRecorderItems; }
+    ~AlignBase() { --gActiveRecorderItems; }
+    char fValue;
+};
+struct alignas(16) Align16 : public AlignBase {};
+struct alignas(32) Align32 : public AlignBase {};
+struct alignas(64) Align64 : public AlignBase {};
+struct alignas(128) Align128 : public AlignBase {};
+
+static void test_alignment(skiatest::Reporter* reporter) {
+    GrTRecorder<AlignBase> recorder(0);
+    SkTArray<size_t> expectedAlignments;
+    SkRandom random;
+    for (int i = 0; i < 100; ++i) {
+        size_t dataSize = random.nextULessThan(20);
+        switch (random.nextULessThan(5)) {
+            case 0:
+                recorder.emplaceWithData<AlignBase>(dataSize);
+                expectedAlignments.push_back(alignof(AlignBase));
+                break;
+            case 1:
+                recorder.emplaceWithData<Align16>(dataSize);
+                expectedAlignments.push_back(16);
+                break;
+            case 2:
+                recorder.emplaceWithData<Align32>(dataSize);
+                expectedAlignments.push_back(32);
+                break;
+            case 3:
+                recorder.emplaceWithData<Align64>(dataSize);
+                expectedAlignments.push_back(64);
+                break;
+            case 4:
+                recorder.emplaceWithData<Align128>(dataSize);
+                expectedAlignments.push_back(128);
+                break;
+        }
+        recorder.back().fValue = i;
+    }
+    int i = 0;
+    for (const auto& x : recorder) {
+        REPORTER_ASSERT(reporter, x.fValue == i);
+        auto pointer = reinterpret_cast<uintptr_t>(&x);
+        auto mask = static_cast<uintptr_t>(expectedAlignments[i]) - 1;
+        REPORTER_ASSERT(reporter, !(pointer & mask));
+        i++;
+    }
+    REPORTER_ASSERT(reporter, i == 100);
 }
 
 DEF_GPUTEST(GrTRecorder, reporter, /* options */) {
-    test_empty_back_and_pop(reporter);
-
     test_extra_data(reporter);
-    REPORTER_ASSERT(reporter, 0 == activeRecorderItems); // test_extra_data should call reset().
+    REPORTER_ASSERT(reporter, 0 == gActiveRecorderItems); // test_extra_data should call reset().
 
     test_subclasses(reporter);
-    REPORTER_ASSERT(reporter, 0 == activeRecorderItems); // Ensure ~GrTRecorder invokes dtors.
+    REPORTER_ASSERT(reporter, 0 == gActiveRecorderItems); // Ensure ~GrTRecorder invokes dtors.
+
+    test_alignment(reporter);
+    REPORTER_ASSERT(reporter, 0 == gActiveRecorderItems); // Ensure ~GrTRecorder invokes dtors.
 }
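
Usage sketch (not part of the commit): the snippet below illustrates the repurposed GrTRecorder API as declared in src/gpu/GrTRecorder.h above. Task and PrintTask are hypothetical types invented for this example; only the GrTRecorder calls mirror the patched header.

    #include "GrTRecorder.h"
    #include "SkTypes.h"

    #include <cstring>

    // Hypothetical base class. GrTRecorder's TBase contract: items must be trivially
    // destructible or TBase must have a virtual destructor, since reset() calls ~TBase().
    class Task {
    public:
        virtual ~Task() = default;
        virtual void execute() = 0;
    };

    class PrintTask : public Task {
    public:
        PrintTask(int id) : fID(id) {}
        void execute() override { SkDebugf("task %d\n", fID); }

    private:
        int fID;
    };

    void example() {
        GrTRecorder<Task> tasks(1024);   // initial SkArenaAlloc reservation, in bytes

        tasks.emplace<PrintTask>(1);     // constructs a PrintTask in place

        // Reserve 16 extra bytes immediately after the stored item; per the header's
        // doc comment, the extra data begins one TItem past the returned reference.
        PrintTask& t = tasks.emplaceWithData<PrintTask>(16, 2);
        void* extraData = &t + 1;
        memset(extraData, 0, 16);

        for (Task& task : tasks) {       // forward iteration only, front to back
            task.execute();
        }
        tasks.reset();                   // destroys items in insertion order via ~Task()
    }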