From 64d78cd8a624727c4d20b157e95c4b1155168adf Mon Sep 17 00:00:00 2001
From: Adam Sawicki
Date: Mon, 16 Mar 2020 19:36:15 +0100
Subject: [PATCH] Implement Allocation::WasZeroInitialized

---
 src/D3D12MemAlloc.cpp |  97 +++++++++++++++++++++---
 src/D3D12MemAlloc.h   |  27 ++++++-
 src/Tests.cpp         | 167 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 276 insertions(+), 15 deletions(-)

diff --git a/src/D3D12MemAlloc.cpp b/src/D3D12MemAlloc.cpp
index 73da744..6c55b6e 100644
--- a/src/D3D12MemAlloc.cpp
+++ b/src/D3D12MemAlloc.cpp
@@ -92,7 +92,8 @@ It's mostly for automatic usage of the cryptic, undocumented flag
 D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS. Its absence doesn't seem to change anything
 but better to use it always, just in case.
 */
-    #define D3D12MA_EXTRA_DEFAULT_TYPE_HEAP_FLAGS (D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS)
+    //#define D3D12MA_EXTRA_DEFAULT_TYPE_HEAP_FLAGS (D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS)
+    #define D3D12MA_EXTRA_DEFAULT_TYPE_HEAP_FLAGS (D3D12_HEAP_FLAG_NONE)
 #endif
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2030,6 +2031,64 @@ struct AllocationRequest
     UINT64 sumFreeSize; // Sum size of free items that overlap with proposed allocation.
     UINT64 sumItemSize; // Sum size of items to make lost that overlap with proposed allocation.
     SuballocationList::iterator item;
+    BOOL zeroInitialized; // TRUE if the proposed range is known to contain only zeros.
+};
+
+/*
+Tracks the single half-open byte range [m_ZeroBeg, m_ZeroEnd) that is surely
+still filled with zeros. Everything outside of it is considered uninitialized
+memory that may contain garbage data.
+When a used range splits it, only the larger remaining part is kept.
+A default-constructed object holds an empty range until Reset() is called.
+*/
+class ZeroInitializedRange
+{
+public:
+    void Reset(UINT64 size) // Declares the whole range [0, size) as zero-initialized.
+    {
+        D3D12MA_ASSERT(size > 0);
+        m_ZeroBeg = 0;
+        m_ZeroEnd = size;
+    }
+
+    BOOL IsRangeZeroInitialized(UINT64 beg, UINT64 end) const // True only if [beg, end) lies entirely inside the zero range.
+    {
+        D3D12MA_ASSERT(beg < end);
+        return m_ZeroBeg <= beg && end <= m_ZeroEnd;
+    }
+
+    void MarkRangeAsUsed(UINT64 usedBeg, UINT64 usedEnd) // Removes [usedBeg, usedEnd) from the zero range.
+    {
+        D3D12MA_ASSERT(usedBeg < usedEnd);
+        // No overlap with the zero range: nothing changes.
+        if(usedEnd <= m_ZeroBeg || m_ZeroEnd <= usedBeg)
+        {
+            return;
+        }
+        // Zero range fully covered by the used range: it becomes empty.
+        if(usedBeg <= m_ZeroBeg && m_ZeroEnd <= usedEnd)
+        {
+            m_ZeroBeg = m_ZeroEnd = 0;
+        }
+        // Partial overlap: keep the larger zero part before or after the used range (the part after wins a tie).
+        else
+        {
+            const UINT64 remainingZeroBefore = usedBeg > m_ZeroBeg ? usedBeg - m_ZeroBeg : 0;
+            const UINT64 remainingZeroAfter = usedEnd < m_ZeroEnd ? m_ZeroEnd - usedEnd : 0;
+            D3D12MA_ASSERT(remainingZeroBefore > 0 || remainingZeroAfter > 0);
+            if(remainingZeroBefore > remainingZeroAfter)
+            {
+                m_ZeroEnd = usedBeg;
+            }
+            else
+            {
+                m_ZeroBeg = usedEnd;
+            }
+        }
+    }
+
+private:
+    UINT64 m_ZeroBeg = 0, m_ZeroEnd = 0;
 };
 
 /*
@@ -2119,6 +2178,7 @@ private:
     // Suballocations that are free and have size greater than certain threshold.
     // Sorted by size, ascending.
     Vector<SuballocationList::iterator> m_FreeSuballocationsBySize;
+    ZeroInitializedRange m_ZeroInitializedRange;
 
     bool ValidateFreeSuballocationList() const;
 
@@ -2130,7 +2190,8 @@ private:
         SuballocationList::const_iterator suballocItem,
         UINT64* pOffset,
         UINT64* pSumFreeSize,
-        UINT64* pSumItemSize) const;
+        UINT64* pSumItemSize,
+        BOOL* pZeroInitialized) const;
     // Given free suballocation, it merges it with following one, which must also be free.
     void MergeFreeWithNext(SuballocationList::iterator item);
     // Releases given suballocation, making it free.
@@ -2523,6 +2584,7 @@ BlockMetadata_Generic::~BlockMetadata_Generic()
 void BlockMetadata_Generic::Init(UINT64 size)
 {
     BlockMetadata::Init(size);
+    m_ZeroInitializedRange.Reset(size);
 
     m_FreeCount = 1;
     m_SumFreeSize = size;
@@ -2673,7 +2735,8 @@ bool BlockMetadata_Generic::CreateAllocationRequest(
                 m_FreeSuballocationsBySize[index],
                 &pAllocationRequest->offset,
                 &pAllocationRequest->sumFreeSize,
-                &pAllocationRequest->sumItemSize))
+                &pAllocationRequest->sumItemSize,
+                &pAllocationRequest->zeroInitialized))
             {
                 pAllocationRequest->item = m_FreeSuballocationsBySize[index];
                 return true;
@@ -2745,6 +2808,8 @@ void BlockMetadata_Generic::Alloc(
         ++m_FreeCount;
     }
     m_SumFreeSize -= allocSize;
+
+    m_ZeroInitializedRange.MarkRangeAsUsed(request.offset, request.offset + allocSize);
 }
 
 void BlockMetadata_Generic::Free(const Allocation* allocation)
@@ -2801,14 +2866,16 @@ bool BlockMetadata_Generic::CheckAllocation(
     SuballocationList::const_iterator suballocItem,
     UINT64* pOffset,
     UINT64* pSumFreeSize,
-    UINT64* pSumItemSize) const
+    UINT64* pSumItemSize,
+    BOOL* pZeroInitialized) const
 {
     D3D12MA_ASSERT(allocSize > 0);
     D3D12MA_ASSERT(suballocItem != m_Suballocations.cend());
-    D3D12MA_ASSERT(pOffset != NULL);
+    D3D12MA_ASSERT(pOffset != NULL && pZeroInitialized != NULL);
 
     *pSumFreeSize = 0;
     *pSumItemSize = 0;
+    *pZeroInitialized = FALSE; // Stays FALSE on every failure path; set for real only on success below.
 
     const Suballocation& suballoc = *suballocItem;
     D3D12MA_ASSERT(suballoc.type == SUBALLOCATION_TYPE_FREE);
@@ -2846,6 +2913,7 @@ bool BlockMetadata_Generic::CheckAllocation(
     }
 
     // All tests passed: Success. pOffset is already filled.
+    *pZeroInitialized = m_ZeroInitializedRange.IsRangeZeroInitialized(*pOffset, *pOffset + allocSize);
     return true;
 }
 
@@ -3479,7 +3547,7 @@ HRESULT BlockVector::AllocateFromBlock(
             m_HasEmptyBlock = false;
         }
 
-        *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size);
+        *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size, currRequest.zeroInitialized);
         pBlock->m_pMetadata->Alloc(currRequest, size, *pAllocation);
         (*pAllocation)->InitPlaced(currRequest.offset, alignment, pBlock);
         D3D12MA_HEAVY_ASSERT(pBlock->Validate());
@@ -3859,7 +3927,8 @@ HRESULT AllocatorPimpl::AllocateCommittedResource(
         pOptimizedClearValue, riidResource, (void**)&res);
     if(SUCCEEDED(hr))
     {
-        Allocation* alloc = m_AllocationObjectAllocator.Allocate(this, resAllocInfo.SizeInBytes);
+        const BOOL wasZeroInitialized = TRUE; // A new committed resource is always reported as zero-initialized.
+        Allocation* alloc = m_AllocationObjectAllocator.Allocate(this, resAllocInfo.SizeInBytes, wasZeroInitialized);
         alloc->InitCommitted(pAllocDesc->HeapType);
         alloc->SetResource(res, pResourceDesc);
 
@@ -3917,7 +3986,8 @@ HRESULT AllocatorPimpl::AllocateHeap(
     HRESULT hr = m_Device->CreateHeap(&heapDesc, __uuidof(*heap), (void**)&heap);
     if(SUCCEEDED(hr))
     {
-        (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes);
+        const BOOL wasZeroInitialized = TRUE; // A new heap is always reported as zero-initialized.
+        (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes, wasZeroInitialized);
         (*ppAllocation)->InitHeap(pAllocDesc->HeapType, heap);
         RegisterCommittedAllocation(*ppAllocation, pAllocDesc->HeapType);
 
@@ -4614,15 +4684,20 @@ void Allocation::SetName(LPCWSTR Name)
     }
 }
 
-Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size) :
+Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, BOOL wasZeroInitialized) :
     m_Allocator{allocator},
     m_Size{size},
     m_Resource{NULL},
     m_CreationFrameIndex{allocator->GetCurrentFrameIndex()},
-    m_Name{NULL},
-    m_PackedData{TYPE_COUNT, D3D12_RESOURCE_DIMENSION_UNKNOWN, D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT_UNKNOWN}
+    m_Name{NULL}
 {
     D3D12MA_ASSERT(allocator);
+
+    m_PackedData.SetType(TYPE_COUNT);
+    m_PackedData.SetResourceDimension(D3D12_RESOURCE_DIMENSION_UNKNOWN);
+    m_PackedData.SetResourceFlags(D3D12_RESOURCE_FLAG_NONE);
+    m_PackedData.SetTextureLayout(D3D12_TEXTURE_LAYOUT_UNKNOWN);
+    m_PackedData.SetWasZeroInitialized(wasZeroInitialized);
 }
 
 Allocation::~Allocation()
diff --git a/src/D3D12MemAlloc.h b/src/D3D12MemAlloc.h
index 4e70df2..2f0870d 100644
--- a/src/D3D12MemAlloc.h
+++ b/src/D3D12MemAlloc.h
@@ -507,6 +507,24 @@ public:
     */
     LPCWSTR GetName() const { return m_Name; }
 
+    /** \brief Returns `TRUE` if the memory of the allocation was filled with zeros when the allocation was created.
+
+    Returns `TRUE` only if the allocator is sure that the entire memory where the
+    allocation was created was filled with zeros at the moment the allocation was made.
+
+    Returns `FALSE` if the memory could potentially contain garbage data.
+    If it's a render-target or depth-stencil texture, it then needs proper
+    initialization with `ClearRenderTargetView`, `ClearDepthStencilView`, `DiscardResource`,
+    or a copy operation, as described on page:
+    [ID3D12Device::CreatePlacedResource method - Notes on the required resource initialization](https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-createplacedresource#notes-on-the-required-resource-initialization).
+    Please note that rendering a fullscreen triangle or quad to the texture as
+    a render target is not a proper way of initialization!
+
+    See also article:
+    [Coming to DirectX 12: More control over memory allocation](https://devblogs.microsoft.com/directx/coming-to-directx-12-more-control-over-memory-allocation/).
+    */
+    BOOL WasZeroInitialized() const { return m_PackedData.WasZeroInitialized(); }
+
 private:
     friend class AllocatorPimpl;
     friend class BlockVector;
@@ -552,28 +570,29 @@ private:
     {
     public:
         PackedData() :
-            m_Type(0), m_ResourceDimension(0), m_ResourceFlags(0), m_TextureLayout(0) { }
-        PackedData(Type type, D3D12_RESOURCE_DIMENSION resourceDimension, D3D12_RESOURCE_FLAGS resourceFlags, D3D12_TEXTURE_LAYOUT textureLayout) :
-            m_Type(type), m_ResourceDimension(resourceDimension), m_ResourceFlags(resourceFlags), m_TextureLayout(textureLayout) { }
+            m_Type(0), m_ResourceDimension(0), m_ResourceFlags(0), m_TextureLayout(0), m_WasZeroInitialized(0) { }
 
         Type GetType() const { return (Type)m_Type; }
         D3D12_RESOURCE_DIMENSION GetResourceDimension() const { return (D3D12_RESOURCE_DIMENSION)m_ResourceDimension; }
         D3D12_RESOURCE_FLAGS GetResourceFlags() const { return (D3D12_RESOURCE_FLAGS)m_ResourceFlags; }
         D3D12_TEXTURE_LAYOUT GetTextureLayout() const { return (D3D12_TEXTURE_LAYOUT)m_TextureLayout; }
+        BOOL WasZeroInitialized() const { return (BOOL)m_WasZeroInitialized; }
 
         void SetType(Type type);
         void SetResourceDimension(D3D12_RESOURCE_DIMENSION resourceDimension);
         void SetResourceFlags(D3D12_RESOURCE_FLAGS resourceFlags);
         void SetTextureLayout(D3D12_TEXTURE_LAYOUT textureLayout);
+        void SetWasZeroInitialized(BOOL wasZeroInitialized) { m_WasZeroInitialized = wasZeroInitialized ? 1 : 0; }
 
     private:
         UINT m_Type : 2;               // enum Type
         UINT m_ResourceDimension : 3;  // enum D3D12_RESOURCE_DIMENSION
         UINT m_ResourceFlags : 7;      // flags D3D12_RESOURCE_FLAGS
         UINT m_TextureLayout : 2;      // enum D3D12_TEXTURE_LAYOUT
+        UINT m_WasZeroInitialized : 1; // BOOL
     } m_PackedData;
 
-    Allocation(AllocatorPimpl* allocator, UINT64 size);
+    Allocation(AllocatorPimpl* allocator, UINT64 size, BOOL wasZeroInitialized);
     ~Allocation();
     void InitCommitted(D3D12_HEAP_TYPE heapType);
     void InitPlaced(UINT64 offset, UINT64 alignment, NormalBlock* block);
diff --git a/src/Tests.cpp b/src/Tests.cpp
index b758b4c..113c64a 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -46,6 +46,14 @@ struct ResourceWithAllocation
     AllocationUniquePtr allocation;
     UINT64 size = UINT64_MAX;
     UINT dataSeed = 0;
+
+    void Reset()
+    {
+        resource.Release();
+        allocation.reset();
+        size = UINT64_MAX;
+        dataSeed = 0;
+    }
 };
 
 static void FillResourceDescForBuffer(D3D12_RESOURCE_DESC& outResourceDesc, UINT64 size)
@@ -91,6 +99,21 @@ static bool ValidateData(const void* ptr, const UINT64 sizeInBytes, UINT seed)
     return true;
 }
 
+static bool ValidateDataZero(const void* ptr, const UINT64 sizeInBytes)
+{
+    const UINT* values = (const UINT*)ptr;
+    const UINT64 sizeInValues = sizeInBytes / sizeof(UINT); // Bytes past the last whole UINT are not checked.
+    for(UINT64 i = 0; i < sizeInValues; ++i) // 64-bit index so it can reach every value of the 64-bit bound.
+    {
+        if(values[i] != 0)
+        {
+            // Nonzero data is not an error by itself - the caller decides whether zeros were expected.
+            return false;
+        }
+    }
+    return true;
+}
+
 static void TestFrameIndexAndJson(const TestContext& ctx)
 {
     const UINT64 bufSize = 32ull * 1024;
@@ -638,6 +661,149 @@ static void TestTransfer(const TestContext& ctx)
     }
 }
 
+static void TestZeroInitialized(const TestContext& ctx)
+{
+    wprintf(L"Test zero initialized\n");
+
+    const UINT64 bufSize = 128ull * 1024;
+    D3D12MA::Allocation* alloc = nullptr;
+
+    D3D12_RESOURCE_DESC resourceDesc;
+    FillResourceDescForBuffer(resourceDesc, bufSize);
+
+    // # Create upload buffer and fill it with nonzero data, used later to dirty the tested memory.
+
+    D3D12MA::ALLOCATION_DESC allocDescUpload = {};
+    allocDescUpload.HeapType = D3D12_HEAP_TYPE_UPLOAD;
+
+    ResourceWithAllocation bufUpload;
+    CHECK_HR( ctx.allocator->CreateResource(
+        &allocDescUpload,
+        &resourceDesc,
+        D3D12_RESOURCE_STATE_GENERIC_READ,
+        NULL,
+        &alloc,
+        IID_PPV_ARGS(&bufUpload.resource)) );
+    bufUpload.allocation.reset(alloc);
+
+    {
+        void* mappedPtr = nullptr;
+        CHECK_HR( bufUpload.resource->Map(0, NULL, &mappedPtr) );
+        FillData(mappedPtr, bufSize, 5236245);
+        bufUpload.resource->Unmap(0, NULL);
+    }
+
+    // # Create readback buffer, the CPU-visible destination for inspecting tested buffers.
+
+    D3D12MA::ALLOCATION_DESC allocDescReadback = {};
+    allocDescReadback.HeapType = D3D12_HEAP_TYPE_READBACK;
+
+    ResourceWithAllocation bufReadback;
+    CHECK_HR( ctx.allocator->CreateResource(
+        &allocDescReadback,
+        &resourceDesc,
+        D3D12_RESOURCE_STATE_COPY_DEST,
+        NULL,
+        &alloc,
+        IID_PPV_ARGS(&bufReadback.resource)) );
+    bufReadback.allocation.reset(alloc);
+
+    auto CheckBufferData = [&](const ResourceWithAllocation& buf)
+    {
+        const bool shouldBeZero = buf.allocation->WasZeroInitialized() != FALSE;
+
+        {
+            ID3D12GraphicsCommandList* cmdList = BeginCommandList();
+            cmdList->CopyBufferRegion(bufReadback.resource, 0, buf.resource, 0, bufSize);
+            EndCommandList(cmdList);
+        }
+
+        bool isZero = false;
+        {
+            void* mappedPtr = nullptr;
+            CHECK_HR( bufReadback.resource->Map(0, NULL, &mappedPtr) );
+            isZero = ValidateDataZero(mappedPtr, bufSize);
+            bufReadback.resource->Unmap(0, NULL);
+        }
+
+        wprintf(L"Should be zero: %u, is zero: %u\n", shouldBeZero ? 1 : 0, isZero ? 1 : 0);
+
+        if(shouldBeZero) // One-way check: memory not reported as zeroed may still happen to contain zeros.
+        {
+            CHECK_BOOL(isZero);
+        }
+    };
+
+    // # Test 1: Committed resource. Should always be zero initialized.
+
+    {
+        D3D12MA::ALLOCATION_DESC allocDescDefault = {};
+        allocDescDefault.HeapType = D3D12_HEAP_TYPE_DEFAULT;
+        allocDescDefault.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
+
+        ResourceWithAllocation bufDefault;
+        CHECK_HR( ctx.allocator->CreateResource(
+            &allocDescDefault,
+            &resourceDesc,
+            D3D12_RESOURCE_STATE_COPY_SOURCE,
+            NULL,
+            &alloc,
+            IID_PPV_ARGS(&bufDefault.resource)) );
+        bufDefault.allocation.reset(alloc);
+
+        wprintf(L" Committed: ");
+        CheckBufferData(bufDefault);
+        CHECK_BOOL( bufDefault.allocation->WasZeroInitialized() );
+    }
+
+    // # Test 2: (Probably) placed resource. Created twice; the first one is filled with data before deletion.
+
+    ResourceWithAllocation bufDefault;
+    for(uint32_t i = 0; i < 2; ++i)
+    {
+        // 1. Create buffer
+
+        D3D12MA::ALLOCATION_DESC allocDescDefault = {};
+        allocDescDefault.HeapType = D3D12_HEAP_TYPE_DEFAULT;
+
+        CHECK_HR( ctx.allocator->CreateResource(
+            &allocDescDefault,
+            &resourceDesc,
+            D3D12_RESOURCE_STATE_COPY_SOURCE,
+            NULL,
+            &alloc,
+            IID_PPV_ARGS(&bufDefault.resource)) );
+        bufDefault.allocation.reset(alloc);
+
+        // 2. Check it
+
+        wprintf(L" Normal #%u: ", i);
+        CheckBufferData(bufDefault);
+
+        // 3. Upload some data to it, so its memory no longer contains zeros.
+
+        {
+            ID3D12GraphicsCommandList* cmdList = BeginCommandList();
+
+            D3D12_RESOURCE_BARRIER barrier = {};
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Transition.pResource = bufDefault.resource;
+            barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
+            barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+            cmdList->ResourceBarrier(1, &barrier);
+
+            cmdList->CopyBufferRegion(bufDefault.resource, 0, bufUpload.resource, 0, bufSize);
+
+            EndCommandList(cmdList);
+        }
+
+        // 4. Delete it
+
+        bufDefault.Reset();
+    }
+}
+
 static void TestMultithreading(const TestContext& ctx)
 {
     wprintf(L"Test multithreading\n");
@@ -780,6 +946,7 @@ static void TestGroupBasics(const TestContext& ctx)
     TestMapping(ctx);
     TestStats(ctx);
     TestTransfer(ctx);
+    TestZeroInitialized(ctx);
     TestMultithreading(ctx);
 }
 