diff --git a/include/D3D12MemAlloc.h b/include/D3D12MemAlloc.h index fc79720..6115c71 100644 --- a/include/D3D12MemAlloc.h +++ b/include/D3D12MemAlloc.h @@ -402,8 +402,9 @@ struct TotalStatistics - 1 = `D3D12_HEAP_TYPE_UPLOAD` - 2 = `D3D12_HEAP_TYPE_READBACK` - 3 = `D3D12_HEAP_TYPE_CUSTOM` + - 4 = `D3D12_HEAP_TYPE_GPU_UPLOAD` */ - DetailedStatistics HeapType[4]; + DetailedStatistics HeapType[5]; /** \brief One element for each memory segment group located at the following indices: - 0 = `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` @@ -413,9 +414,9 @@ struct TotalStatistics - When `IsUMA() == FALSE` (discrete graphics card): - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents GPU memory - (resources allocated in `D3D12_HEAP_TYPE_DEFAULT` or `D3D12_MEMORY_POOL_L1`). + (resources allocated in `D3D12_HEAP_TYPE_DEFAULT`, `D3D12_HEAP_TYPE_GPU_UPLOAD` or `D3D12_MEMORY_POOL_L1`). - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) represents system memory - (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`). + (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`). - When `IsUMA() == TRUE` (integrated graphics chip): - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` = (index 0) represents memory shared for all the resources. - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` = (index 1) is unused and always 0. @@ -1140,6 +1141,15 @@ public: - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)" */ BOOL IsCacheCoherentUMA() const; + /** \brief Returns true if GPU Upload Heaps are supported on the current system. + + When true, you can use `D3D12_HEAP_TYPE_GPU_UPLOAD`. + + This flag is fetched from `D3D12_FEATURE_D3D12_OPTIONS16::GPUUploadHeapSupported`. + + `#define D3D12MA_OPTIONS16_SUPPORTED 1` is needed for the compilation of this library. Otherwise the flag is always false. + */ + BOOL IsGPUUploadHeapSupported() const; /** \brief Returns total amount of memory of specific segment group, in bytes. 
\param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7d8d42f..1537262 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,5 @@ +cmake_minimum_required(VERSION 3.25) + set(D3D12MA_LIBRARY_SOURCE_FILES D3D12MemAlloc.cpp "${PROJECT_SOURCE_DIR}/include/D3D12MemAlloc.h" @@ -150,6 +152,12 @@ if(D3D12MA_AGILITY_SDK_DIRECTORY) if(D3D12MA_AGILITY_SDK_PREVIEW) target_compile_definitions(D3D12Sample PRIVATE D3D12MA_USE_AGILITY_SDK_PREVIEW=1) endif() + add_custom_command(TARGET D3D12Sample POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory "$<TARGET_FILE_DIR:D3D12Sample>/D3D12" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/bin/x64/D3D12Core.dll" + "$<TARGET_FILE_DIR:D3D12Sample>/D3D12/" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/bin/x64/d3d12SDKLayers.dll" + "$<TARGET_FILE_DIR:D3D12Sample>/D3D12/") endif() else() message(FATAL_ERROR "DX12 Agility SDK not found - cannot find file \"${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/include/d3d12.h\".") @@ -157,3 +165,11 @@ if(D3D12MA_AGILITY_SDK_DIRECTORY) else() message(STATUS "DX12 Agility SDK not used.") endif() + +option(D3D12MA_OPTIONS16_SUPPORTED "Set if using Agility SDK 1.710.0-preview or newer that defines D3D12_FEATURE_DATA_D3D12_OPTIONS16." OFF) +if(D3D12MA_OPTIONS16_SUPPORTED) + target_compile_definitions(D3D12MemoryAllocator PRIVATE D3D12MA_OPTIONS16_SUPPORTED=1) + if(${D3D12MA_BUILD_SAMPLE} AND ${WIN32}) + target_compile_definitions(D3D12Sample PRIVATE D3D12MA_OPTIONS16_SUPPORTED=1) + endif() +endif() diff --git a/src/D3D12MemAlloc.cpp b/src/D3D12MemAlloc.cpp index e405fe8..287a8ab 100644 --- a/src/D3D12MemAlloc.cpp +++ b/src/D3D12MemAlloc.cpp @@ -134,9 +134,9 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs. 
namespace D3D12MA { -static constexpr UINT HEAP_TYPE_COUNT = 4; -static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 3; // Only DEFAULT, UPLOAD, READBACK. -static constexpr UINT DEFAULT_POOL_MAX_COUNT = 9; +static constexpr UINT HEAP_TYPE_COUNT = 5; +static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 4; // Only DEFAULT, UPLOAD, READBACK, GPU_UPLOAD. +static constexpr UINT DEFAULT_POOL_MAX_COUNT = STANDARD_HEAP_TYPE_COUNT * 3; static const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3; // Minimum size of a free suballocation to register it in the free suballocation collection. static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; @@ -147,12 +147,14 @@ static const WCHAR* const HeapTypeNames[] = L"UPLOAD", L"READBACK", L"CUSTOM", + L"GPU_UPLOAD", }; static const WCHAR* const StandardHeapTypeNames[] = { L"DEFAULT", L"UPLOAD", L"READBACK", + L"GPU_UPLOAD", }; static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = @@ -160,6 +162,8 @@ static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = static const D3D12_RESIDENCY_PRIORITY D3D12_RESIDENCY_PRIORITY_NONE = D3D12_RESIDENCY_PRIORITY(0); +static const D3D12_HEAP_TYPE D3D12_HEAP_TYPE_GPU_UPLOAD_COPY = (D3D12_HEAP_TYPE)5; + #ifndef _D3D12MA_ENUM_DECLARATIONS // Local copy of this enum, as it is provided only by , so it may not be available. 
@@ -465,6 +469,7 @@ static UINT StandardHeapTypeToIndex(D3D12_HEAP_TYPE type) case D3D12_HEAP_TYPE_DEFAULT: return 0; case D3D12_HEAP_TYPE_UPLOAD: return 1; case D3D12_HEAP_TYPE_READBACK: return 2; + case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: return 3; default: D3D12MA_ASSERT(0); return UINT_MAX; } } @@ -476,6 +481,7 @@ static D3D12_HEAP_TYPE IndexToStandardHeapType(UINT heapTypeIndex) case 0: return D3D12_HEAP_TYPE_DEFAULT; case 1: return D3D12_HEAP_TYPE_UPLOAD; case 2: return D3D12_HEAP_TYPE_READBACK; + case 3: return D3D12_HEAP_TYPE_GPU_UPLOAD_COPY; default: D3D12MA_ASSERT(0); return D3D12_HEAP_TYPE_CUSTOM; } } @@ -525,7 +531,8 @@ static bool IsHeapTypeStandard(D3D12_HEAP_TYPE type) { return type == D3D12_HEAP_TYPE_DEFAULT || type == D3D12_HEAP_TYPE_UPLOAD || - type == D3D12_HEAP_TYPE_READBACK; + type == D3D12_HEAP_TYPE_READBACK || + type == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY; } static D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type) @@ -6567,6 +6574,7 @@ public: BOOL IsUMA() const { return m_D3D12Architecture.UMA; } BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; } bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; } + bool IsGPUUploadHeapSupported() const { return m_GPUUploadHeapSupported != FALSE; } bool UseMutex() const { return m_UseMutex; } AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; } UINT GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } @@ -6575,6 +6583,7 @@ public: 0: D3D12_HEAP_TYPE_DEFAULT 1: D3D12_HEAP_TYPE_UPLOAD 2: D3D12_HEAP_TYPE_READBACK + 3: D3D12_HEAP_TYPE_GPU_UPLOAD else: 0: D3D12_HEAP_TYPE_DEFAULT + buffer 1: D3D12_HEAP_TYPE_DEFAULT + texture @@ -6585,8 +6594,11 @@ public: 6: D3D12_HEAP_TYPE_READBACK + buffer 7: D3D12_HEAP_TYPE_READBACK + texture 8: D3D12_HEAP_TYPE_READBACK + texture RT or DS + 9: D3D12_HEAP_TYPE_GPU_UPLOAD + buffer + 10: 
D3D12_HEAP_TYPE_GPU_UPLOAD + texture + 11: D3D12_HEAP_TYPE_GPU_UPLOAD + texture RT or DS */ - UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 3 : 9; } + UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 4 : 12; } BlockVector** GetDefaultPools() { return m_BlockVectors; } HRESULT Init(const ALLOCATOR_DESC& desc); @@ -6673,6 +6685,7 @@ private: D3D12MA_ATOMIC_UINT32 m_CurrentFrameIndex; DXGI_ADAPTER_DESC m_AdapterDesc; D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options; + BOOL m_GPUUploadHeapSupported = FALSE; D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture; AllocationObjectAllocator m_AllocationObjectAllocator; @@ -6814,6 +6827,20 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) m_D3D12Options.ResourceHeapTier = (D3D12MA_FORCE_RESOURCE_HEAP_TIER); #endif +// You must define this macro, e.g. `#define D3D12MA_OPTIONS16_SUPPORTED 1`, to enable GPU Upload Heaps! +// Unfortunately there is no way to programmatically check if the included <d3d12.h> defines D3D12_FEATURE_DATA_D3D12_OPTIONS16 or not. +// Main interfaces have respective macros like __ID3D12Device4_INTERFACE_DEFINED__, but structures like this do not. +#if D3D12MA_OPTIONS16_SUPPORTED + { + D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16 = {}; + hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16)); + if (SUCCEEDED(hr)) + { + m_GPUUploadHeapSupported = options16.GPUUploadHeapSupported; + } + } +#endif + hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture)); if (FAILED(hr)) { @@ -6913,7 +6940,7 @@ UINT AllocatorPimpl::StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapTy D3D12MA_ASSERT(IsHeapTypeStandard(heapType)); if (IsUMA()) return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; - return heapType == D3D12_HEAP_TYPE_DEFAULT ? + return (heapType == D3D12_HEAP_TYPE_DEFAULT || heapType == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY) ? 
DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY; } @@ -7279,15 +7306,16 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat ClearDetailedStatistics(outCustomHeaps[1]); } - // Process default pools. 3 standard heap types only. Add them to outStats.HeapType[i]. + // Process default pools. 4 standard heap types only. Add them to outStats.HeapType[i]. if (SupportsResourceHeapTier2()) { - // DEFAULT, UPLOAD, READBACK. + // DEFAULT, UPLOAD, READBACK, GPU_UPLOAD. for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) { BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex]; D3D12MA_ASSERT(pBlockVector); - pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]); } } else @@ -7299,7 +7327,9 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat { BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType]; D3D12MA_ASSERT(pBlockVector); - pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); + + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]); } } } @@ -7314,6 +7344,9 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat AddDetailedStatistics( outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_READBACK)], outStats.HeapType[2]); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_GPU_UPLOAD_COPY)], + outStats.HeapType[4]); // Process custom pools. 
DetailedStatistics tmpStats; @@ -7338,13 +7371,14 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat } } - // Process committed allocations. 3 standard heap types only. + // Process committed allocations. standard heap types only. for (UINT heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) { ClearDetailedStatistics(tmpStats); m_CommittedAllocations[heapTypeIndex].AddDetailedStatistics(tmpStats); + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 AddDetailedStatistics( - outStats.HeapType[heapTypeIndex], tmpStats); + outStats.HeapType[outputIndex], tmpStats); AddDetailedStatistics( outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(IndexToStandardHeapType(heapTypeIndex))], tmpStats); } @@ -7366,19 +7400,24 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == outStats.HeapType[0].Stats.BlockCount + outStats.HeapType[1].Stats.BlockCount + - outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount); + outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount + + outStats.HeapType[4].Stats.BlockCount); D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == outStats.HeapType[0].Stats.AllocationCount + outStats.HeapType[1].Stats.AllocationCount + - outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount); + outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount + + outStats.HeapType[4].Stats.AllocationCount); D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == outStats.HeapType[0].Stats.BlockBytes + outStats.HeapType[1].Stats.BlockBytes + - outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes); + outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes + + outStats.HeapType[4].Stats.BlockBytes); 
D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == outStats.HeapType[0].Stats.AllocationBytes + outStats.HeapType[1].Stats.AllocationBytes + - outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes); + outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes + + outStats.HeapType[4].Stats.AllocationBytes); D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == outStats.HeapType[0].UnusedRangeCount + outStats.HeapType[1].UnusedRangeCount + - outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount); + outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount + + outStats.HeapType[4].UnusedRangeCount); } void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget) @@ -7426,6 +7465,7 @@ void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE hea switch (heapType) { case D3D12_HEAP_TYPE_DEFAULT: + case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: GetBudget(&outBudget, NULL); break; case D3D12_HEAP_TYPE_UPLOAD: @@ -7482,6 +7522,9 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) json.WriteBool(m_D3D12Architecture.UMA); json.WriteString(L"CacheCoherentUMA"); json.WriteBool(m_D3D12Architecture.CacheCoherentUMA); + + json.WriteString(L"GPUUploadHeapSupported"); + json.WriteBool(m_GPUUploadHeapSupported != FALSE); } json.EndObject(); } @@ -7514,6 +7557,17 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); } json.EndObject(); + + if(IsGPUUploadHeapSupported()) + { + json.WriteString(L"GPU_UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[4]); + } + json.EndObject(); + } } json.WriteString(L"UPLOAD"); json.BeginObject(); @@ -7564,6 +7618,17 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) } json.EndObject(); + 
if(IsGPUUploadHeapSupported()) + { + json.WriteString(L"GPU_UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[4]); + } + json.EndObject(); + } + json.WriteString(L"CUSTOM"); json.BeginObject(); { @@ -8012,6 +8077,9 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U outCommittedAllocationParams = CommittedAllocationParameters(); outPreferCommitted = false; + D3D12MA_ASSERT((allocDesc.HeapType != D3D12_HEAP_TYPE_GPU_UPLOAD_COPY || IsGPUUploadHeapSupported()) && + "Trying to allocate from D3D12_HEAP_TYPE_GPU_UPLOAD while GPUUploadHeapSupported == FALSE or D3D12MA_OPTIONS16_SUPPORTED macro was not defined when compiling D3D12MA library."); + bool msaaAlwaysCommitted; if (allocDesc.CustomPool != NULL) { @@ -8111,6 +8179,7 @@ UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, Reso case D3D12_HEAP_TYPE_DEFAULT: poolIndex = 0; break; case D3D12_HEAP_TYPE_UPLOAD: poolIndex = 1; break; case D3D12_HEAP_TYPE_READBACK: poolIndex = 2; break; + case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: poolIndex = 3; break; default: D3D12MA_ASSERT(0); } @@ -8166,6 +8235,9 @@ void AllocatorPimpl::CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_H case 2: outHeapType = D3D12_HEAP_TYPE_READBACK; break; + case 3: + outHeapType = D3D12_HEAP_TYPE_GPU_UPLOAD_COPY; + break; default: D3D12MA_ASSERT(0); } @@ -10173,6 +10245,11 @@ BOOL Allocator::IsCacheCoherentUMA() const return m_Pimpl->IsCacheCoherentUMA(); } +BOOL Allocator::IsGPUUploadHeapSupported() const +{ + return m_Pimpl->IsGPUUploadHeapSupported(); +} + UINT64 Allocator::GetMemoryCapacity(UINT memorySegmentGroup) const { return m_Pimpl->GetMemoryCapacity(memorySegmentGroup); diff --git a/src/D3D12Sample.cpp b/src/D3D12Sample.cpp index 705cff1..6cb3127 100644 --- a/src/D3D12Sample.cpp +++ b/src/D3D12Sample.cpp @@ -572,6 +572,8 @@ static std::wstring SizeToStr(size_t size) static void 
PrintAdapterInformation(IDXGIAdapter1* adapter) { + assert(g_Allocator); + wprintf(L"DXGI_ADAPTER_DESC1:\n"); wprintf(L" Description = %s\n", g_AdapterDesc.Description); wprintf(L" VendorId = 0x%X (%s)\n", g_AdapterDesc.VendorId, VendorIDToStr(g_AdapterDesc.VendorId)); @@ -598,6 +600,9 @@ static void PrintAdapterInformation(IDXGIAdapter1* adapter) assert(0); } + wprintf(L"D3D12_FEATURE_DATA_D3D12_OPTIONS16:\n"); + wprintf(L" GPUUploadHeapSupported = %u\n", g_Allocator->IsGPUUploadHeapSupported() ? 1 : 0); + ComPtr adapter3; if(SUCCEEDED(adapter->QueryInterface(IID_PPV_ARGS(&adapter3)))) { diff --git a/src/Tests.cpp b/src/Tests.cpp index 170b87d..0cf5217 100644 --- a/src/Tests.cpp +++ b/src/Tests.cpp @@ -2983,6 +2983,152 @@ static void TestDevice10(const TestContext& ctx) } #endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__ +static void TestGPUUploadHeap(const TestContext& ctx) +{ +#if D3D12MA_OPTIONS16_SUPPORTED + using namespace D3D12MA; + + wprintf(L"Test GPU Upload Heap\n"); + + if(!ctx.allocator->IsGPUUploadHeapSupported()) + { + wprintf(L" Skipped due to GPUUploadHeap not supported.\n"); + return; + } + + Budget begLocalBudget = {}; + ctx.allocator->GetBudget(&begLocalBudget, NULL); + TotalStatistics begStats = {}; + ctx.allocator->CalculateStatistics(&begStats); + + // Create a buffer, likely placed. 
+ ALLOCATION_DESC allocDesc = {}; + allocDesc.HeapType = D3D12_HEAP_TYPE_GPU_UPLOAD; + D3D12_RESOURCE_DESC resDesc; + FillResourceDescForBuffer(resDesc, 64 * KILOBYTE); + + ComPtr<Allocation> alloc; + CHECK_HR(ctx.allocator->CreateResource(&allocDesc, &resDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &alloc, IID_NULL, NULL)); + CHECK_BOOL(alloc && alloc->GetResource()); + CHECK_BOOL(alloc->GetResource()->GetGPUVirtualAddress() != 0); + + { + D3D12_HEAP_PROPERTIES heapProps = {}; + D3D12_HEAP_FLAGS heapFlags = {}; + CHECK_HR(alloc->GetResource()->GetHeapProperties(&heapProps, &heapFlags)); + CHECK_BOOL(heapProps.Type == D3D12_HEAP_TYPE_GPU_UPLOAD); + } + + // Create a committed one. + ALLOCATION_DESC committedAllocDesc = allocDesc; + committedAllocDesc.Flags |= ALLOCATION_FLAG_COMMITTED; + ComPtr<Allocation> committedAlloc; + CHECK_HR(ctx.allocator->CreateResource(&committedAllocDesc, &resDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &committedAlloc, IID_NULL, NULL)); + CHECK_BOOL(committedAlloc && committedAlloc->GetResource()); + CHECK_BOOL(committedAlloc->GetHeap() == NULL); // Committed, heap is implicit and inaccessible. + + // Create a custom pool and a buffer inside of it. + POOL_DESC poolDesc = {}; + poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_GPU_UPLOAD; + ComPtr<Pool> pool; + CHECK_HR(ctx.allocator->CreatePool(&poolDesc, &pool)); + + ALLOCATION_DESC poolAllocDesc = {}; + poolAllocDesc.CustomPool = pool.Get(); + ComPtr<Allocation> poolAlloc; + CHECK_HR(ctx.allocator->CreateResource(&poolAllocDesc, &resDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &poolAlloc, IID_NULL, NULL)); + CHECK_BOOL(poolAlloc && poolAlloc->GetResource()); + + // Map the original buffer, write, then read + { + const auto res = alloc->GetResource(); + + UINT* mappedData = NULL; + CHECK_HR(res->Map(0, &EMPTY_RANGE, (void**)&mappedData)); // {0, 0} - not reading anything. + for(UINT i = 0; i < resDesc.Width / sizeof(UINT); ++i) + { + mappedData[i] = i * 3; + } + res->Unmap(0, NULL); // NULL - written everything. 
+ + CHECK_HR(res->Map(0, NULL, (void**)&mappedData)); // NULL - reading everything. + CHECK_BOOL(mappedData[100] == 300); + res->Unmap(0, &EMPTY_RANGE); // {0, 0} - not written anything. + + } + + // Create two big buffers. + D3D12_RESOURCE_DESC bigResDesc = resDesc; + bigResDesc.Width = 128 * MEGABYTE; + + ComPtr<Allocation> bigAllocs[2]; + for(UINT i = 0; i < 2; ++i) + { + CHECK_HR(ctx.allocator->CreateResource(&allocDesc, &bigResDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &bigAllocs[i], IID_NULL, NULL)); + CHECK_BOOL(bigAllocs[i] && bigAllocs[i]->GetResource()); + } + + // Create a texture. + constexpr UINT texSize = 256; + D3D12_RESOURCE_DESC texDesc = {}; + texDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + texDesc.Alignment = 0; + texDesc.Width = texSize; + texDesc.Height = texSize; + texDesc.DepthOrArraySize = 1; + texDesc.MipLevels = 1; + texDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + texDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + ComPtr<Allocation> texAlloc; + CHECK_HR(ctx.allocator->CreateResource(&allocDesc, &texDesc, + D3D12_RESOURCE_STATE_COMMON, NULL, &texAlloc, IID_NULL, NULL)); + CHECK_BOOL(texAlloc && texAlloc->GetResource()); + + { + std::vector<DWORD> texPixels(texSize * texSize); + // Contents of texPixels[i] don't matter. + const auto texRes = texAlloc->GetResource(); + // Need to pass ppData == NULL for Map() to be used with a texture having D3D12_TEXTURE_LAYOUT_UNKNOWN. + CHECK_HR(texRes->Map(0, &EMPTY_RANGE, NULL)); // {0, 0} - not reading anything. + CHECK_HR(texRes->WriteToSubresource( + 0, // DstSubresource + NULL, // pDstBox + texPixels.data(), // pSrcData + texSize * sizeof(DWORD), // SrcRowPitch + texSize * texSize * sizeof(DWORD))); // SrcDepthPitch + texRes->Unmap(0, NULL); // NULL - written everything. 
+ } + + // Check budget and stats + constexpr UINT totalAllocCount = 6; + Budget endLocalBudget = {}; + ctx.allocator->GetBudget(&endLocalBudget, NULL); + TotalStatistics endStats = {}; + ctx.allocator->CalculateStatistics(&endStats); + CHECK_BOOL(endLocalBudget.UsageBytes >= begLocalBudget.UsageBytes + + 2 * bigResDesc.Width + && "This can fail if GPU_UPLOAD falls back to system RAM e.g. when under PIX?"); + auto validateStats = [totalAllocCount, &bigResDesc](const Statistics& begStats, const Statistics& endStats) + { + CHECK_BOOL(endStats.BlockCount >= begStats.BlockCount); + CHECK_BOOL(endStats.BlockBytes >= begStats.BlockBytes); + CHECK_BOOL(endStats.AllocationCount == begStats.AllocationCount + totalAllocCount); + CHECK_BOOL(endStats.AllocationBytes > begStats.AllocationBytes + 2 * bigResDesc.Width); + }; + validateStats(begLocalBudget.Stats, endLocalBudget.Stats); + validateStats(begStats.Total.Stats, endStats.Total.Stats); + validateStats(begStats.MemorySegmentGroup[0].Stats, endStats.MemorySegmentGroup[0].Stats); // DXGI_MEMORY_SEGMENT_GROUP_LOCAL + validateStats(begStats.HeapType[4].Stats, endStats.HeapType[4].Stats); // D3D12_HEAP_TYPE_GPU_UPLOAD +#endif +} + static void TestVirtualBlocks(const TestContext& ctx) { wprintf(L"Test virtual blocks\n"); @@ -4250,6 +4396,8 @@ static void TestGroupBasics(const TestContext& ctx) TestDevice10(ctx); #endif + TestGPUUploadHeap(ctx); + FILE* file; fopen_s(&file, "Results.csv", "w"); assert(file != NULL);