Fixed and finished merge of GPUUploadHeap support

Added Allocator::IsGPUUploadHeapSupported and macro D3D12MA_OPTIONS16_SUPPORTED.
This commit is contained in:
Adam Sawicki 2024-03-05 15:40:14 +01:00
parent dc222fe321
commit 47ebce51c9
5 changed files with 276 additions and 20 deletions

View File

@ -402,8 +402,9 @@ struct TotalStatistics
- 1 = `D3D12_HEAP_TYPE_UPLOAD` - 1 = `D3D12_HEAP_TYPE_UPLOAD`
- 2 = `D3D12_HEAP_TYPE_READBACK` - 2 = `D3D12_HEAP_TYPE_READBACK`
- 3 = `D3D12_HEAP_TYPE_CUSTOM` - 3 = `D3D12_HEAP_TYPE_CUSTOM`
- 4 = `D3D12_HEAP_TYPE_GPU_UPLOAD`
*/ */
DetailedStatistics HeapType[4]; DetailedStatistics HeapType[5];
/** \brief One element for each memory segment group located at the following indices: /** \brief One element for each memory segment group located at the following indices:
- 0 = `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` - 0 = `DXGI_MEMORY_SEGMENT_GROUP_LOCAL`
@ -413,9 +414,9 @@ struct TotalStatistics
- When `IsUMA() == FALSE` (discrete graphics card): - When `IsUMA() == FALSE` (discrete graphics card):
- `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents GPU memory - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents GPU memory
(resources allocated in `D3D12_HEAP_TYPE_DEFAULT` or `D3D12_MEMORY_POOL_L1`). (resources allocated in `D3D12_HEAP_TYPE_DEFAULT`, `D3D12_HEAP_TYPE_GPU_UPLOAD` or `D3D12_MEMORY_POOL_L1`).
- `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) represents system memory - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) represents system memory
(resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`). (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`).
- When `IsUMA() == TRUE` (integrated graphics chip): - When `IsUMA() == TRUE` (integrated graphics chip):
- `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` = (index 0) represents memory shared for all the resources. - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` = (index 0) represents memory shared for all the resources.
- `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` = (index 1) is unused and always 0. - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` = (index 1) is unused and always 0.
@ -1140,6 +1141,15 @@ public:
- "ID3D12Device::GetCustomHeapProperties method (d3d12.h)" - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)"
*/ */
BOOL IsCacheCoherentUMA() const; BOOL IsCacheCoherentUMA() const;
/** \brief Returns true if GPU Upload Heaps are supported on the current system.
When true, you can use `D3D12_HEAP_TYPE_GPU_UPLOAD`.
This flag is fetched from `D3D12_FEATURE_D3D12_OPTIONS16::GPUUploadHeapSupported`.
`#define D3D12MA_OPTIONS16_SUPPORTED 1` is needed for the compilation of this library. Otherwise the flag is always false.
*/
BOOL IsGPUUploadHeapSupported() const;
/** \brief Returns total amount of memory of specific segment group, in bytes. /** \brief Returns total amount of memory of specific segment group, in bytes.
\param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`. \param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`.

View File

@ -1,3 +1,5 @@
cmake_minimum_required(VERSION 3.25)
set(D3D12MA_LIBRARY_SOURCE_FILES set(D3D12MA_LIBRARY_SOURCE_FILES
D3D12MemAlloc.cpp D3D12MemAlloc.cpp
"${PROJECT_SOURCE_DIR}/include/D3D12MemAlloc.h" "${PROJECT_SOURCE_DIR}/include/D3D12MemAlloc.h"
@ -150,6 +152,12 @@ if(D3D12MA_AGILITY_SDK_DIRECTORY)
if(D3D12MA_AGILITY_SDK_PREVIEW) if(D3D12MA_AGILITY_SDK_PREVIEW)
target_compile_definitions(D3D12Sample PRIVATE D3D12MA_USE_AGILITY_SDK_PREVIEW=1) target_compile_definitions(D3D12Sample PRIVATE D3D12MA_USE_AGILITY_SDK_PREVIEW=1)
endif() endif()
add_custom_command(TARGET D3D12Sample POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory "$<TARGET_FILE_DIR:D3D12Sample>/D3D12"
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/bin/x64/D3D12Core.dll"
"$<TARGET_FILE_DIR:D3D12Sample>/D3D12/"
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/bin/x64/d3d12SDKLayers.dll"
"$<TARGET_FILE_DIR:D3D12Sample>/D3D12/")
endif() endif()
else() else()
message(FATAL_ERROR "DX12 Agility SDK not found - cannot find file \"${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/include/d3d12.h\".") message(FATAL_ERROR "DX12 Agility SDK not found - cannot find file \"${D3D12MA_AGILITY_SDK_DIRECTORY}/build/native/include/d3d12.h\".")
@ -157,3 +165,11 @@ if(D3D12MA_AGILITY_SDK_DIRECTORY)
else() else()
message(STATUS "DX12 Agility SDK not used.") message(STATUS "DX12 Agility SDK not used.")
endif() endif()
# Opt-in switch that defines the D3D12MA_OPTIONS16_SUPPORTED=1 macro for the library
# (and for the sample, when it is built). Enable it only when the <d3d12.h> in use
# declares D3D12_FEATURE_DATA_D3D12_OPTIONS16.
option(D3D12MA_OPTIONS16_SUPPORTED "Set if using Agility SDK 1.710.0-preview or newer that defines D3D12_FEATURE_DATA_D3D12_OPTIONS16." OFF)
if(D3D12MA_OPTIONS16_SUPPORTED)
    target_compile_definitions(D3D12MemoryAllocator PRIVATE D3D12MA_OPTIONS16_SUPPORTED=1)
    # Refer to the variables by name instead of expanding them with ${...}:
    # an unset or empty variable would otherwise expand to nothing and leave
    # a malformed condition such as `if(ON AND )`, which aborts configuration.
    if(D3D12MA_BUILD_SAMPLE AND WIN32)
        target_compile_definitions(D3D12Sample PRIVATE D3D12MA_OPTIONS16_SUPPORTED=1)
    endif()
endif()

View File

@ -134,9 +134,9 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs.
namespace D3D12MA namespace D3D12MA
{ {
static constexpr UINT HEAP_TYPE_COUNT = 4; static constexpr UINT HEAP_TYPE_COUNT = 5;
static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 3; // Only DEFAULT, UPLOAD, READBACK. static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 4; // Only DEFAULT, UPLOAD, READBACK, GPU_UPLOAD.
static constexpr UINT DEFAULT_POOL_MAX_COUNT = 9; static constexpr UINT DEFAULT_POOL_MAX_COUNT = STANDARD_HEAP_TYPE_COUNT * 3;
static const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3; static const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3;
// Minimum size of a free suballocation to register it in the free suballocation collection. // Minimum size of a free suballocation to register it in the free suballocation collection.
static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16;
@ -147,12 +147,14 @@ static const WCHAR* const HeapTypeNames[] =
L"UPLOAD", L"UPLOAD",
L"READBACK", L"READBACK",
L"CUSTOM", L"CUSTOM",
L"GPU_UPLOAD",
}; };
static const WCHAR* const StandardHeapTypeNames[] = static const WCHAR* const StandardHeapTypeNames[] =
{ {
L"DEFAULT", L"DEFAULT",
L"UPLOAD", L"UPLOAD",
L"READBACK", L"READBACK",
L"GPU_UPLOAD",
}; };
static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS =
@ -160,6 +162,8 @@ static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS =
static const D3D12_RESIDENCY_PRIORITY D3D12_RESIDENCY_PRIORITY_NONE = D3D12_RESIDENCY_PRIORITY(0); static const D3D12_RESIDENCY_PRIORITY D3D12_RESIDENCY_PRIORITY_NONE = D3D12_RESIDENCY_PRIORITY(0);
// Library-local constant used wherever this file refers to the GPU upload heap type.
// NOTE(review): presumably mirrors D3D12_HEAP_TYPE_GPU_UPLOAD so the code still compiles
// against a <d3d12.h> that does not declare that enumerator - confirm the value against the SDK header.
static const D3D12_HEAP_TYPE D3D12_HEAP_TYPE_GPU_UPLOAD_COPY = static_cast<D3D12_HEAP_TYPE>(5);
#ifndef _D3D12MA_ENUM_DECLARATIONS #ifndef _D3D12MA_ENUM_DECLARATIONS
// Local copy of this enum, as it is provided only by <dxgi1_4.h>, so it may not be available. // Local copy of this enum, as it is provided only by <dxgi1_4.h>, so it may not be available.
@ -465,6 +469,7 @@ static UINT StandardHeapTypeToIndex(D3D12_HEAP_TYPE type)
case D3D12_HEAP_TYPE_DEFAULT: return 0; case D3D12_HEAP_TYPE_DEFAULT: return 0;
case D3D12_HEAP_TYPE_UPLOAD: return 1; case D3D12_HEAP_TYPE_UPLOAD: return 1;
case D3D12_HEAP_TYPE_READBACK: return 2; case D3D12_HEAP_TYPE_READBACK: return 2;
case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: return 3;
default: D3D12MA_ASSERT(0); return UINT_MAX; default: D3D12MA_ASSERT(0); return UINT_MAX;
} }
} }
@ -476,6 +481,7 @@ static D3D12_HEAP_TYPE IndexToStandardHeapType(UINT heapTypeIndex)
case 0: return D3D12_HEAP_TYPE_DEFAULT; case 0: return D3D12_HEAP_TYPE_DEFAULT;
case 1: return D3D12_HEAP_TYPE_UPLOAD; case 1: return D3D12_HEAP_TYPE_UPLOAD;
case 2: return D3D12_HEAP_TYPE_READBACK; case 2: return D3D12_HEAP_TYPE_READBACK;
case 3: return D3D12_HEAP_TYPE_GPU_UPLOAD_COPY;
default: D3D12MA_ASSERT(0); return D3D12_HEAP_TYPE_CUSTOM; default: D3D12MA_ASSERT(0); return D3D12_HEAP_TYPE_CUSTOM;
} }
} }
@ -525,7 +531,8 @@ static bool IsHeapTypeStandard(D3D12_HEAP_TYPE type)
{ {
return type == D3D12_HEAP_TYPE_DEFAULT || return type == D3D12_HEAP_TYPE_DEFAULT ||
type == D3D12_HEAP_TYPE_UPLOAD || type == D3D12_HEAP_TYPE_UPLOAD ||
type == D3D12_HEAP_TYPE_READBACK; type == D3D12_HEAP_TYPE_READBACK ||
type == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY;
} }
static D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type) static D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type)
@ -6567,6 +6574,7 @@ public:
BOOL IsUMA() const { return m_D3D12Architecture.UMA; } BOOL IsUMA() const { return m_D3D12Architecture.UMA; }
BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; } BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; }
bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; } bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; }
// Reports whether the cached m_GPUUploadHeapSupported flag is set (any value other than FALSE).
bool IsGPUUploadHeapSupported() const
{
    const bool isSupported = (m_GPUUploadHeapSupported != FALSE);
    return isSupported;
}
bool UseMutex() const { return m_UseMutex; } bool UseMutex() const { return m_UseMutex; }
AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; } AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; }
UINT GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } UINT GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); }
@ -6575,6 +6583,7 @@ public:
0: D3D12_HEAP_TYPE_DEFAULT 0: D3D12_HEAP_TYPE_DEFAULT
1: D3D12_HEAP_TYPE_UPLOAD 1: D3D12_HEAP_TYPE_UPLOAD
2: D3D12_HEAP_TYPE_READBACK 2: D3D12_HEAP_TYPE_READBACK
3: D3D12_HEAP_TYPE_GPU_UPLOAD
else: else:
0: D3D12_HEAP_TYPE_DEFAULT + buffer 0: D3D12_HEAP_TYPE_DEFAULT + buffer
1: D3D12_HEAP_TYPE_DEFAULT + texture 1: D3D12_HEAP_TYPE_DEFAULT + texture
@ -6585,8 +6594,11 @@ public:
6: D3D12_HEAP_TYPE_READBACK + buffer 6: D3D12_HEAP_TYPE_READBACK + buffer
7: D3D12_HEAP_TYPE_READBACK + texture 7: D3D12_HEAP_TYPE_READBACK + texture
8: D3D12_HEAP_TYPE_READBACK + texture RT or DS 8: D3D12_HEAP_TYPE_READBACK + texture RT or DS
9: D3D12_HEAP_TYPE_GPU_UPLOAD + buffer
10: D3D12_HEAP_TYPE_GPU_UPLOAD + texture
11: D3D12_HEAP_TYPE_GPU_UPLOAD + texture RT or DS
*/ */
UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 3 : 9; } UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 4 : 12; }
BlockVector** GetDefaultPools() { return m_BlockVectors; } BlockVector** GetDefaultPools() { return m_BlockVectors; }
HRESULT Init(const ALLOCATOR_DESC& desc); HRESULT Init(const ALLOCATOR_DESC& desc);
@ -6673,6 +6685,7 @@ private:
D3D12MA_ATOMIC_UINT32 m_CurrentFrameIndex; D3D12MA_ATOMIC_UINT32 m_CurrentFrameIndex;
DXGI_ADAPTER_DESC m_AdapterDesc; DXGI_ADAPTER_DESC m_AdapterDesc;
D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options; D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options;
BOOL m_GPUUploadHeapSupported = FALSE;
D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture; D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture;
AllocationObjectAllocator m_AllocationObjectAllocator; AllocationObjectAllocator m_AllocationObjectAllocator;
@ -6814,6 +6827,20 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc)
m_D3D12Options.ResourceHeapTier = (D3D12MA_FORCE_RESOURCE_HEAP_TIER); m_D3D12Options.ResourceHeapTier = (D3D12MA_FORCE_RESOURCE_HEAP_TIER);
#endif #endif
// You must define this macro like `#define D3D12MA_OPTIONS16_SUPPORTED 1` to enable GPU Upload Heaps!
// Unfortunately there is no way to programmatically check if the included <d3d12.h> defines D3D12_FEATURE_DATA_D3D12_OPTIONS16 or not.
// Main interfaces have respective macros like __ID3D12Device4_INTERFACE_DEFINED__, but structures like this do not.
#if D3D12MA_OPTIONS16_SUPPORTED
{
D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16 = {};
hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16));
if (SUCCEEDED(hr))
{
m_GPUUploadHeapSupported = options16.GPUUploadHeapSupported;
}
}
#endif
hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture)); hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture));
if (FAILED(hr)) if (FAILED(hr))
{ {
@ -6913,7 +6940,7 @@ UINT AllocatorPimpl::StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapTy
D3D12MA_ASSERT(IsHeapTypeStandard(heapType)); D3D12MA_ASSERT(IsHeapTypeStandard(heapType));
if (IsUMA()) if (IsUMA())
return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY;
return heapType == D3D12_HEAP_TYPE_DEFAULT ? return (heapType == D3D12_HEAP_TYPE_DEFAULT || heapType == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY) ?
DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY; DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY;
} }
@ -7279,15 +7306,16 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat
ClearDetailedStatistics(outCustomHeaps[1]); ClearDetailedStatistics(outCustomHeaps[1]);
} }
// Process default pools. 3 standard heap types only. Add them to outStats.HeapType[i]. // Process default pools. 4 standard heap types only. Add them to outStats.HeapType[i].
if (SupportsResourceHeapTier2()) if (SupportsResourceHeapTier2())
{ {
// DEFAULT, UPLOAD, READBACK. // DEFAULT, UPLOAD, READBACK, GPU_UPLOAD.
for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex)
{ {
BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex]; BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex];
D3D12MA_ASSERT(pBlockVector); D3D12MA_ASSERT(pBlockVector);
pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4
pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]);
} }
} }
else else
@ -7299,7 +7327,9 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat
{ {
BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType]; BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType];
D3D12MA_ASSERT(pBlockVector); D3D12MA_ASSERT(pBlockVector);
pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]);
const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4
pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]);
} }
} }
} }
@ -7314,6 +7344,9 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat
AddDetailedStatistics( AddDetailedStatistics(
outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_READBACK)], outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_READBACK)],
outStats.HeapType[2]); outStats.HeapType[2]);
AddDetailedStatistics(
outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_GPU_UPLOAD_COPY)],
outStats.HeapType[4]);
// Process custom pools. // Process custom pools.
DetailedStatistics tmpStats; DetailedStatistics tmpStats;
@ -7338,13 +7371,14 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat
} }
} }
// Process committed allocations. 3 standard heap types only. // Process committed allocations. standard heap types only.
for (UINT heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) for (UINT heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex)
{ {
ClearDetailedStatistics(tmpStats); ClearDetailedStatistics(tmpStats);
m_CommittedAllocations[heapTypeIndex].AddDetailedStatistics(tmpStats); m_CommittedAllocations[heapTypeIndex].AddDetailedStatistics(tmpStats);
const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4
AddDetailedStatistics( AddDetailedStatistics(
outStats.HeapType[heapTypeIndex], tmpStats); outStats.HeapType[outputIndex], tmpStats);
AddDetailedStatistics( AddDetailedStatistics(
outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(IndexToStandardHeapType(heapTypeIndex))], tmpStats); outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(IndexToStandardHeapType(heapTypeIndex))], tmpStats);
} }
@ -7366,19 +7400,24 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat
D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == D3D12MA_ASSERT(outStats.Total.Stats.BlockCount ==
outStats.HeapType[0].Stats.BlockCount + outStats.HeapType[1].Stats.BlockCount + outStats.HeapType[0].Stats.BlockCount + outStats.HeapType[1].Stats.BlockCount +
outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount); outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount +
outStats.HeapType[4].Stats.BlockCount);
D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount ==
outStats.HeapType[0].Stats.AllocationCount + outStats.HeapType[1].Stats.AllocationCount + outStats.HeapType[0].Stats.AllocationCount + outStats.HeapType[1].Stats.AllocationCount +
outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount); outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount +
outStats.HeapType[4].Stats.AllocationCount);
D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes ==
outStats.HeapType[0].Stats.BlockBytes + outStats.HeapType[1].Stats.BlockBytes + outStats.HeapType[0].Stats.BlockBytes + outStats.HeapType[1].Stats.BlockBytes +
outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes); outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes +
outStats.HeapType[4].Stats.BlockBytes);
D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes ==
outStats.HeapType[0].Stats.AllocationBytes + outStats.HeapType[1].Stats.AllocationBytes + outStats.HeapType[0].Stats.AllocationBytes + outStats.HeapType[1].Stats.AllocationBytes +
outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes); outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes +
outStats.HeapType[4].Stats.AllocationBytes);
D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == D3D12MA_ASSERT(outStats.Total.UnusedRangeCount ==
outStats.HeapType[0].UnusedRangeCount + outStats.HeapType[1].UnusedRangeCount + outStats.HeapType[0].UnusedRangeCount + outStats.HeapType[1].UnusedRangeCount +
outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount); outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount +
outStats.HeapType[4].UnusedRangeCount);
} }
void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget) void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget)
@ -7426,6 +7465,7 @@ void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE hea
switch (heapType) switch (heapType)
{ {
case D3D12_HEAP_TYPE_DEFAULT: case D3D12_HEAP_TYPE_DEFAULT:
case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY:
GetBudget(&outBudget, NULL); GetBudget(&outBudget, NULL);
break; break;
case D3D12_HEAP_TYPE_UPLOAD: case D3D12_HEAP_TYPE_UPLOAD:
@ -7482,6 +7522,9 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap)
json.WriteBool(m_D3D12Architecture.UMA); json.WriteBool(m_D3D12Architecture.UMA);
json.WriteString(L"CacheCoherentUMA"); json.WriteString(L"CacheCoherentUMA");
json.WriteBool(m_D3D12Architecture.CacheCoherentUMA); json.WriteBool(m_D3D12Architecture.CacheCoherentUMA);
json.WriteString(L"GPUUploadHeapSupported");
json.WriteBool(m_GPUUploadHeapSupported != FALSE);
} }
json.EndObject(); json.EndObject();
} }
@ -7514,6 +7557,17 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap)
json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); json.AddDetailedStatisticsInfoObject(stats.HeapType[0]);
} }
json.EndObject(); json.EndObject();
if(IsGPUUploadHeapSupported())
{
json.WriteString(L"GPU_UPLOAD");
json.BeginObject();
{
json.WriteString(L"Stats");
json.AddDetailedStatisticsInfoObject(stats.HeapType[4]);
}
json.EndObject();
}
} }
json.WriteString(L"UPLOAD"); json.WriteString(L"UPLOAD");
json.BeginObject(); json.BeginObject();
@ -7564,6 +7618,17 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap)
} }
json.EndObject(); json.EndObject();
if(IsGPUUploadHeapSupported())
{
json.WriteString(L"GPU_UPLOAD");
json.BeginObject();
{
json.WriteString(L"Stats");
json.AddDetailedStatisticsInfoObject(stats.HeapType[4]);
}
json.EndObject();
}
json.WriteString(L"CUSTOM"); json.WriteString(L"CUSTOM");
json.BeginObject(); json.BeginObject();
{ {
@ -8012,6 +8077,9 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U
outCommittedAllocationParams = CommittedAllocationParameters(); outCommittedAllocationParams = CommittedAllocationParameters();
outPreferCommitted = false; outPreferCommitted = false;
D3D12MA_ASSERT((allocDesc.HeapType != D3D12_HEAP_TYPE_GPU_UPLOAD_COPY || IsGPUUploadHeapSupported()) &&
"Trying to allocate from D3D12_HEAP_TYPE_GPU_UPLOAD while GPUUploadHeapSupported == FALSE or D3D12MA_OPTIONS16_SUPPORTED macro was not defined when compiling D3D12MA library.");
bool msaaAlwaysCommitted; bool msaaAlwaysCommitted;
if (allocDesc.CustomPool != NULL) if (allocDesc.CustomPool != NULL)
{ {
@ -8111,6 +8179,7 @@ UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, Reso
case D3D12_HEAP_TYPE_DEFAULT: poolIndex = 0; break; case D3D12_HEAP_TYPE_DEFAULT: poolIndex = 0; break;
case D3D12_HEAP_TYPE_UPLOAD: poolIndex = 1; break; case D3D12_HEAP_TYPE_UPLOAD: poolIndex = 1; break;
case D3D12_HEAP_TYPE_READBACK: poolIndex = 2; break; case D3D12_HEAP_TYPE_READBACK: poolIndex = 2; break;
case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: poolIndex = 3; break;
default: D3D12MA_ASSERT(0); default: D3D12MA_ASSERT(0);
} }
@ -8166,6 +8235,9 @@ void AllocatorPimpl::CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_H
case 2: case 2:
outHeapType = D3D12_HEAP_TYPE_READBACK; outHeapType = D3D12_HEAP_TYPE_READBACK;
break; break;
case 3:
outHeapType = D3D12_HEAP_TYPE_GPU_UPLOAD_COPY;
break;
default: default:
D3D12MA_ASSERT(0); D3D12MA_ASSERT(0);
} }
@ -10173,6 +10245,11 @@ BOOL Allocator::IsCacheCoherentUMA() const
return m_Pimpl->IsCacheCoherentUMA(); return m_Pimpl->IsCacheCoherentUMA();
} }
// Public accessor: forwards to the implementation object and converts its
// C++ bool answer into the Win32 BOOL used by the public interface.
BOOL Allocator::IsGPUUploadHeapSupported() const
{
    const bool isSupported = m_Pimpl->IsGPUUploadHeapSupported();
    return isSupported ? TRUE : FALSE;
}
UINT64 Allocator::GetMemoryCapacity(UINT memorySegmentGroup) const UINT64 Allocator::GetMemoryCapacity(UINT memorySegmentGroup) const
{ {
return m_Pimpl->GetMemoryCapacity(memorySegmentGroup); return m_Pimpl->GetMemoryCapacity(memorySegmentGroup);

View File

@ -572,6 +572,8 @@ static std::wstring SizeToStr(size_t size)
static void PrintAdapterInformation(IDXGIAdapter1* adapter) static void PrintAdapterInformation(IDXGIAdapter1* adapter)
{ {
assert(g_Allocator);
wprintf(L"DXGI_ADAPTER_DESC1:\n"); wprintf(L"DXGI_ADAPTER_DESC1:\n");
wprintf(L" Description = %s\n", g_AdapterDesc.Description); wprintf(L" Description = %s\n", g_AdapterDesc.Description);
wprintf(L" VendorId = 0x%X (%s)\n", g_AdapterDesc.VendorId, VendorIDToStr(g_AdapterDesc.VendorId)); wprintf(L" VendorId = 0x%X (%s)\n", g_AdapterDesc.VendorId, VendorIDToStr(g_AdapterDesc.VendorId));
@ -598,6 +600,9 @@ static void PrintAdapterInformation(IDXGIAdapter1* adapter)
assert(0); assert(0);
} }
wprintf(L"D3D12_FEATURE_DATA_D3D12_OPTIONS16:\n");
wprintf(L" GPUUploadHeapSupported = %u\n", g_Allocator->IsGPUUploadHeapSupported() ? 1 : 0);
ComPtr<IDXGIAdapter3> adapter3; ComPtr<IDXGIAdapter3> adapter3;
if(SUCCEEDED(adapter->QueryInterface(IID_PPV_ARGS(&adapter3)))) if(SUCCEEDED(adapter->QueryInterface(IID_PPV_ARGS(&adapter3))))
{ {

View File

@ -2983,6 +2983,152 @@ static void TestDevice10(const TestContext& ctx)
} }
#endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__ #endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__
// Exercises allocations from D3D12_HEAP_TYPE_GPU_UPLOAD through the allocator in ctx:
// a default buffer, a committed buffer, a buffer in a custom pool, a mapped write/read
// round trip, two big buffers and a texture, followed by budget and statistics checks.
// Compiled to an empty function unless D3D12MA_OPTIONS16_SUPPORTED is defined to non-zero,
// and skipped at runtime when the allocator reports GPU Upload Heaps as unsupported.
static void TestGPUUploadHeap(const TestContext& ctx)
{
#if D3D12MA_OPTIONS16_SUPPORTED
    using namespace D3D12MA;

    wprintf(L"Test GPU Upload Heap\n");

    if(!ctx.allocator->IsGPUUploadHeapSupported())
    {
        wprintf(L"    Skipped due to GPUUploadHeap not supported.\n");
        return;
    }

    // Snapshot budget and statistics up front so the checks at the end can compare deltas.
    Budget begLocalBudget = {};
    ctx.allocator->GetBudget(&begLocalBudget, NULL);
    TotalStatistics begStats = {};
    ctx.allocator->CalculateStatistics(&begStats);

    // Create a buffer, likely placed.
    ALLOCATION_DESC allocDesc = {};
    allocDesc.HeapType = D3D12_HEAP_TYPE_GPU_UPLOAD;

    D3D12_RESOURCE_DESC resDesc;
    FillResourceDescForBuffer(resDesc, 64 * KILOBYTE);

    ComPtr<Allocation> alloc;
    CHECK_HR(ctx.allocator->CreateResource(&allocDesc, &resDesc,
        D3D12_RESOURCE_STATE_COMMON, NULL, &alloc, IID_NULL, NULL));
    CHECK_BOOL(alloc && alloc->GetResource());
    CHECK_BOOL(alloc->GetResource()->GetGPUVirtualAddress() != 0);

    // The resource must really live in a GPU upload heap.
    {
        D3D12_HEAP_PROPERTIES heapProps = {};
        D3D12_HEAP_FLAGS heapFlags = {};
        CHECK_HR(alloc->GetResource()->GetHeapProperties(&heapProps, &heapFlags));
        CHECK_BOOL(heapProps.Type == D3D12_HEAP_TYPE_GPU_UPLOAD);
    }

    // Create a committed one.
    ALLOCATION_DESC committedAllocDesc = allocDesc;
    committedAllocDesc.Flags |= ALLOCATION_FLAG_COMMITTED;

    ComPtr<Allocation> committedAlloc;
    CHECK_HR(ctx.allocator->CreateResource(&committedAllocDesc, &resDesc,
        D3D12_RESOURCE_STATE_COMMON, NULL, &committedAlloc, IID_NULL, NULL));
    CHECK_BOOL(committedAlloc && committedAlloc->GetResource());
    CHECK_BOOL(committedAlloc->GetHeap() == NULL); // Committed, heap is implicit and inaccessible.

    // Create a custom pool and a buffer inside of it.
    POOL_DESC poolDesc = {};
    poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_GPU_UPLOAD;

    ComPtr<Pool> pool;
    CHECK_HR(ctx.allocator->CreatePool(&poolDesc, &pool));

    ALLOCATION_DESC poolAllocDesc = {};
    poolAllocDesc.CustomPool = pool.Get();

    ComPtr<Allocation> poolAlloc;
    CHECK_HR(ctx.allocator->CreateResource(&poolAllocDesc, &resDesc,
        D3D12_RESOURCE_STATE_COMMON, NULL, &poolAlloc, IID_NULL, NULL));
    CHECK_BOOL(poolAlloc && poolAlloc->GetResource());

    // Map the original buffer, write, then read.
    {
        const auto res = alloc->GetResource();

        UINT* mappedData = NULL;
        CHECK_HR(res->Map(0, &EMPTY_RANGE, (void**)&mappedData)); // {0, 0} - not reading anything.
        for(UINT i = 0; i < resDesc.Width / sizeof(UINT); ++i)
        {
            mappedData[i] = i * 3;
        }
        res->Unmap(0, NULL); // NULL - written everything.

        CHECK_HR(res->Map(0, NULL, (void**)&mappedData)); // NULL - reading everything.
        // Compare (==), do not assign: element 100 was written as 100 * 3 above.
        CHECK_BOOL(mappedData[100] == 300);
        res->Unmap(0, &EMPTY_RANGE); // {0, 0} - not written anything.
    }

    // Create two big buffers.
    D3D12_RESOURCE_DESC bigResDesc = resDesc;
    bigResDesc.Width = 128 * MEGABYTE;

    ComPtr<Allocation> bigAllocs[2];
    for(UINT i = 0; i < 2; ++i)
    {
        CHECK_HR(ctx.allocator->CreateResource(&allocDesc, &bigResDesc,
            D3D12_RESOURCE_STATE_COMMON, NULL, &bigAllocs[i], IID_NULL, NULL));
        CHECK_BOOL(bigAllocs[i] && bigAllocs[i]->GetResource());
    }

    // Create a texture.
    constexpr UINT texSize = 256;
    D3D12_RESOURCE_DESC texDesc = {};
    texDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    texDesc.Alignment = 0;
    texDesc.Width = texSize;
    texDesc.Height = texSize;
    texDesc.DepthOrArraySize = 1;
    texDesc.MipLevels = 1;
    texDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    texDesc.SampleDesc.Count = 1;
    texDesc.SampleDesc.Quality = 0;
    texDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    texDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

    ComPtr<Allocation> texAlloc;
    CHECK_HR(ctx.allocator->CreateResource(&allocDesc, &texDesc,
        D3D12_RESOURCE_STATE_COMMON, NULL, &texAlloc, IID_NULL, NULL));
    CHECK_BOOL(texAlloc && texAlloc->GetResource());

    {
        std::vector<UINT> texPixels(texSize * texSize);
        // The contents of texPixels[i] do not matter.

        const auto texRes = texAlloc->GetResource();
        // Need to pass ppData == NULL for Map() to be used with a texture having D3D12_TEXTURE_LAYOUT_UNKNOWN.
        CHECK_HR(texRes->Map(0, &EMPTY_RANGE, NULL)); // {0, 0} - not reading anything.
        CHECK_HR(texRes->WriteToSubresource(
            0, // DstSubresource
            NULL, // pDstBox
            texPixels.data(), // pSrcData
            texSize * sizeof(DWORD), // SrcRowPitch
            texSize * texSize * sizeof(DWORD))); // SrcDepthPitch
        texRes->Unmap(0, NULL); // NULL - written everything.
    }

    // Check budget and stats.
    // 6 allocations = buffer + committed buffer + pool buffer + 2 big buffers + texture.
    constexpr UINT totalAllocCount = 6;

    Budget endLocalBudget = {};
    ctx.allocator->GetBudget(&endLocalBudget, NULL);
    TotalStatistics endStats = {};
    ctx.allocator->CalculateStatistics(&endStats);

    CHECK_BOOL(endLocalBudget.UsageBytes >= begLocalBudget.UsageBytes
        + 2 * bigResDesc.Width
        && "This can fail if GPU_UPLOAD falls back to system RAM e.g. when under PIX?");

    // Shared delta check applied to each statistics bucket the new allocations should land in.
    auto validateStats = [totalAllocCount, &bigResDesc](const Statistics& begStats, const Statistics& endStats)
    {
        CHECK_BOOL(endStats.BlockCount >= begStats.BlockCount);
        CHECK_BOOL(endStats.BlockBytes >= begStats.BlockBytes);
        CHECK_BOOL(endStats.AllocationCount == begStats.AllocationCount + totalAllocCount);
        CHECK_BOOL(endStats.AllocationBytes > begStats.AllocationBytes + 2 * bigResDesc.Width);
    };
    validateStats(begLocalBudget.Stats, endLocalBudget.Stats);
    validateStats(begStats.Total.Stats, endStats.Total.Stats);
    validateStats(begStats.MemorySegmentGroup[0].Stats, endStats.MemorySegmentGroup[0].Stats); // DXGI_MEMORY_SEGMENT_GROUP_LOCAL
    validateStats(begStats.HeapType[4].Stats, endStats.HeapType[4].Stats); // D3D12_HEAP_TYPE_GPU_UPLOAD
#endif
}
static void TestVirtualBlocks(const TestContext& ctx) static void TestVirtualBlocks(const TestContext& ctx)
{ {
wprintf(L"Test virtual blocks\n"); wprintf(L"Test virtual blocks\n");
@ -4250,6 +4396,8 @@ static void TestGroupBasics(const TestContext& ctx)
TestDevice10(ctx); TestDevice10(ctx);
#endif #endif
TestGPUUploadHeap(ctx);
FILE* file; FILE* file;
fopen_s(&file, "Results.csv", "w"); fopen_s(&file, "Results.csv", "w");
assert(file != NULL); assert(file != NULL);