Further development of budget management

This commit is contained in:
Adam Sawicki 2019-11-02 14:12:05 +01:00
parent 5f573f588a
commit 353e3675d3
3 changed files with 341 additions and 53 deletions

View File

@ -3863,13 +3863,15 @@ static void TestBudget()
uint32_t memTypeIndex = UINT32_MAX;
static const VkDeviceSize BUF_SIZE = 0x10000;
static const uint32_t BUF_COUNT = 32;
static const VkDeviceSize BUF_SIZE = 100ull * 1024 * 1024;
static const uint32_t BUF_COUNT = 4;
for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
{
VmaBudget budgetBeg = {};
vmaGetBudget(g_hAllocator, &budgetBeg);
vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
VmaBudget budgetBeg[VK_MAX_MEMORY_HEAPS] = {};
vmaGetBudget(g_hAllocator, budgetBeg);
VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufInfo.size = BUF_SIZE;
@ -3902,8 +3904,8 @@ static void TestBudget()
}
}
VmaBudget budgetWithBufs = {};
vmaGetBudget(g_hAllocator, &budgetWithBufs);
VmaBudget budgetWithBufs[VK_MAX_MEMORY_HEAPS] = {};
vmaGetBudget(g_hAllocator, budgetWithBufs);
// DESTROY BUFFERS
for(size_t bufIndex = BUF_COUNT; bufIndex--; )
@ -3911,28 +3913,77 @@ static void TestBudget()
vmaDestroyBuffer(g_hAllocator, bufInfos[bufIndex].Buffer, bufInfos[bufIndex].Allocation);
}
VmaBudget budgetEnd = {};
vmaGetBudget(g_hAllocator, &budgetEnd);
VmaBudget budgetEnd[VK_MAX_MEMORY_HEAPS] = {};
vmaGetBudget(g_hAllocator, budgetEnd);
// CHECK
for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
{
TEST(budgetEnd.allocationBytes[i] <= budgetEnd.blockBytes[i]);
TEST(budgetEnd[i].allocationBytes <= budgetEnd[i].blockBytes);
if(i == heapIndex)
{
TEST(budgetEnd.allocationBytes[i] == budgetBeg.allocationBytes[i]);
TEST(budgetWithBufs.allocationBytes[i] == budgetBeg.allocationBytes[i] + BUF_SIZE * BUF_COUNT);
TEST(budgetWithBufs.blockBytes[i] >= budgetEnd.blockBytes[i]);
TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes);
TEST(budgetWithBufs[i].allocationBytes == budgetBeg[i].allocationBytes + BUF_SIZE * BUF_COUNT);
TEST(budgetWithBufs[i].blockBytes >= budgetEnd[i].blockBytes);
}
else
{
TEST(budgetEnd.allocationBytes[i] == budgetEnd.allocationBytes[i] &&
budgetEnd.allocationBytes[i] == budgetWithBufs.allocationBytes[i]);
TEST(budgetEnd.blockBytes[i] == budgetEnd.blockBytes[i] &&
budgetEnd.blockBytes[i] == budgetWithBufs.blockBytes[i]);
TEST(budgetEnd[i].allocationBytes == budgetEnd[i].allocationBytes &&
budgetEnd[i].allocationBytes == budgetWithBufs[i].allocationBytes);
TEST(budgetEnd[i].blockBytes == budgetEnd[i].blockBytes &&
budgetEnd[i].blockBytes == budgetWithBufs[i].blockBytes);
}
}
}
// DELME
{
std::vector<BufferInfo> buffers;
for(uint32_t i = 0; i < 160; ++i)
{
//vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufCreateInfo.size = 50ull * 1024 * 1024;
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
VmaAllocationCreateInfo allocCreateInfo = {};
//allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
allocCreateInfo.memoryTypeBits = 1;
//allocCreateInfo.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
BufferInfo bufInfo = {};
VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
if(res == VK_SUCCESS)
{
buffers.push_back(std::move(bufInfo));
}
else
{
break;
}
}
char* statsString;
vmaBuildStatsString(g_hAllocator, &statsString, VK_TRUE);
printf("%s\n", statsString);
vmaFreeStatsString(g_hAllocator, statsString);
VmaBudget budget1[VK_MAX_MEMORY_HEAPS];
vmaGetBudget(g_hAllocator, budget1);
vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
VmaBudget budget2[VK_MAX_MEMORY_HEAPS];
vmaGetBudget(g_hAllocator, budget2);
for(size_t i = buffers.size(); i--; )
{
vmaDestroyBuffer(g_hAllocator, buffers[i].Buffer, buffers[i].Allocation);
}
}
}
static void TestMapping()

View File

@ -46,8 +46,10 @@ bool g_MemoryAliasingWarningEnabled = true;
static bool g_EnableValidationLayer = true;
static bool VK_KHR_get_memory_requirements2_enabled = false;
static bool VK_KHR_get_physical_device_properties2_enabled = false;
static bool VK_KHR_dedicated_allocation_enabled = false;
static bool VK_KHR_bind_memory2_enabled = false;
static bool VK_EXT_memory_budget_enabled = false;
bool g_SparseBindingEnabled = false;
static HINSTANCE g_hAppInstance;
@ -1115,15 +1117,32 @@ static void InitializeApplication()
}
}
std::vector<const char*> instanceExtensions;
instanceExtensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
instanceExtensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
uint32_t availableInstanceExtensionCount = 0;
ERR_GUARD_VULKAN( vkEnumerateInstanceExtensionProperties(nullptr, &availableInstanceExtensionCount, nullptr) );
std::vector<VkExtensionProperties> availableInstanceExtensions(availableInstanceExtensionCount);
if(availableInstanceExtensionCount > 0)
{
ERR_GUARD_VULKAN( vkEnumerateInstanceExtensionProperties(nullptr, &availableInstanceExtensionCount, availableInstanceExtensions.data()) );
}
std::vector<const char*> enabledInstanceExtensions;
enabledInstanceExtensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
enabledInstanceExtensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
std::vector<const char*> instanceLayers;
if(g_EnableValidationLayer == true)
{
instanceLayers.push_back(VALIDATION_LAYER_NAME);
instanceExtensions.push_back("VK_EXT_debug_report");
enabledInstanceExtensions.push_back("VK_EXT_debug_report");
}
for(const auto& extensionProperties : availableInstanceExtensions)
{
if(strcmp(extensionProperties.extensionName, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME) == 0)
{
enabledInstanceExtensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
VK_KHR_get_physical_device_properties2_enabled = true;
}
}
VkApplicationInfo appInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO };
@ -1135,8 +1154,8 @@ static void InitializeApplication()
VkInstanceCreateInfo instInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO };
instInfo.pApplicationInfo = &appInfo;
instInfo.enabledExtensionCount = static_cast<uint32_t>(instanceExtensions.size());
instInfo.ppEnabledExtensionNames = instanceExtensions.data();
instInfo.enabledExtensionCount = static_cast<uint32_t>(enabledInstanceExtensions.size());
instInfo.ppEnabledExtensionNames = enabledInstanceExtensions.data();
instInfo.enabledLayerCount = static_cast<uint32_t>(instanceLayers.size());
instInfo.ppEnabledLayerNames = instanceLayers.data();
@ -1282,6 +1301,11 @@ static void InitializeApplication()
enabledDeviceExtensions.push_back(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME);
VK_KHR_bind_memory2_enabled = true;
}
else if(strcmp(properties[i].extensionName, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME) == 0)
{
enabledDeviceExtensions.push_back(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
VK_EXT_memory_budget_enabled = true;
}
}
}
}
@ -1302,6 +1326,7 @@ static void InitializeApplication()
VmaAllocatorCreateInfo allocatorInfo = {};
allocatorInfo.physicalDevice = g_hPhysicalDevice;
allocatorInfo.device = g_hDevice;
allocatorInfo.instance = g_hVulkanInstance;
if(VK_KHR_dedicated_allocation_enabled)
{
@ -1322,6 +1347,10 @@ static void InitializeApplication()
{
allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT;
}
if(VK_EXT_memory_budget_enabled && VK_KHR_get_physical_device_properties2_enabled)
{
allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
}
if(USE_CUSTOM_CPU_ALLOCATION_CALLBACKS)
{
@ -1341,7 +1370,7 @@ static void InitializeApplication()
/*
std::array<VkDeviceSize, VK_MAX_MEMORY_HEAPS> heapSizeLimit;
std::fill(heapSizeLimit.begin(), heapSizeLimit.end(), VK_WHOLE_SIZE);
heapSizeLimit[0] = 100ull * 1024 * 1024;
heapSizeLimit[0] = 512ull * 1024 * 1024;
allocatorInfo.pHeapSizeLimit = heapSizeLimit.data();
*/

View File

@ -1,4 +1,5 @@
//
//
// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
@ -1718,6 +1719,14 @@ available through VmaAllocatorCreateInfo::pRecordSettings.
#endif
#endif
#if !defined(VMA_MEMORY_BUDGET)
#if VK_EXT_memory_budget && VK_KHR_get_physical_device_properties2
#define VMA_MEMORY_BUDGET 1
#else
#define VMA_MEMORY_BUDGET 0
#endif
#endif
// Define these macros to decorate all public functions with additional code,
// before and after returned type, appropriately. This may be useful for
// exporing the functions when compiling VMA as a separate library. Example:
@ -1788,8 +1797,8 @@ typedef enum VmaAllocatorCreateFlagBits {
VmaAllocatorCreateInfo::device, and you want them to be used internally by this
library:
- VK_KHR_get_memory_requirements2
- VK_KHR_dedicated_allocation
- VK_KHR_get_memory_requirements2 (device extension)
- VK_KHR_dedicated_allocation (device extension)
When this flag is set, you can experience following warnings reported by Vulkan
validation layer. You can ignore them.
@ -1809,6 +1818,18 @@ typedef enum VmaAllocatorCreateFlagBits {
This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2().
*/
VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004,
/**
Enables usage of VK_EXT_memory_budget extension.
You may set this flag only if you found out that this device extension is supported,
you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device,
and you want it to be used internally by this library, along with another instance extension
VK_KHR_get_physical_device_properties2, which is required by it.
The extension provides query for current memory usage and budget, which will probably
be more accurate than an estimation used by the library otherwise.
*/
VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008,
VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VmaAllocatorCreateFlagBits;
@ -1844,6 +1865,9 @@ typedef struct VmaVulkanFunctions {
PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR;
PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR;
#endif
#if VMA_MEMORY_BUDGET
PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
#endif
} VmaVulkanFunctions;
/// Flags to be used in VmaRecordSettings::flags.
@ -1952,6 +1976,11 @@ typedef struct VmaAllocatorCreateInfo
creation of the allocator object fails with `VK_ERROR_FEATURE_NOT_PRESENT`.
*/
const VmaRecordSettings* pRecordSettings;
/** \brief Optional handle to Vulkan instance object.
Optional, can be null. Must be set if #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT flas is used.
*/
VkInstance instance;
} VmaAllocatorCreateInfo;
/// Creates Allocator object.
@ -2041,47 +2070,54 @@ VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats(
VmaAllocator allocator,
VmaStats* pStats);
/** \brief Statistics of current memory usage and available budget, in bytes, per memory heap.
/** \brief Statistics of current memory usage and available budget, in bytes, for specific memory heap.
*/
typedef struct VmaBudget
{
/** \brief Sum size of all `VkDeviceMemory` blocks allocated from particular heap, in bytes.
*/
VkDeviceSize blockBytes[VK_MAX_MEMORY_HEAPS];
VkDeviceSize blockBytes;
/** \brief Sum size of all allocations created in particular heap, in bytes.
Always less or equal than `blockBytes[i]`.
Always less or equal than `blockBytes`.
Difference `blockBytes - allocationBytes` is the amount of memory allocated but unused -
available for new allocations or wasted due to fragmentation.
*/
VkDeviceSize allocationBytes[VK_MAX_MEMORY_HEAPS];
VkDeviceSize allocationBytes;
/** \brief Estimated current memory usage of the program, in bytes.
Fetched from system using `VK_EXT_memory_budget` extension if enabled.
It might be different than `blockBytes[i]` (usually higher) due to additional implicit objects
also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers etc.
It might be different than `blockBytes` (usually higher) due to additional implicit objects
also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or
`VkDeviceMemory` blocks allocated outside of this library, if any.
*/
VkDeviceSize usage[VK_MAX_MEMORY_HEAPS];
VkDeviceSize usage;
/** \brief Estimated amount of memory available to the program, in bytes.
Fetched from system using `VK_EXT_memory_budget` extension if enabled.
It might be different (most probably smaller) than `VkMemoryHeap::size[i]` due to factors
It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors
external to the program, like other programs also consuming system resources.
Exceeding the budget may result, depending on operating system and graphics driver:
Difference `budget - usage` is the amount of additional memory that can probably
be allocated without problems. Exceeding the budget may result, depending on operating
system and graphics driver:
- Allocation failing with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
- Allocation taking very long time, even few seconds.
- Overall system slowdown.
- Even GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST` returned somewhere later.
*/
VkDeviceSize budget[VK_MAX_MEMORY_HEAPS];
VkDeviceSize budget;
} VmaBudget;
/** \brief Retrieves information about current memory budget.
/** \brief Retrieves information about current memory budget for all memory heaps.
\param[out] pBudget Must point to array with number of elements at least equal to number of memory heaps in physical device used.
This function is called "get" not "calculate" because it is very fast, suitable to be called
every frame or every allocation. For more detailed statistics use vmaCalculateStats().
@ -2245,6 +2281,10 @@ typedef enum VmaAllocationCreateFlagBits {
Otherwise it is ignored.
*/
VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080,
/** Create allocation only if additional device memory required for it, if any, won't exceed
memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
*/
VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100,
/** Allocation strategy that chooses smallest possible free range for the
allocation.
@ -6781,13 +6821,30 @@ struct VmaCurrentBudgetData
VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS];
VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS];
#if VMA_MEMORY_BUDGET
VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch;
VMA_RW_MUTEX m_BudgetMutex;
uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS];
uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS];
uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS];
#endif // #if VMA_MEMORY_BUDGET
VmaCurrentBudgetData()
{
for(uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex)
{
m_BlockBytes[heapIndex] = 0;
m_AllocationBytes[heapIndex] = 0;
#if VMA_MEMORY_BUDGET
m_VulkanUsage[heapIndex] = 0;
m_VulkanBudget[heapIndex] = 0;
m_BlockBytesAtBudgetFetch[heapIndex] = 0;
#endif
}
#if VMA_MEMORY_BUDGET
m_OperationsSinceBudgetFetch = 0;
#endif
}
};
@ -6799,7 +6856,9 @@ public:
bool m_UseMutex;
bool m_UseKhrDedicatedAllocation;
bool m_UseKhrBindMemory2;
bool m_UseExtMemoryBudget;
VkDevice m_hDevice;
VkInstance m_hInstance;
bool m_AllocationCallbacksSpecified;
VkAllocationCallbacks m_AllocationCallbacks;
VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks;
@ -6906,7 +6965,8 @@ public:
void CalculateStats(VmaStats* pStats);
void GetBudget(VmaBudget* outBudget);
void GetBudget(
VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount);
#if VMA_STATS_STRING_ENABLED
void PrintDetailedMap(class VmaJsonWriter& json);
@ -7031,6 +7091,7 @@ private:
VkDeviceSize size,
VmaSuballocationType suballocType,
uint32_t memTypeIndex,
bool withinBudget,
bool map,
bool isUserDataString,
void* pUserData,
@ -7046,6 +7107,10 @@ private:
on GPU as they support creation of required buffer for copy operations.
*/
uint32_t CalculateGpuDefragmentationMemoryTypeBits() const;
#if VMA_MEMORY_BUDGET
void UpdateVulkanBudget();
#endif // #if VMA_MEMORY_BUDGET
};
////////////////////////////////////////////////////////////////////////////////
@ -11641,9 +11706,20 @@ VkResult VmaBlockVector::AllocatePage(
bool canMakeOtherLost = (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) != 0;
const bool mapped = (createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0;
const bool isUserDataString = (createInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0;
const bool withinBudget = (createInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0;
VkDeviceSize freeMemory;
{
const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex);
VmaBudget heapBudget = {};
m_hAllocator->GetBudget(&heapBudget, heapIndex, 1);
freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0;
}
const bool canCreateNewBlock =
((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) &&
(m_Blocks.size() < m_MaxBlockCount);
(m_Blocks.size() < m_MaxBlockCount) &&
freeMemory >= size;
uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK;
// If linearAlgorithm is used, canMakeOtherLost is available only when used as ring buffer.
@ -11710,7 +11786,7 @@ VkResult VmaBlockVector::AllocatePage(
pAllocation);
if(res == VK_SUCCESS)
{
VMA_DEBUG_LOG(" Returned from last block #%u", (uint32_t)(m_Blocks.size() - 1));
VMA_DEBUG_LOG(" Returned from last block #%u", pCurrBlock->GetId());
return VK_SUCCESS;
}
}
@ -11736,7 +11812,7 @@ VkResult VmaBlockVector::AllocatePage(
pAllocation);
if(res == VK_SUCCESS)
{
VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex);
VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId());
return VK_SUCCESS;
}
}
@ -11760,7 +11836,7 @@ VkResult VmaBlockVector::AllocatePage(
pAllocation);
if(res == VK_SUCCESS)
{
VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex);
VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId());
return VK_SUCCESS;
}
}
@ -11795,7 +11871,8 @@ VkResult VmaBlockVector::AllocatePage(
}
size_t newBlockIndex = 0;
VkResult res = CreateBlock(newBlockSize, &newBlockIndex);
VkResult res = newBlockSize <= freeMemory ?
CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY;
// Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize.
if(!m_ExplicitBlockSize)
{
@ -11806,7 +11883,8 @@ VkResult VmaBlockVector::AllocatePage(
{
newBlockSize = smallerNewBlockSize;
++newBlockSizeShift;
res = CreateBlock(newBlockSize, &newBlockIndex);
res = newBlockSize <= freeMemory ?
CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
else
{
@ -11832,7 +11910,7 @@ VkResult VmaBlockVector::AllocatePage(
pAllocation);
if(res == VK_SUCCESS)
{
VMA_DEBUG_LOG(" Created new block Size=%llu", newBlockSize);
VMA_DEBUG_LOG(" Created new block #%u Size=%llu", pBlock->GetId(), newBlockSize);
return VK_SUCCESS;
}
else
@ -11967,6 +12045,7 @@ VkResult VmaBlockVector::AllocatePage(
VMA_DEBUG_LOG(" Returned from existing block");
(*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData);
m_hAllocator->m_Budget.m_AllocationBytes[m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex)] += size;
++m_hAllocator->m_Budget.m_OperationsSinceBudgetFetch;
if(VMA_DEBUG_INITIALIZE_ALLOCATIONS)
{
m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED);
@ -12170,6 +12249,7 @@ VkResult VmaBlockVector::AllocateFromBlock(
VMA_HEAVY_ASSERT(pBlock->Validate());
(*pAllocation)->SetUserData(m_hAllocator, pUserData);
m_hAllocator->m_Budget.m_AllocationBytes[m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex)] += size;
++m_hAllocator->m_Budget.m_OperationsSinceBudgetFetch;
if(VMA_DEBUG_INITIALIZE_ALLOCATIONS)
{
m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED);
@ -14233,7 +14313,9 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0),
m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0),
m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0),
m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0),
m_hDevice(pCreateInfo->device),
m_hInstance(pCreateInfo->instance),
m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL),
m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ?
*pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks),
@ -14269,6 +14351,12 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros.");
}
#endif
#if !(VMA_MEMORY_BUDGET)
if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0)
{
VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros.");
}
#endif
memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks));
memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties));
@ -14362,6 +14450,13 @@ VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo)
#endif
}
#if VMA_MEMORY_BUDGET
if(m_UseExtMemoryBudget)
{
UpdateVulkanBudget();
}
#endif // #if VMA_MEMORY_BUDGET
return res;
}
@ -14427,6 +14522,14 @@ void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunc
(PFN_vkBindImageMemory2KHR)vkGetDeviceProcAddr(m_hDevice, "vkBindImageMemory2KHR");
}
#endif // #if VMA_BIND_MEMORY2
#if VMA_MEMORY_BUDGET
if(m_UseExtMemoryBudget)
{
VMA_ASSERT(m_hInstance != VK_NULL_HANDLE);
m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR =
(PFN_vkGetPhysicalDeviceMemoryProperties2KHR)vkGetInstanceProcAddr(m_hInstance, "vkGetPhysicalDeviceMemoryProperties2KHR");
}
#endif // #if VMA_MEMORY_BUDGET
#endif // #if VMA_STATIC_VULKAN_FUNCTIONS == 1
#define VMA_COPY_IF_NOT_NULL(funcName) \
@ -14458,6 +14561,9 @@ void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunc
#if VMA_BIND_MEMORY2
VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR);
VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR);
#endif
#if VMA_MEMORY_BUDGET
VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR);
#endif
}
@ -14496,6 +14602,12 @@ void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunc
VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL);
}
#endif
#if VMA_MEMORY_BUDGET
if(m_UseExtMemoryBudget)
{
VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL);
}
#endif
}
VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex)
@ -14503,7 +14615,7 @@ VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex)
const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex);
const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size;
const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE;
return isSmallHeap ? (heapSize / 8) : m_PreferredLargeHeapBlockSize;
return VmaAlignUp(isSmallHeap ? (heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32);
}
VkResult VmaAllocator_T::AllocateMemoryOfType(
@ -14559,6 +14671,7 @@ VkResult VmaAllocator_T::AllocateMemoryOfType(
size,
suballocType,
memTypeIndex,
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0,
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0,
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0,
finalCreateInfo.pUserData,
@ -14594,6 +14707,7 @@ VkResult VmaAllocator_T::AllocateMemoryOfType(
size,
suballocType,
memTypeIndex,
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0,
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0,
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0,
finalCreateInfo.pUserData,
@ -14621,6 +14735,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory(
VkDeviceSize size,
VmaSuballocationType suballocType,
uint32_t memTypeIndex,
bool withinBudget,
bool map,
bool isUserDataString,
void* pUserData,
@ -14631,6 +14746,17 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory(
{
VMA_ASSERT(allocationCount > 0 && pAllocations);
if(withinBudget)
{
const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex);
VmaBudget heapBudget = {};
GetBudget(&heapBudget, heapIndex, 1);
if(heapBudget.usage + size * allocationCount > heapBudget.budget)
{
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
}
VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
allocInfo.memoryTypeIndex = memTypeIndex;
allocInfo.allocationSize = size;
@ -14708,6 +14834,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory(
FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory);
const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex);
m_Budget.m_AllocationBytes[heapIndex] -= currAlloc->GetSize();
++m_Budget.m_OperationsSinceBudgetFetch;
currAlloc->SetUserData(this, VMA_NULL);
currAlloc->Dtor();
m_AllocationObjectAllocator.Free(currAlloc);
@ -14761,6 +14888,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage(
(*pAllocation)->SetUserData(this, pUserData);
const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex);
m_Budget.m_AllocationBytes[heapIndex] += size;
++m_Budget.m_OperationsSinceBudgetFetch;
if(VMA_DEBUG_INITIALIZE_ALLOCATIONS)
{
FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED);
@ -15029,6 +15157,7 @@ void VmaAllocator_T::FreeMemory(
}
m_Budget.m_AllocationBytes[MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex())] -= allocation->GetSize();
++m_Budget.m_OperationsSinceBudgetFetch;
allocation->SetUserData(this, VMA_NULL);
allocation->Dtor();
m_AllocationObjectAllocator.Free(allocation);
@ -15103,14 +15232,55 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats)
VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]);
}
void VmaAllocator_T::GetBudget(VmaBudget* outBudget)
void VmaAllocator_T::GetBudget(VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount)
{
for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex)
#if VMA_MEMORY_BUDGET
if(m_UseExtMemoryBudget)
{
outBudget->blockBytes[heapIndex] = m_Budget.m_BlockBytes[heapIndex];
outBudget->allocationBytes[heapIndex] = m_Budget.m_AllocationBytes[heapIndex];
outBudget->usage[heapIndex] = outBudget->blockBytes[heapIndex];
outBudget->budget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics.
if(m_Budget.m_OperationsSinceBudgetFetch < 30)
{
VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex);
for(uint32_t i = 0; i < heapCount; ++i, ++outBudget)
{
const uint32_t heapIndex = firstHeap + i;
outBudget->blockBytes = m_Budget.m_BlockBytes[heapIndex];
outBudget->allocationBytes = m_Budget.m_AllocationBytes[heapIndex];
if(m_Budget.m_VulkanUsage[heapIndex] + outBudget->blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex])
{
outBudget->usage = m_Budget.m_VulkanUsage[heapIndex] +
outBudget->blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex];
}
else
{
outBudget->usage = 0;
}
// Have to take MIN with heap size because explicit HeapSizeLimit is included in it.
outBudget->budget = VMA_MIN(
m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size);
}
}
else
{
UpdateVulkanBudget(); // Outside of mutex lock
GetBudget(outBudget, firstHeap, heapCount); // Recursion
}
}
else
#endif
{
for(uint32_t i = 0; i < heapCount; ++i, ++outBudget)
{
const uint32_t heapIndex = firstHeap + i;
outBudget->blockBytes = m_Budget.m_BlockBytes[heapIndex];
outBudget->allocationBytes = m_Budget.m_AllocationBytes[heapIndex];
outBudget->usage = outBudget->blockBytes;
outBudget->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics.
}
}
}
@ -15335,6 +15505,13 @@ void VmaAllocator_T::GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats)
void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex)
{
m_CurrentFrameIndex.store(frameIndex);
#if VMA_MEMORY_BUDGET
if(m_UseExtMemoryBudget)
{
UpdateVulkanBudget();
}
#endif // #if VMA_MEMORY_BUDGET
}
void VmaAllocator_T::MakePoolAllocationsLost(
@ -15440,6 +15617,8 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc
if(res == VK_SUCCESS)
{
++m_Budget.m_OperationsSinceBudgetFetch;
// Informative callback.
if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL)
{
@ -15467,6 +15646,7 @@ void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, Vk
const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType);
m_Budget.m_BlockBytes[heapIndex] -= size;
++m_Budget.m_OperationsSinceBudgetFetch;
}
VkResult VmaAllocator_T::BindVulkanBuffer(
@ -15758,6 +15938,34 @@ uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const
return memoryTypeBits;
}
#if VMA_MEMORY_BUDGET
void VmaAllocator_T::UpdateVulkanBudget()
{
VMA_ASSERT(m_UseExtMemoryBudget);
VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR };
VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT };
memProps.pNext = &budgetProps;
GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps);
{
VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex);
for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex)
{
m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex];
m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex];
m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load();
}
m_Budget.m_OperationsSinceBudgetFetch = 0;
}
}
#endif // #if VMA_MEMORY_BUDGET
void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern)
{
if(VMA_DEBUG_INITIALIZE_ALLOCATIONS &&
@ -15957,7 +16165,7 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetBudget(
{
VMA_ASSERT(allocator && pBudget);
VMA_DEBUG_GLOBAL_MUTEX_LOCK
allocator->GetBudget(pBudget);
allocator->GetBudget(pBudget, 0, allocator->GetMemoryHeapCount());
}
#if VMA_STATS_STRING_ENABLED