diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h index 42d4e18..35b50a7 100644 --- a/src/vk_mem_alloc.h +++ b/src/vk_mem_alloc.h @@ -3206,7 +3206,6 @@ remove them if not needed. #include // for assert #include // for min, max #include -#include // for std::atomic #ifndef VMA_NULL // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. @@ -3396,7 +3395,8 @@ If providing your own implementation, you need to implement a subset of std::ato - bool compare_exchange_weak(uint32_t& expected, uint32_t desired) */ #ifndef VMA_ATOMIC_UINT32 - #define VMA_ATOMIC_UINT32 std::atomic + #include + #define VMA_ATOMIC_UINT32 std::atomic #endif #ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY @@ -3744,6 +3744,18 @@ static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) return true; } +/* +Fills structure with parameters of an example buffer to be used for transfers +during GPU memory defragmentation. +*/ +static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo) +{ + memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo)); + outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. +} + // Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). struct VmaMutexLock { @@ -6779,11 +6791,18 @@ public: void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + /* + Returns bit mask of memory types that can support defragmentation on GPU as + they support creation of required buffer for copy operations. + */ + uint32_t GetGpuDefragmentationMemoryTypeBits(); + private: VkDeviceSize m_PreferredLargeHeapBlockSize; VkPhysicalDevice m_PhysicalDevice; VMA_ATOMIC_UINT32 m_CurrentFrameIndex; + VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. VMA_RW_MUTEX m_PoolsMutex; // Protected by m_PoolsMutex. Sorted by pointer value. @@ -6838,6 +6857,12 @@ private: // Tries to free pMemory as Dedicated Memory. Returns true if found and freed. void FreeDedicatedMemory(VmaAllocation allocation); + + /* + Calculates and returns bit mask of memory types that can support defragmentation + on GPU as they support creation of required buffer for copy operations. + */ + uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; }; //////////////////////////////////////////////////////////////////////////////// @@ -12266,9 +12291,8 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu( // Go over all blocks. Create and bind buffer for whole block if necessary. { - VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT; + VkBufferCreateInfo bufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(bufCreateInfo); for(size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) { @@ -12428,7 +12452,8 @@ void VmaBlockVector::Defragment( const bool canDefragmentOnCpu = maxCpuBytesToMove > 0 && maxCpuAllocationsToMove > 0 && isHostVisible; const bool canDefragmentOnGpu = maxGpuBytesToMove > 0 && maxGpuAllocationsToMove > 0 && - !IsCorruptionDetectionEnabled(); + !IsCorruptionDetectionEnabled() && + ((1u << m_MemoryTypeIndex) & m_hAllocator->GetGpuDefragmentationMemoryTypeBits()) != 0; // There are options to defragment this memory type. if(canDefragmentOnCpu || canDefragmentOnGpu) @@ -14164,6 +14189,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_PreferredLargeHeapBlockSize(0), m_PhysicalDevice(pCreateInfo->physicalDevice), m_CurrentFrameIndex(0), + m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), m_Pools(VmaStlAllocator(GetAllocationCallbacks())), m_NextPoolId(0) #if VMA_RECORDING_ENABLED @@ -15555,6 +15581,31 @@ void VmaAllocator_T::FreeDedicatedMemory(VmaAllocation allocation) VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); } +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ + VkBufferCreateInfo dummyBufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + + uint32_t memoryTypeBits = 0; + + // Create buffer. + VkBuffer buf = VMA_NULL; + VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( + m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); + if(res == VK_SUCCESS) + { + // Query for supported memory types. + VkMemoryRequirements memReq; + (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); + memoryTypeBits = memReq.memoryTypeBits; + + // Destroy buffer. + (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); + } + + return memoryTypeBits; +} + void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) { if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && @@ -15576,6 +15627,17 @@ void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pat } } +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ + uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); + if(memoryTypeBits == UINT32_MAX) + { + memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); + m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); + } + return memoryTypeBits; +} + #if VMA_STATS_STRING_ENABLED void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)