Added VmaDefragmentationInfo2::poolCount, pPools. Added test for it - TestDefragmentationWholePool. Removed VmaDefragmentationStats::allocationsLost. Optimized defragmentation algorithm.

Adam Sawicki 2018-11-22 16:14:50 +01:00
parent da5d248506
commit 52076ebf26
2 changed files with 287 additions and 61 deletions

View File

@ -1361,6 +1361,93 @@ void TestDefragmentationSimple()
vmaDestroyPool(g_hAllocator, pool);
}
void TestDefragmentationWholePool()
{
wprintf(L"Test defragmentation whole pool\n");
RandomNumberGenerator rand(668);
const VkDeviceSize BUF_SIZE = 0x10000;
const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufCreateInfo.size = BUF_SIZE;
bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo exampleAllocCreateInfo = {};
exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
uint32_t memTypeIndex = UINT32_MAX;
vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
VmaPoolCreateInfo poolCreateInfo = {};
poolCreateInfo.blockSize = BLOCK_SIZE;
poolCreateInfo.memoryTypeIndex = memTypeIndex;
VmaDefragmentationStats defragStats[2];
for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex)
{
VmaPool pool;
ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
std::vector<AllocInfo> allocations;
// Buffers of fixed size.
// Fill 2 blocks. Remove odd buffers. Defragment all of them.
for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
{
AllocInfo allocInfo;
CreateBuffer(pool, bufCreateInfo, false, allocInfo);
allocations.push_back(allocInfo);
}
for(size_t i = 1; i < allocations.size(); ++i)
{
DestroyAllocation(allocations[i]);
allocations.erase(allocations.begin() + i);
}
VmaDefragmentationInfo2 defragInfo = {};
defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
std::vector<VmaAllocation> allocationsToDefrag;
if(caseIndex == 0)
{
defragInfo.poolCount = 1;
defragInfo.pPools = &pool;
}
else
{
const size_t allocCount = allocations.size();
allocationsToDefrag.resize(allocCount);
std::transform(
allocations.begin(), allocations.end(),
allocationsToDefrag.begin(),
[](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; });
defragInfo.allocationCount = (uint32_t)allocCount;
defragInfo.pAllocations = allocationsToDefrag.data();
}
VmaDefragmentationContext defragCtx = VK_NULL_HANDLE;
VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx);
TEST(res >= VK_SUCCESS);
vmaDefragmentationEnd(g_hAllocator, defragCtx);
TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0);
ValidateAllocationsData(allocations.data(), allocations.size());
DestroyAllAllocations(allocations);
vmaDestroyPool(g_hAllocator, pool);
}
TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved);
TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved);
TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed);
TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed);
}
void TestDefragmentationFull()
{
std::vector<AllocInfo> allocations;
@ -1577,7 +1664,6 @@ static void TestDefragmentationGpu(uint32_t flags)
TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
TEST(stats.allocationsLost == 0);
}
ValidateGpuData(allocations.data(), allocations.size());
@ -4933,17 +5019,18 @@ void Test()
{
wprintf(L"TESTING:\n");
if(true)
if(false)
{
// # Temporarily insert custom tests here
// ########################################
// ########################################
TestDefragmentationGpu(0);
TestDefragmentationGpu(VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT);
TestDefragmentationGpu(VMA_DEFRAGMENTATION_OPTIMAL_ALGORITHM_BIT);
TestDefragmentationSimple();
TestDefragmentationFull();
TestDefragmentationWholePool();
//TestDefragmentationSimple();
//TestDefragmentationFull();
//TestDefragmentationGpu(0);
//TestDefragmentationGpu(VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT);
//TestDefragmentationGpu(VMA_DEFRAGMENTATION_OPTIMAL_ALGORITHM_BIT);
return;
}
@ -4979,6 +5066,7 @@ void Test()
TestDefragmentationSimple();
TestDefragmentationFull();
TestDefragmentationWholePool();
TestDefragmentationGpu(0);
TestDefragmentationGpu(VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT);
TestDefragmentationGpu(VMA_DEFRAGMENTATION_OPTIMAL_ALGORITHM_BIT);

View File

@ -2615,7 +2615,10 @@ typedef struct VmaDefragmentationInfo2 {
/** \brief Pointer to array of allocations that can be defragmented.
The array should have `allocationCount` elements.
All other allocations are considered non-moveable during this defragmentation.
The array should not contain nulls.
Elements in the array should be unique - same allocation cannot occur twice.
It is safe to pass allocations that are in the lost state - they are ignored.
All allocations not present in this array are considered non-moveable during this defragmentation.
*/
VmaAllocation* pAllocations;
/** \brief Optional, output. Pointer to an array that will be filled with information about whether the allocation at a given index has been changed (moved or lost) during defragmentation.
@ -2624,6 +2627,25 @@ typedef struct VmaDefragmentationInfo2 {
You can pass null if you are not interested in this information.
*/
VkBool32* pAllocationsChanged;
/** \brief Number of pools in the `pPools` array.
*/
uint32_t poolCount;
/** \brief Either null or pointer to array of pools to be defragmented.
All allocations in the specified pools can be moved during defragmentation.
There is no per-allocation feedback like `pAllocationsChanged` for them,
so if you might need to recreate buffers and images bound to these allocations,
you must query all of them for their new `VkDeviceMemory` and offset
using vmaGetAllocationInfo() after the defragmentation ends.
The array should have `poolCount` elements.
The array should not contain nulls.
Elements in the array should be unique - same pool cannot occur twice.
Using this array is equivalent to specifying all allocations from the pools in `pAllocations`.
It might be more efficient.
*/
VmaPool* pPools;
/** \brief Maximum total number of bytes that can be copied while moving allocations to different places using transfers on the CPU side, like `memcpy()`, `memmove()`.
`VK_WHOLE_SIZE` means no limit.
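The new fields give two equivalent ways to select what is movable. Below is a minimal sketch of filling the struct both ways, assuming the pool handle and allocation list are owned by the caller; the helper name FillDefragInfo and the wholePool flag are made up for illustration, only the VMA field names come from this header:

#include "vk_mem_alloc.h"
#include <cstdint>
#include <vector>

// Hypothetical helper (not part of this commit): fill VmaDefragmentationInfo2
// either with the new whole-pool selection or with an explicit allocation list.
static void FillDefragInfo(
    VmaDefragmentationInfo2& outInfo,
    VmaPool& pool,                           // owned by the caller; pPools points at it
    std::vector<VmaAllocation>& allocations, // owned by the caller
    bool wholePool)
{
    outInfo = {};
    outInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;    // no limit on CPU-side copies
    outInfo.maxCpuAllocationsToMove = UINT32_MAX; // no limit on moved allocations
    if(wholePool)
    {
        // New in this commit: every allocation in the pool becomes movable.
        outInfo.poolCount = 1;
        outInfo.pPools = &pool;
    }
    else
    {
        // Existing path: only the explicitly listed allocations are movable.
        outInfo.allocationCount = (uint32_t)allocations.size();
        outInfo.pAllocations = allocations.data();
    }
}

In TestDefragmentationWholePool above, caseIndex 0 exercises the whole-pool branch and caseIndex 1 the explicit-list branch; the test then checks that both produce identical statistics.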
@ -2682,8 +2704,6 @@ typedef struct VmaDefragmentationStats {
uint32_t allocationsMoved;
/// Number of empty `VkDeviceMemory` objects that have been released to the system.
uint32_t deviceMemoryBlocksFreed;
/// Number of allocations that became lost in the process of defragmentation.
uint32_t allocationsLost;
} VmaDefragmentationStats;
/** \brief Begins defragmentation process.
@ -2698,14 +2718,15 @@ Use this function instead of old, deprecated vmaDefragment().
Warning! Between the call to vmaDefragmentationBegin() and vmaDefragmentationEnd():
- You should not use any of allocations passed as `pInfo->pAllocations`,
including calling vmaGetAllocationInfo(), vmaTouchAllocation(), or accessing
- You should not use any of the allocations passed as `pInfo->pAllocations` or
any allocations that belong to pools passed as `pInfo->pPools`,
including calling vmaGetAllocationInfo(), vmaTouchAllocation(), or accessing
their data.
- Some mutexes protecting internal data structures may be locked, so trying to
make or free any allocations, bind buffers or images, map memory, or launch
another simultaneous defragmentation in between may cause stall (when done on
another thread) or deadlock (when done on the same thread), unless you are
100% sure that defragmented allocations are from different pool.
100% sure that defragmented allocations are in a different pool.
- Information returned via `pStats` and `pInfo->pAllocationsChanged` is undefined.
It becomes valid only after the call to vmaDefragmentationEnd().
- If `pInfo->commandBuffer != VK_NULL_HANDLE`, you must submit that command buffer
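As a companion to the warning above, here is a hedged sketch of the expected call sequence when whole pools are defragmented. Recreating buffers bound to moved allocations is application-specific and only marked with a comment; DefragmentWholePool is an illustrative name, not a VMA function:

#include "vk_mem_alloc.h"
#include <vector>

// Hypothetical usage sketch (not part of this commit).
static void DefragmentWholePool(VmaAllocator allocator, VmaPool pool,
    std::vector<VmaAllocation>& poolAllocations) // tracked by the application
{
    VmaDefragmentationInfo2 info = {};
    info.poolCount = 1;
    info.pPools = &pool;
    info.maxCpuBytesToMove = VK_WHOLE_SIZE;
    info.maxCpuAllocationsToMove = UINT32_MAX;

    VmaDefragmentationStats stats = {};
    VmaDefragmentationContext ctx = VK_NULL_HANDLE;
    VkResult res = vmaDefragmentationBegin(allocator, &info, &stats, &ctx);
    // Between Begin and End: do not touch allocations from `pool`
    // (no vmaGetAllocationInfo, vmaTouchAllocation, or data access).
    if(res >= VK_SUCCESS)
    {
        vmaDefragmentationEnd(allocator, ctx);
        // stats.bytesMoved / stats.allocationsMoved now describe what happened.
        // pAllocationsChanged is not available for the pPools path, so query
        // every allocation in the pool for its new memory and offset.
        for(VmaAllocation alloc : poolAllocations)
        {
            VmaAllocationInfo allocInfo = {};
            vmaGetAllocationInfo(allocator, alloc, &allocInfo);
            // If allocInfo.deviceMemory or allocInfo.offset differ from what a
            // buffer was bound to, destroy and recreate that buffer here.
        }
    }
}

When defragmenting an explicit pAllocations list instead, pInfo->pAllocationsChanged can be used to limit the re-query to allocations that actually moved.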
@ -5085,6 +5106,8 @@ public:
virtual bool ResizeAllocation(const VmaAllocation alloc, VkDeviceSize newSize);
private:
friend class VmaDefragmentationAlgorithm;
uint32_t m_FreeCount;
VkDeviceSize m_SumFreeSize;
VmaSuballocationList m_Suballocations;
@ -5610,6 +5633,10 @@ public:
class VmaBlockVectorDefragmentationContext* pCtx,
VmaDefragmentationStats* pStats);
////////////////////////////////////////////////////////////////////////////////
// To be used only while the m_Mutex is locked. Used during defragmentation.
size_t CalcAllocationCount();
private:
friend class VmaDefragmentationAlgorithm;
@ -5715,6 +5742,7 @@ public:
virtual ~VmaDefragmentationAlgorithm();
void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged);
void AddAll() { m_AllAllocations = true; }
VkResult Defragment(
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves,
@ -5730,6 +5758,9 @@ private:
const uint32_t m_CurrentFrameIndex;
const uint32_t m_AlgorithmFlags;
uint32_t m_AllocationCount;
bool m_AllAllocations;
VkDeviceSize m_BytesMoved;
uint32_t m_AllocationsMoved;
@ -5743,6 +5774,11 @@ private:
m_pChanged(VMA_NULL)
{
}
AllocationInfo(VmaAllocation hAlloc, VkBool32* pChanged) :
m_hAllocation(hAlloc),
m_pChanged(pChanged)
{
}
};
struct AllocationInfoSizeGreater
@ -5761,9 +5797,6 @@ private:
}
};
// Used between AddAllocation and Defragment.
VmaVector< AllocationInfo, VmaStlAllocator<AllocationInfo> > m_Allocations;
struct BlockInfo
{
size_t m_OriginalBlockIndex;
@ -5885,14 +5918,30 @@ public:
VmaBlockVector* GetBlockVector() const { return m_pBlockVector; }
VmaDefragmentationAlgorithm* GetAlgorithm() const { return m_pAlgorithm; }
void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged);
void AddAll() { m_AllAllocations = true; }
void Begin();
private:
const VmaAllocator m_hAllocator;
// Null if not from custom pool.
const VmaPool m_hCustomPool;
// Redundant, for convenience not to fetch from m_hCustomPool->m_BlockVector or m_hAllocator->m_pBlockVectors.
VmaBlockVector* const m_pBlockVector;
const uint32_t m_CurrFrameIndex;
const uint32_t m_AlgorithmFlags;
// Owner of this object.
VmaDefragmentationAlgorithm* m_pAlgorithm;
struct AllocInfo
{
VmaAllocation hAlloc;
VkBool32* pChanged;
};
// Used between constructor and Begin.
VmaVector< AllocInfo, VmaStlAllocator<AllocInfo> > m_Allocations;
bool m_AllAllocations;
};
struct VmaDefragmentationContext_T
@ -5907,6 +5956,7 @@ public:
VmaDefragmentationStats* pStats);
~VmaDefragmentationContext_T();
void AddPools(uint32_t poolCount, VmaPool* pPools);
void AddAllocations(
uint32_t allocationCount,
VmaAllocation* pAllocations,
@ -11699,7 +11749,10 @@ void VmaBlockVector::Defragment(
pCtx->mutexLocked = true;
}
pCtx->Begin();
// Defragment.
const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove;
const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove;
VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> > moves =
@ -11768,6 +11821,16 @@ void VmaBlockVector::DefragmentationEnd(
}
}
size_t VmaBlockVector::CalcAllocationCount()
{
size_t result = 0;
for(size_t i = 0; i < m_Blocks.size(); ++i)
{
result += m_Blocks[i]->m_pMetadata->GetAllocationCount();
}
return result;
}
void VmaBlockVector::MakePoolAllocationsLost(
uint32_t currentFrameIndex,
size_t* pLostAllocationCount)
@ -11839,11 +11902,24 @@ VmaDefragmentationAlgorithm::VmaDefragmentationAlgorithm(
m_pBlockVector(pBlockVector),
m_CurrentFrameIndex(currentFrameIndex),
m_AlgorithmFlags(algorithmFlags),
m_AllAllocations(false),
m_AllocationCount(0),
m_BytesMoved(0),
m_AllocationsMoved(0),
m_Allocations(VmaStlAllocator<AllocationInfo>(hAllocator->GetAllocationCallbacks())),
m_Blocks(VmaStlAllocator<BlockInfo*>(hAllocator->GetAllocationCallbacks()))
{
// Create block info for each block.
const size_t blockCount = m_pBlockVector->m_Blocks.size();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks());
pBlockInfo->m_OriginalBlockIndex = blockIndex;
pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex];
m_Blocks.push_back(pBlockInfo);
}
// Sort them by m_pBlock pointer value.
VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockPointerLess());
}
VmaDefragmentationAlgorithm::~VmaDefragmentationAlgorithm()
@ -11856,10 +11932,23 @@ VmaDefragmentationAlgorithm::~VmaDefragmentationAlgorithm()
void VmaDefragmentationAlgorithm::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged)
{
AllocationInfo allocInfo;
allocInfo.m_hAllocation = hAlloc;
allocInfo.m_pChanged = pChanged;
m_Allocations.push_back(allocInfo);
// Now that we are inside VmaBlockVector::m_Mutex, we can do a final check that this allocation was not lost.
if(hAlloc->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST)
{
VmaDeviceMemoryBlock* pBlock = hAlloc->GetBlock();
BlockInfoVector::iterator it = VmaBinaryFindFirstNotLess(m_Blocks.begin(), m_Blocks.end(), pBlock, BlockPointerLess());
if(it != m_Blocks.end() && (*it)->m_pBlock == pBlock)
{
AllocationInfo allocInfo = AllocationInfo(hAlloc, pChanged);
(*it)->m_Allocations.push_back(allocInfo);
}
else
{
VMA_ASSERT(0);
}
++m_AllocationCount;
}
}
VkResult VmaDefragmentationAlgorithm::DefragmentRound(
@ -12031,48 +12120,31 @@ VkResult VmaDefragmentationAlgorithm::Defragment(
VkDeviceSize maxBytesToMove,
uint32_t maxAllocationsToMove)
{
if(m_Allocations.empty())
if(!m_AllAllocations && m_AllocationCount == 0)
{
return VK_SUCCESS;
}
// Create block info for each block.
const size_t blockCount = m_pBlockVector->m_Blocks.size();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks());
pBlockInfo->m_OriginalBlockIndex = blockIndex;
pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex];
m_Blocks.push_back(pBlockInfo);
}
// Sort them by m_pBlock pointer value.
VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockPointerLess());
// Move allocation infos from m_Allocations to appropriate m_Blocks[memTypeIndex].m_Allocations.
for(size_t blockIndex = 0, allocCount = m_Allocations.size(); blockIndex < allocCount; ++blockIndex)
{
AllocationInfo& allocInfo = m_Allocations[blockIndex];
// Now as we are inside VmaBlockVector::m_Mutex, we can make final check if this allocation was not lost.
if(allocInfo.m_hAllocation->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST)
{
VmaDeviceMemoryBlock* pBlock = allocInfo.m_hAllocation->GetBlock();
BlockInfoVector::iterator it = VmaBinaryFindFirstNotLess(m_Blocks.begin(), m_Blocks.end(), pBlock, BlockPointerLess());
if(it != m_Blocks.end() && (*it)->m_pBlock == pBlock)
{
(*it)->m_Allocations.push_back(allocInfo);
}
else
{
VMA_ASSERT(0);
}
}
}
m_Allocations.clear();
const size_t blockCount = m_Blocks.size();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
BlockInfo* pBlockInfo = m_Blocks[blockIndex];
if(m_AllAllocations)
{
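// Whole-vector mode: every used suballocation in this block becomes a candidate for moving.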
VmaBlockMetadata_Generic* pMetadata = (VmaBlockMetadata_Generic*)pBlockInfo->m_pBlock->m_pMetadata;
for(VmaSuballocationList::const_iterator it = pMetadata->m_Suballocations.begin();
it != pMetadata->m_Suballocations.end();
++it)
{
if(it->type != VMA_SUBALLOCATION_TYPE_FREE)
{
AllocationInfo allocInfo = AllocationInfo(it->hAllocation, VMA_NULL);
pBlockInfo->m_Allocations.push_back(allocInfo);
}
}
}
pBlockInfo->CalcHasNonMovableAllocations();
if((m_AlgorithmFlags & VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT) != 0)
{
@ -12132,10 +12204,12 @@ VmaBlockVectorDefragmentationContext::VmaBlockVectorDefragmentationContext(
m_hAllocator(hAllocator),
m_hCustomPool(hCustomPool),
m_pBlockVector(pBlockVector),
m_pAlgorithm(VMA_NULL)
m_CurrFrameIndex(currFrameIndex),
m_AlgorithmFlags(algorithmFlags),
m_pAlgorithm(VMA_NULL),
m_Allocations(VmaStlAllocator<AllocInfo>(hAllocator->GetAllocationCallbacks())),
m_AllAllocations(false)
{
m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm)(
m_hAllocator, m_pBlockVector, currFrameIndex, algorithmFlags);
}
VmaBlockVectorDefragmentationContext::~VmaBlockVectorDefragmentationContext()
@ -12143,6 +12217,33 @@ VmaBlockVectorDefragmentationContext::~VmaBlockVectorDefragmentationContext()
vma_delete(m_hAllocator, m_pAlgorithm);
}
void VmaBlockVectorDefragmentationContext::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged)
{
AllocInfo info = { hAlloc, pChanged };
m_Allocations.push_back(info);
}
void VmaBlockVectorDefragmentationContext::Begin()
{
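// Use whole-vector defragmentation when it was requested explicitly or when the caller added every allocation this block vector contains.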
const bool allAllocations = m_AllAllocations ||
m_Allocations.size() == m_pBlockVector->CalcAllocationCount();
m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm)(
m_hAllocator, m_pBlockVector, m_CurrFrameIndex, m_AlgorithmFlags);
if(allAllocations)
{
m_pAlgorithm->AddAll();
}
else
{
for(size_t i = 0, count = m_Allocations.size(); i < count; ++i)
{
m_pAlgorithm->AddAllocation(m_Allocations[i].hAlloc, m_Allocations[i].pChanged);
}
}
}
////////////////////////////////////////////////////////////////////////////////
// VmaDefragmentationContext
@ -12179,13 +12280,49 @@ VmaDefragmentationContext_T::~VmaDefragmentationContext_T()
}
}
void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, VmaPool* pPools)
{
for(uint32_t poolIndex = 0; poolIndex < poolCount; ++poolIndex)
{
VmaPool pool = pPools[poolIndex];
VMA_ASSERT(pool);
// Pools with algorithm other than default are not defragmented.
if(pool->m_BlockVector.GetAlgorithm() == 0)
{
VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL;
for(size_t i = m_CustomPoolContexts.size(); i--; )
{
if(m_CustomPoolContexts[i]->GetCustomPool() == pool)
{
pBlockVectorDefragCtx = m_CustomPoolContexts[i];
break;
}
}
if(!pBlockVectorDefragCtx)
{
pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)(
m_hAllocator,
pool,
&pool->m_BlockVector,
m_CurrFrameIndex,
m_AlgorithmFlags);
m_CustomPoolContexts.push_back(pBlockVectorDefragCtx);
}
pBlockVectorDefragCtx->AddAll();
}
}
}
void VmaDefragmentationContext_T::AddAllocations(
uint32_t allocationCount,
VmaAllocation* pAllocations,
VkBool32* pAllocationsChanged)
{
// Dispatch pAllocations among defragmentators. Create them when necessary.
for(size_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
{
const VmaAllocation hAlloc = pAllocations[allocIndex];
VMA_ASSERT(hAlloc);
@ -12244,7 +12381,7 @@ void VmaDefragmentationContext_T::AddAllocations(
{
VkBool32* const pChanged = (pAllocationsChanged != VMA_NULL) ?
&pAllocationsChanged[allocIndex] : VMA_NULL;
pBlockVectorDefragCtx->GetAlgorithm()->AddAllocation(hAlloc, pChanged);
pBlockVectorDefragCtx->AddAllocation(hAlloc, pChanged);
}
}
}
@ -13580,6 +13717,7 @@ VkResult VmaAllocator_T::DefragmentationBegin(
*pContext = vma_new(this, VmaDefragmentationContext_T)(
this, m_CurrentFrameIndex.load(), algorithmFlags, pStats);
(*pContext)->AddPools(info.poolCount, info.pPools);
(*pContext)->AddAllocations(
info.allocationCount, info.pAllocations, info.pAllocationsChanged);