Added benchmark for linear allocator.

This commit is contained in:
Adam Sawicki 2018-08-24 11:18:41 +02:00
parent dedab850e9
commit 0a60713b07
4 changed files with 200 additions and 18 deletions

View File

@ -206,6 +206,19 @@ private:
uint32_t GenerateFast() { return m_Value = (m_Value * 196314165 + 907633515); }
};
// Wrapper for RandomNumberGenerator compatible with STL "UniformRandomNumberGenerator" idea.
struct MyUniformRandomNumberGenerator
{
typedef uint32_t result_type;
MyUniformRandomNumberGenerator(RandomNumberGenerator& gen) : m_Gen(gen) { }
static uint32_t min() { return 0; }
static uint32_t max() { return UINT32_MAX; }
uint32_t operator()() { return m_Gen.Generate(); }
private:
RandomNumberGenerator& m_Gen;
};
void ReadFile(std::vector<char>& out, const char* fileName);
enum class CONSOLE_COLOR

View File

@ -7,8 +7,26 @@
#ifdef _WIN32
enum CONFIG_TYPE {
CONFIG_TYPE_MINIMUM,
CONFIG_TYPE_SMALL,
CONFIG_TYPE_AVERAGE,
CONFIG_TYPE_LARGE,
CONFIG_TYPE_MAXIMUM,
CONFIG_TYPE_COUNT
};
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
static const wchar_t* FREE_ORDER_NAMES[] = {
L"FORWARD",
L"BACKWARD",
L"RANDOM",
};
struct AllocationSize
{
uint32_t Probability;
@ -1948,6 +1966,169 @@ static void ManuallyTestLinearAllocator()
vmaDestroyPool(g_hAllocator, pool);
}
static void BenchmarkLinearAllocatorCase(bool linear, bool empty, FREE_ORDER freeOrder)
{
RandomNumberGenerator rand{16223};
const VkDeviceSize bufSizeMin = 32;
const VkDeviceSize bufSizeMax = 1024;
const size_t maxBufCapacity = 10000;
const uint32_t iterationCount = 10;
VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
sampleBufCreateInfo.size = bufSizeMax;
sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo sampleAllocCreateInfo = {};
sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VmaPoolCreateInfo poolCreateInfo = {};
VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
assert(res == VK_SUCCESS);
poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
if(linear)
poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
VmaPool pool = nullptr;
res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
assert(res == VK_SUCCESS);
// Buffer created just to get memory requirements. Never bound to any memory.
VkBuffer dummyBuffer = VK_NULL_HANDLE;
res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, nullptr, &dummyBuffer);
assert(res == VK_SUCCESS && dummyBuffer);
VkMemoryRequirements memReq = {};
vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
vkDestroyBuffer(g_hDevice, dummyBuffer, nullptr);
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.pool = pool;
VmaAllocation alloc;
std::vector<VmaAllocation> baseAllocations;
if(!empty)
{
// Make allocations up to half of pool size.
VkDeviceSize totalSize = 0;
while(totalSize < poolCreateInfo.blockSize / 2)
{
memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
assert(res == VK_SUCCESS);
baseAllocations.push_back(alloc);
totalSize += memReq.size;
}
// Delete half of them, choose randomly.
size_t allocsToDelete = baseAllocations.size() / 2;
for(size_t i = 0; i < allocsToDelete; ++i)
{
const size_t index = (size_t)rand.Generate() % baseAllocations.size();
vmaFreeMemory(g_hAllocator, baseAllocations[index]);
baseAllocations.erase(baseAllocations.begin() + index);
}
}
// BENCHMARK
const size_t allocCount = maxBufCapacity / 2;
std::vector<VmaAllocation> testAllocations;
testAllocations.reserve(allocCount);
duration allocTotalDuration = duration::zero();
duration freeTotalDuration = duration::zero();
for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
{
// Allocations
time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < allocCount; ++i)
{
memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
assert(res == VK_SUCCESS);
testAllocations.push_back(alloc);
}
allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
// Deallocations
switch(freeOrder)
{
case FREE_ORDER::FORWARD:
// Leave testAllocations unchanged.
break;
case FREE_ORDER::BACKWARD:
std::reverse(testAllocations.begin(), testAllocations.end());
break;
case FREE_ORDER::RANDOM:
std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
break;
default: assert(0);
}
time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < allocCount; ++i)
vmaFreeMemory(g_hAllocator, testAllocations[i]);
freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
testAllocations.clear();
}
// Delete baseAllocations
while(!baseAllocations.empty())
{
vmaFreeMemory(g_hAllocator, baseAllocations.back());
baseAllocations.pop_back();
}
vmaDestroyPool(g_hAllocator, pool);
wprintf(L" LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",
linear ? 1 : 0,
empty ? L"Empty" : L"Not empty",
FREE_ORDER_NAMES[(size_t)freeOrder],
ToFloatSeconds(allocTotalDuration),
ToFloatSeconds(freeTotalDuration));
}
static void BenchmarkLinearAllocator()
{
wprintf(L"Benchmark linear allocator\n");
uint32_t freeOrderCount = 1;
if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
freeOrderCount = 3;
else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
freeOrderCount = 2;
const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
{
FREE_ORDER freeOrder = FREE_ORDER::COUNT;
switch(freeOrderIndex)
{
case 0: freeOrder = FREE_ORDER::BACKWARD; break;
case 1: freeOrder = FREE_ORDER::FORWARD; break;
case 2: freeOrder = FREE_ORDER::RANDOM; break;
default: assert(0);
}
for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
{
for(uint32_t linearIndex = 0; linearIndex < 2; ++linearIndex)
{
BenchmarkLinearAllocatorCase(
linearIndex ? 1 : 0, // linear
emptyIndex ? 0 : 1, // empty
freeOrder); // freeOrder
}
}
}
}
static void TestPool_SameSize()
{
const VkDeviceSize BUF_SIZE = 1024 * 1024;
@ -3194,17 +3375,6 @@ static void PerformCustomPoolTest(FILE* file)
WritePoolTestResult(file, "Code desc", "Test desc", config, result);
}
enum CONFIG_TYPE {
CONFIG_TYPE_MINIMUM,
CONFIG_TYPE_SMALL,
CONFIG_TYPE_AVERAGE,
CONFIG_TYPE_LARGE,
CONFIG_TYPE_MAXIMUM,
CONFIG_TYPE_COUNT
};
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
static const char* CODE_DESCRIPTION = "Foo";
static void PerformMainTests(FILE* file)
@ -3687,6 +3857,7 @@ void Test()
TestMappingMultithreaded();
TestLinearAllocator();
ManuallyTestLinearAllocator();
BenchmarkLinearAllocator();
TestDefragmentationSimple();
TestDefragmentationFull();

View File

@ -16,16 +16,14 @@ macros if you want to configure the library and then include its header to
include all public interface declarations. Example:
*/
//#define VMA_USE_STL_CONTAINERS 1
//#define VMA_HEAVY_ASSERT(expr) assert(expr)
//#define VMA_USE_STL_CONTAINERS 1
//#define VMA_DEDICATED_ALLOCATION 0
//#define VMA_DEBUG_MARGIN 16
//#define VMA_DEBUG_DETECT_CORRUPTION 1
//#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
//#define VMA_RECORDING_ENABLED 0
//#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256
#pragma warning(push, 4)
#pragma warning(disable: 4127) // conditional expression is constant

View File

@ -641,7 +641,7 @@ you can achieve behavior of a ring buffer / queue.
![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
Pools with linear algorithm support lost allocations when used as ring buffer.
Pools with linear algorithm support [lost allocations](@ref lost_allocations) when used as ring buffer.
If there is not enough free space for a new allocation, but existing allocations
from the front of the queue can become lost, they become lost and the allocation
succeeds.
@ -8333,7 +8333,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest(
for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )
{
const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];
if(VmaBlocksOnSamePage(nextSuballoc.offset, nextSuballoc.size, resultOffset, bufferImageGranularity))
if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
{
if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType))
{
@ -8364,7 +8364,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest(
for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )
{
const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];
if(VmaBlocksOnSamePage(resultOffset, allocSize, prevSuballoc.offset, bufferImageGranularity))
if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
{
if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type))
{