Added benchmark for linear allocator.

This commit is contained in:
Adam Sawicki 2018-08-24 11:18:41 +02:00
parent dedab850e9
commit 0a60713b07
4 changed files with 200 additions and 18 deletions

View File

@ -206,6 +206,19 @@ private:
uint32_t GenerateFast() { return m_Value = (m_Value * 196314165 + 907633515); } uint32_t GenerateFast() { return m_Value = (m_Value * 196314165 + 907633515); }
}; };
// Adapter that lets a RandomNumberGenerator be passed where the standard library expects
// a "UniformRandomBitGenerator", e.g. as the last argument of std::shuffle.
// Only a reference is stored, so the wrapped generator must outlive this object.
struct MyUniformRandomNumberGenerator
{
    using result_type = uint32_t;

    MyUniformRandomNumberGenerator(RandomNumberGenerator& gen) : m_Gen(gen) { }

    // Declared bounds of the values returned by operator().
    // constexpr, because the UniformRandomBitGenerator requirements need min()/max()
    // to be usable in constant expressions.
    static constexpr result_type min() { return 0; }
    static constexpr result_type max() { return UINT32_MAX; }

    // Returns the next value produced by the wrapped generator.
    result_type operator()() { return m_Gen.Generate(); }

private:
    RandomNumberGenerator& m_Gen;
};
void ReadFile(std::vector<char>& out, const char* fileName); void ReadFile(std::vector<char>& out, const char* fileName);
enum class CONSOLE_COLOR enum class CONSOLE_COLOR

View File

@ -7,8 +7,26 @@
#ifdef _WIN32 #ifdef _WIN32
// Size class of the test configuration.
// The numeric order is meaningful: code such as BenchmarkLinearAllocator() compares
// ConfigType against these values with >= and runs more cases for higher values.
enum CONFIG_TYPE
{
    CONFIG_TYPE_MINIMUM = 0,
    CONFIG_TYPE_SMALL   = 1,
    CONFIG_TYPE_AVERAGE = 2,
    CONFIG_TYPE_LARGE   = 3,
    CONFIG_TYPE_MAXIMUM = 4,
    CONFIG_TYPE_COUNT   = 5 // Number of configuration types; not a configuration itself.
};

// Configuration selected at compile time. Swap the two lines below to switch to LARGE.
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT }; enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
// Printable names of FREE_ORDER values, indexed by the enumerator's numeric value
// (FORWARD, BACKWARD, RANDOM). FREE_ORDER::COUNT has no entry in this table.
static const wchar_t* FREE_ORDER_NAMES[] = { L"FORWARD", L"BACKWARD", L"RANDOM" };
struct AllocationSize struct AllocationSize
{ {
uint32_t Probability; uint32_t Probability;
@ -1948,6 +1966,169 @@ static void ManuallyTestLinearAllocator()
vmaDestroyPool(g_hAllocator, pool); vmaDestroyPool(g_hAllocator, pool);
} }
static void BenchmarkLinearAllocatorCase(bool linear, bool empty, FREE_ORDER freeOrder)
{
RandomNumberGenerator rand{16223};
const VkDeviceSize bufSizeMin = 32;
const VkDeviceSize bufSizeMax = 1024;
const size_t maxBufCapacity = 10000;
const uint32_t iterationCount = 10;
VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
sampleBufCreateInfo.size = bufSizeMax;
sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo sampleAllocCreateInfo = {};
sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VmaPoolCreateInfo poolCreateInfo = {};
VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
assert(res == VK_SUCCESS);
poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
if(linear)
poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
VmaPool pool = nullptr;
res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
assert(res == VK_SUCCESS);
// Buffer created just to get memory requirements. Never bound to any memory.
VkBuffer dummyBuffer = VK_NULL_HANDLE;
res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, nullptr, &dummyBuffer);
assert(res == VK_SUCCESS && dummyBuffer);
VkMemoryRequirements memReq = {};
vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
vkDestroyBuffer(g_hDevice, dummyBuffer, nullptr);
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.pool = pool;
VmaAllocation alloc;
std::vector<VmaAllocation> baseAllocations;
if(!empty)
{
// Make allocations up to half of pool size.
VkDeviceSize totalSize = 0;
while(totalSize < poolCreateInfo.blockSize / 2)
{
memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
assert(res == VK_SUCCESS);
baseAllocations.push_back(alloc);
totalSize += memReq.size;
}
// Delete half of them, choose randomly.
size_t allocsToDelete = baseAllocations.size() / 2;
for(size_t i = 0; i < allocsToDelete; ++i)
{
const size_t index = (size_t)rand.Generate() % baseAllocations.size();
vmaFreeMemory(g_hAllocator, baseAllocations[index]);
baseAllocations.erase(baseAllocations.begin() + index);
}
}
// BENCHMARK
const size_t allocCount = maxBufCapacity / 2;
std::vector<VmaAllocation> testAllocations;
testAllocations.reserve(allocCount);
duration allocTotalDuration = duration::zero();
duration freeTotalDuration = duration::zero();
for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
{
// Allocations
time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < allocCount; ++i)
{
memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
assert(res == VK_SUCCESS);
testAllocations.push_back(alloc);
}
allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
// Deallocations
switch(freeOrder)
{
case FREE_ORDER::FORWARD:
// Leave testAllocations unchanged.
break;
case FREE_ORDER::BACKWARD:
std::reverse(testAllocations.begin(), testAllocations.end());
break;
case FREE_ORDER::RANDOM:
std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
break;
default: assert(0);
}
time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < allocCount; ++i)
vmaFreeMemory(g_hAllocator, testAllocations[i]);
freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
testAllocations.clear();
}
// Delete baseAllocations
while(!baseAllocations.empty())
{
vmaFreeMemory(g_hAllocator, baseAllocations.back());
baseAllocations.pop_back();
}
vmaDestroyPool(g_hAllocator, pool);
wprintf(L" LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",
linear ? 1 : 0,
empty ? L"Empty" : L"Not empty",
FREE_ORDER_NAMES[(size_t)freeOrder],
ToFloatSeconds(allocTotalDuration),
ToFloatSeconds(freeTotalDuration));
}
static void BenchmarkLinearAllocator()
{
wprintf(L"Benchmark linear allocator\n");
uint32_t freeOrderCount = 1;
if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
freeOrderCount = 3;
else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
freeOrderCount = 2;
const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
{
FREE_ORDER freeOrder = FREE_ORDER::COUNT;
switch(freeOrderIndex)
{
case 0: freeOrder = FREE_ORDER::BACKWARD; break;
case 1: freeOrder = FREE_ORDER::FORWARD; break;
case 2: freeOrder = FREE_ORDER::RANDOM; break;
default: assert(0);
}
for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
{
for(uint32_t linearIndex = 0; linearIndex < 2; ++linearIndex)
{
BenchmarkLinearAllocatorCase(
linearIndex ? 1 : 0, // linear
emptyIndex ? 0 : 1, // empty
freeOrder); // freeOrder
}
}
}
}
static void TestPool_SameSize() static void TestPool_SameSize()
{ {
const VkDeviceSize BUF_SIZE = 1024 * 1024; const VkDeviceSize BUF_SIZE = 1024 * 1024;
@ -3194,17 +3375,6 @@ static void PerformCustomPoolTest(FILE* file)
WritePoolTestResult(file, "Code desc", "Test desc", config, result); WritePoolTestResult(file, "Code desc", "Test desc", config, result);
} }
// Size class of the test configuration.
// The numeric order is meaningful: code such as BenchmarkLinearAllocator() compares
// ConfigType against these values with >= and runs more cases for higher values.
enum CONFIG_TYPE
{
    CONFIG_TYPE_MINIMUM = 0,
    CONFIG_TYPE_SMALL   = 1,
    CONFIG_TYPE_AVERAGE = 2,
    CONFIG_TYPE_LARGE   = 3,
    CONFIG_TYPE_MAXIMUM = 4,
    CONFIG_TYPE_COUNT   = 5 // Number of configuration types; not a configuration itself.
};

// Configuration selected at compile time. Swap the two lines below to switch to LARGE.
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
static const char* CODE_DESCRIPTION = "Foo"; static const char* CODE_DESCRIPTION = "Foo";
static void PerformMainTests(FILE* file) static void PerformMainTests(FILE* file)
@ -3687,6 +3857,7 @@ void Test()
TestMappingMultithreaded(); TestMappingMultithreaded();
TestLinearAllocator(); TestLinearAllocator();
ManuallyTestLinearAllocator(); ManuallyTestLinearAllocator();
BenchmarkLinearAllocator();
TestDefragmentationSimple(); TestDefragmentationSimple();
TestDefragmentationFull(); TestDefragmentationFull();

View File

@ -16,16 +16,14 @@ macros if you want to configure the library and then include its header to
include all public interface declarations. Example: include all public interface declarations. Example:
*/ */
//#define VMA_USE_STL_CONTAINERS 1
//#define VMA_HEAVY_ASSERT(expr) assert(expr) //#define VMA_HEAVY_ASSERT(expr) assert(expr)
//#define VMA_USE_STL_CONTAINERS 1
//#define VMA_DEDICATED_ALLOCATION 0 //#define VMA_DEDICATED_ALLOCATION 0
//#define VMA_DEBUG_MARGIN 16 //#define VMA_DEBUG_MARGIN 16
//#define VMA_DEBUG_DETECT_CORRUPTION 1 //#define VMA_DEBUG_DETECT_CORRUPTION 1
//#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1 //#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
//#define VMA_RECORDING_ENABLED 0 //#define VMA_RECORDING_ENABLED 0
//#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256
#pragma warning(push, 4) #pragma warning(push, 4)
#pragma warning(disable: 4127) // conditional expression is constant #pragma warning(disable: 4127) // conditional expression is constant

View File

@ -641,7 +641,7 @@ you can achieve behavior of a ring buffer / queue.
![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png) ![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
Pools with linear algorithm support lost allocations when used as ring buffer. Pools with linear algorithm support [lost allocations](@ref lost_allocations) when used as ring buffer.
If there is not enough free space for a new allocation, but existing allocations If there is not enough free space for a new allocation, but existing allocations
from the front of the queue can become lost, they become lost and the allocation from the front of the queue can become lost, they become lost and the allocation
succeeds. succeeds.
@ -8333,7 +8333,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest(
for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )
{ {
const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];
if(VmaBlocksOnSamePage(nextSuballoc.offset, nextSuballoc.size, resultOffset, bufferImageGranularity)) if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
{ {
if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType))
{ {
@ -8364,7 +8364,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest(
for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )
{ {
const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];
if(VmaBlocksOnSamePage(resultOffset, allocSize, prevSuballoc.offset, bufferImageGranularity)) if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
{ {
if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type))
{ {