diff --git a/LICENSE.txt b/LICENSE.txt index 67b0d01..bee6af7 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 8963ba1..8e5d1c0 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Additional features: - Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion. - Support for Vulkan 1.0 as well as 1.1. - Support for VK_KHR_dedicated_allocation extension: Just enable it and it will be used automatically by the library. +- Support for VK_AMD_device_coherent_memory extension. - Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted. - Lost allocations: Allocate memory with appropriate flags and let the library remove allocations that are not used for many frames to make room for new ones. - Statistics: Obtain detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type. diff --git a/docs/Recording file format.md b/docs/Recording file format.md index 586064a..6ca218c 100644 --- a/docs/Recording file format.md +++ b/docs/Recording file format.md @@ -60,6 +60,7 @@ Between them there can be zero or more lines with configuration options. 
They st Extension,VK_KHR_dedicated_allocation, Extension,VK_KHR_bind_memory2, Extension,VK_EXT_memory_budget, + Extension,VK_AMD_device_coherent_memory, Macro,VMA_DEBUG_ALWAYS_DEDICATED_MEMORY, Macro,VMA_DEBUG_ALIGNMENT, diff --git a/src/Common.cpp b/src/Common.cpp index 790cdda..c51ccca 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/Common.h b/src/Common.h index ec564d6..9019955 100644 --- a/src/Common.h +++ b/src/Common.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -64,11 +64,15 @@ typedef std::chrono::high_resolution_clock::duration duration; #define ERR_GUARD_VULKAN(expr) TEST((expr) >= 0) +extern VkInstance g_hVulkanInstance; extern VkPhysicalDevice g_hPhysicalDevice; extern VkDevice g_hDevice; extern VkInstance g_hVulkanInstance; extern VmaAllocator g_hAllocator; extern bool g_MemoryAliasingWarningEnabled; +extern bool VK_AMD_device_coherent_memory_enabled; + +void SetAllocatorCreateInfo(VmaAllocatorCreateInfo& outInfo); inline float ToFloatSeconds(duration d) { diff --git a/src/Shaders/Shader.frag b/src/Shaders/Shader.frag index 4060483..bbe754b 100644 --- a/src/Shaders/Shader.frag +++ b/src/Shaders/Shader.frag @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/Shaders/Shader.vert b/src/Shaders/Shader.vert index 06ff262..e62544f 100644 --- a/src/Shaders/Shader.vert +++ b/src/Shaders/Shader.vert @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/Shaders/SparseBindingTest.comp b/src/Shaders/SparseBindingTest.comp index 7c8889e..f615278 100644 --- a/src/Shaders/SparseBindingTest.comp +++ b/src/Shaders/SparseBindingTest.comp @@ -1,5 +1,5 @@ // -// Copyright (c) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/SparseBindingTest.cpp b/src/SparseBindingTest.cpp index 8fbd5e9..3c63641 100644 --- a/src/SparseBindingTest.cpp +++ b/src/SparseBindingTest.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/SparseBindingTest.h b/src/SparseBindingTest.h index 69b95d6..3b7a6e1 100644 --- a/src/SparseBindingTest.h +++ b/src/SparseBindingTest.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. 
All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/Tests.cpp b/src/Tests.cpp index 16ac94b..1890326 100644 --- a/src/Tests.cpp +++ b/src/Tests.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -4831,6 +4831,93 @@ static void TestMemoryUsage() } } +static uint32_t FindDeviceCoherentMemoryTypeBits() +{ + VkPhysicalDeviceMemoryProperties memProps; + vkGetPhysicalDeviceMemoryProperties(g_hPhysicalDevice, &memProps); + + uint32_t memTypeBits = 0; + for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i) + { + if(memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD) + memTypeBits |= 1u << i; + } + return memTypeBits; +} + +static void TestDeviceCoherentMemory() +{ + if(!VK_AMD_device_coherent_memory_enabled) + return; + + uint32_t deviceCoherentMemoryTypeBits = FindDeviceCoherentMemoryTypeBits(); + // Extension is enabled, feature is enabled, and the device still doesn't support any such memory type? + // OK then, so it's just fake! + if(deviceCoherentMemoryTypeBits == 0) + return; + + wprintf(L"Testing device coherent memory...\n"); + + // 1. Try to allocate buffer from a memory type that is DEVICE_COHERENT. 
+ + VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + bufCreateInfo.size = 0x10000; + bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD; + + AllocInfo alloc = {}; + VmaAllocationInfo allocInfo = {}; + VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo); + + // Make sure it succeeded and was really created in such memory type. + TEST(res == VK_SUCCESS); + TEST((1u << allocInfo.memoryType) & deviceCoherentMemoryTypeBits); + + alloc.Destroy(); + + // 2. Try to create a pool in such memory type. + { + VmaPoolCreateInfo poolCreateInfo = {}; + + res = vmaFindMemoryTypeIndex(g_hAllocator, UINT32_MAX, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex); + TEST(res == VK_SUCCESS); + TEST((1u << poolCreateInfo.memoryTypeIndex) & deviceCoherentMemoryTypeBits); + + VmaPool pool = VK_NULL_HANDLE; + res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool); + TEST(res == VK_SUCCESS); + + vmaDestroyPool(g_hAllocator, pool); + } + + // 3. Try the same with a local allocator created without VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT. + + VmaAllocatorCreateInfo allocatorCreateInfo = {}; + SetAllocatorCreateInfo(allocatorCreateInfo); + allocatorCreateInfo.flags &= ~VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT; + + VmaAllocator localAllocator = VK_NULL_HANDLE; + res = vmaCreateAllocator(&allocatorCreateInfo, &localAllocator); + TEST(res == VK_SUCCESS && localAllocator); + + res = vmaCreateBuffer(localAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo); + + // Make sure it failed. + TEST(res != VK_SUCCESS && !alloc.m_Buffer && !alloc.m_Allocation); + + // 4. Try to find memory type. 
+ { + uint32_t memTypeIndex = UINT_MAX; + res = vmaFindMemoryTypeIndex(localAllocator, UINT32_MAX, &allocCreateInfo, &memTypeIndex); + TEST(res != VK_SUCCESS); + } + + vmaDestroyAllocator(localAllocator); +} + static void TestBudget() { wprintf(L"Testing budget...\n"); @@ -6163,6 +6250,7 @@ void Test() TestAllocationsInitialization(); #endif TestMemoryUsage(); + TestDeviceCoherentMemory(); TestBudget(); TestMapping(); TestDeviceLocalMapped(); diff --git a/src/Tests.h b/src/Tests.h index df79318..d259fa9 100644 --- a/src/Tests.h +++ b/src/Tests.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaReplay/Common.cpp b/src/VmaReplay/Common.cpp index 104d390..b2bedf1 100644 --- a/src/VmaReplay/Common.cpp +++ b/src/VmaReplay/Common.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaReplay/Common.h b/src/VmaReplay/Common.h index 5d52e5c..3f966c9 100644 --- a/src/VmaReplay/Common.h +++ b/src/VmaReplay/Common.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaReplay/Constants.cpp b/src/VmaReplay/Constants.cpp index a7dc27d..28d9c3b 100644 --- a/src/VmaReplay/Constants.cpp +++ b/src/VmaReplay/Constants.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaReplay/Constants.h b/src/VmaReplay/Constants.h index 4a7479c..d2a5db0 100644 --- a/src/VmaReplay/Constants.h +++ b/src/VmaReplay/Constants.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaReplay/VmaReplay.cpp b/src/VmaReplay/VmaReplay.cpp index a8f474c..18991f9 100644 --- a/src/VmaReplay/VmaReplay.cpp +++ b/src/VmaReplay/VmaReplay.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -672,6 +672,7 @@ static bool g_UserDataEnabled = true; static bool g_MemStatsEnabled = false; VULKAN_EXTENSION_REQUEST g_VK_LAYER_LUNARG_standard_validation = VULKAN_EXTENSION_REQUEST::DEFAULT; VULKAN_EXTENSION_REQUEST g_VK_EXT_memory_budget_request = VULKAN_EXTENSION_REQUEST::DEFAULT; +VULKAN_EXTENSION_REQUEST g_VK_AMD_device_coherent_memory_request = VULKAN_EXTENSION_REQUEST::DEFAULT; struct StatsAfterLineEntry { @@ -1084,6 +1085,7 @@ private: Extension_VK_KHR_dedicated_allocation, Extension_VK_KHR_bind_memory2, Extension_VK_EXT_memory_budget, + Extension_VK_AMD_device_coherent_memory, Macro_VMA_DEBUG_ALWAYS_DEDICATED_MEMORY, Macro_VMA_DEBUG_ALIGNMENT, Macro_VMA_DEBUG_MARGIN, @@ -1218,6 +1220,8 @@ bool ConfigurationParser::Parse(LineSplit& lineSplit) SetOption(currLineNumber, OPTION::Extension_VK_KHR_bind_memory2, csvSplit.GetRange(2)); else if(StrRangeEq(subOptionName, "VK_EXT_memory_budget")) SetOption(currLineNumber, OPTION::Extension_VK_EXT_memory_budget, csvSplit.GetRange(2)); + else if(StrRangeEq(subOptionName, "VK_AMD_device_coherent_memory")) + SetOption(currLineNumber, OPTION::Extension_VK_AMD_device_coherent_memory, csvSplit.GetRange(2)); else printf("Line %zu: Unrecognized configuration option.\n", currLineNumber); } @@ -2223,6 +2227,11 @@ int Player::InitVulkan() default: assert(0); } + if(g_VK_AMD_device_coherent_memory_request == VULKAN_EXTENSION_REQUEST::ENABLED) + { + printf("WARNING: AMD_device_coherent_memory requested but not currently supported by the player.\n"); + } + if(m_MemoryBudgetEnabled) { enabledDeviceExtensions.push_back(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); diff --git a/src/VmaReplay/VmaUsage.cpp b/src/VmaReplay/VmaUsage.cpp index ee781cc..20555a4 100644 --- a/src/VmaReplay/VmaUsage.cpp +++ b/src/VmaReplay/VmaUsage.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 
Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaReplay/VmaUsage.h b/src/VmaReplay/VmaUsage.h index 1c96761..5c1b481 100644 --- a/src/VmaReplay/VmaUsage.h +++ b/src/VmaReplay/VmaUsage.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaUsage.cpp b/src/VmaUsage.cpp index 6dc7d7b..5dc0ded 100644 --- a/src/VmaUsage.cpp +++ b/src/VmaUsage.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VmaUsage.h b/src/VmaUsage.h index 5d82451..bec7ac7 100644 --- a/src/VmaUsage.h +++ b/src/VmaUsage.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/VulkanSample.cpp b/src/VulkanSample.cpp index e9872d5..7b6774f 100644 --- a/src/VulkanSample.cpp +++ b/src/VulkanSample.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. 
+// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -46,12 +46,13 @@ VmaAllocator g_hAllocator; VkInstance g_hVulkanInstance; bool g_MemoryAliasingWarningEnabled = true; -static bool g_EnableValidationLayer = true; -static bool VK_KHR_get_memory_requirements2_enabled = false; -static bool VK_KHR_get_physical_device_properties2_enabled = false; -static bool VK_KHR_dedicated_allocation_enabled = false; -static bool VK_KHR_bind_memory2_enabled = false; -static bool VK_EXT_memory_budget_enabled = false; +bool g_EnableValidationLayer = true; +bool VK_KHR_get_memory_requirements2_enabled = false; +bool VK_KHR_get_physical_device_properties2_enabled = false; +bool VK_KHR_dedicated_allocation_enabled = false; +bool VK_KHR_bind_memory2_enabled = false; +bool VK_EXT_memory_budget_enabled = false; +bool VK_AMD_device_coherent_memory_enabled = false; bool g_SparseBindingEnabled = false; static HINSTANCE g_hAppInstance; @@ -1115,6 +1116,60 @@ static void DestroySwapchain(bool destroyActualSwapchain) } } +static constexpr uint32_t GetVulkanApiVersion() +{ + return VMA_VULKAN_VERSION == 1001000 ? 
VK_API_VERSION_1_1 : VK_API_VERSION_1_0; +} + +void SetAllocatorCreateInfo(VmaAllocatorCreateInfo& outInfo) +{ + outInfo = {}; + + outInfo.physicalDevice = g_hPhysicalDevice; + outInfo.device = g_hDevice; + outInfo.instance = g_hVulkanInstance; + outInfo.vulkanApiVersion = GetVulkanApiVersion(); + + if(VK_KHR_dedicated_allocation_enabled) + { + outInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; + } + if(VK_KHR_bind_memory2_enabled) + { + outInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; + } +#if !defined(VMA_MEMORY_BUDGET) || VMA_MEMORY_BUDGET == 1 + if(VK_EXT_memory_budget_enabled && VK_KHR_get_physical_device_properties2_enabled) + { + outInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; + } +#endif + if(VK_AMD_device_coherent_memory_enabled) + { + outInfo.flags |= VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT; + } + + if(USE_CUSTOM_CPU_ALLOCATION_CALLBACKS) + { + outInfo.pAllocationCallbacks = &g_CpuAllocationCallbacks; + } + + // Uncomment to enable recording to CSV file. + /* + static VmaRecordSettings recordSettings = {}; + recordSettings.pFilePath = "VulkanSample.csv"; + outInfo.pRecordSettings = &recordSettings; + */ + + // Uncomment to enable HeapSizeLimit. + /* + static std::array heapSizeLimit; + std::fill(heapSizeLimit.begin(), heapSizeLimit.end(), VK_WHOLE_SIZE); + heapSizeLimit[0] = 512ull * 1024 * 1024; + outInfo.pHeapSizeLimit = heapSizeLimit.data(); + */ +} + static void InitializeApplication() { if(USE_CUSTOM_CPU_ALLOCATION_CALLBACKS) @@ -1172,7 +1227,7 @@ static void InitializeApplication() appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); appInfo.pEngineName = "Adam Sawicki Engine"; appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); - appInfo.apiVersion = VMA_VULKAN_VERSION == 1001000 ? 
VK_API_VERSION_1_1 : VK_API_VERSION_1_0; + appInfo.apiVersion = GetVulkanApiVersion(); VkInstanceCreateInfo instInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO }; instInfo.pApplicationInfo = &appInfo; @@ -1204,15 +1259,53 @@ static void InitializeApplication() g_hPhysicalDevice = physicalDevices[0]; + // Query for extensions + + uint32_t physicalDeviceExtensionPropertyCount = 0; + ERR_GUARD_VULKAN( vkEnumerateDeviceExtensionProperties(g_hPhysicalDevice, nullptr, &physicalDeviceExtensionPropertyCount, nullptr) ); + std::vector physicalDeviceExtensionProperties{physicalDeviceExtensionPropertyCount}; + if(physicalDeviceExtensionPropertyCount) + { + ERR_GUARD_VULKAN( vkEnumerateDeviceExtensionProperties( + g_hPhysicalDevice, + nullptr, + &physicalDeviceExtensionPropertyCount, + physicalDeviceExtensionProperties.data()) ); + } + + for(uint32_t i = 0; i < physicalDeviceExtensionPropertyCount; ++i) + { + if(strcmp(physicalDeviceExtensionProperties[i].extensionName, VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME) == 0) + VK_KHR_get_memory_requirements2_enabled = true; + else if(strcmp(physicalDeviceExtensionProperties[i].extensionName, VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME) == 0) + VK_KHR_dedicated_allocation_enabled = true; + else if(strcmp(physicalDeviceExtensionProperties[i].extensionName, VK_KHR_BIND_MEMORY_2_EXTENSION_NAME) == 0) + VK_KHR_bind_memory2_enabled = true; + else if(strcmp(physicalDeviceExtensionProperties[i].extensionName, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME) == 0) + VK_EXT_memory_budget_enabled = true; + else if(strcmp(physicalDeviceExtensionProperties[i].extensionName, VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME) == 0) + VK_AMD_device_coherent_memory_enabled = true; + } + // Query for features VkPhysicalDeviceProperties physicalDeviceProperties = {}; vkGetPhysicalDeviceProperties(g_hPhysicalDevice, &physicalDeviceProperties); - VkPhysicalDeviceFeatures physicalDeviceFeatures = {}; - vkGetPhysicalDeviceFeatures(g_hPhysicalDevice, 
&physicalDeviceFeatures); + VkPhysicalDeviceFeatures2 physicalDeviceFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 }; + VkPhysicalDeviceCoherentMemoryFeaturesAMD physicalDeviceCoherentMemoryFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD }; + if(VK_AMD_device_coherent_memory_enabled) + { + physicalDeviceCoherentMemoryFeatures.pNext = physicalDeviceFeatures.pNext; + physicalDeviceFeatures.pNext = &physicalDeviceCoherentMemoryFeatures; + } + vkGetPhysicalDeviceFeatures2(g_hPhysicalDevice, &physicalDeviceFeatures); - g_SparseBindingEnabled = physicalDeviceFeatures.sparseBinding != 0; + g_SparseBindingEnabled = physicalDeviceFeatures.features.sparseBinding != 0; + + // The extension is supported as fake with no real support for this feature? Don't use it. + if(VK_AMD_device_coherent_memory_enabled && !physicalDeviceCoherentMemoryFeatures.deviceCoherentMemory) + VK_AMD_device_coherent_memory_enabled = false; // Find queue family index @@ -1289,105 +1382,45 @@ static void InitializeApplication() ++queueCount; } - VkPhysicalDeviceFeatures deviceFeatures = {}; - //deviceFeatures.fillModeNonSolid = VK_TRUE; - deviceFeatures.samplerAnisotropy = VK_TRUE; - deviceFeatures.sparseBinding = g_SparseBindingEnabled ? VK_TRUE : VK_FALSE; - - // Determine list of device extensions to enable. 
std::vector enabledDeviceExtensions; enabledDeviceExtensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if(VK_KHR_get_memory_requirements2_enabled) + enabledDeviceExtensions.push_back(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME); + if(VK_KHR_dedicated_allocation_enabled) + enabledDeviceExtensions.push_back(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME); + if(VK_KHR_bind_memory2_enabled) + enabledDeviceExtensions.push_back(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME); + if(VK_EXT_memory_budget_enabled) + enabledDeviceExtensions.push_back(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); + if(VK_AMD_device_coherent_memory_enabled) + enabledDeviceExtensions.push_back(VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME); + + VkPhysicalDeviceFeatures2 deviceFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 }; + deviceFeatures.features.samplerAnisotropy = VK_TRUE; + deviceFeatures.features.sparseBinding = g_SparseBindingEnabled ? VK_TRUE : VK_FALSE; + + if(VK_AMD_device_coherent_memory_enabled) { - uint32_t propertyCount = 0; - ERR_GUARD_VULKAN( vkEnumerateDeviceExtensionProperties(g_hPhysicalDevice, nullptr, &propertyCount, nullptr) ); - - if(propertyCount) - { - std::vector properties{propertyCount}; - ERR_GUARD_VULKAN( vkEnumerateDeviceExtensionProperties(g_hPhysicalDevice, nullptr, &propertyCount, properties.data()) ); - - for(uint32_t i = 0; i < propertyCount; ++i) - { - if(strcmp(properties[i].extensionName, VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME) == 0) - { - enabledDeviceExtensions.push_back(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME); - VK_KHR_get_memory_requirements2_enabled = true; - } - else if(strcmp(properties[i].extensionName, VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME) == 0) - { - enabledDeviceExtensions.push_back(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME); - VK_KHR_dedicated_allocation_enabled = true; - } - else if(strcmp(properties[i].extensionName, VK_KHR_BIND_MEMORY_2_EXTENSION_NAME) == 0) - { - 
enabledDeviceExtensions.push_back(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME); - VK_KHR_bind_memory2_enabled = true; - } - else if(strcmp(properties[i].extensionName, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME) == 0) - { - enabledDeviceExtensions.push_back(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); - VK_EXT_memory_budget_enabled = true; - } - } - } + physicalDeviceCoherentMemoryFeatures.pNext = deviceFeatures.pNext; + deviceFeatures.pNext = &physicalDeviceCoherentMemoryFeatures; + physicalDeviceCoherentMemoryFeatures.deviceCoherentMemory = VK_TRUE; } VkDeviceCreateInfo deviceCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO }; + deviceCreateInfo.pNext = &deviceFeatures; deviceCreateInfo.enabledLayerCount = 0; deviceCreateInfo.ppEnabledLayerNames = nullptr; deviceCreateInfo.enabledExtensionCount = (uint32_t)enabledDeviceExtensions.size(); deviceCreateInfo.ppEnabledExtensionNames = !enabledDeviceExtensions.empty() ? enabledDeviceExtensions.data() : nullptr; deviceCreateInfo.queueCreateInfoCount = queueCount; deviceCreateInfo.pQueueCreateInfos = queueCreateInfo; - deviceCreateInfo.pEnabledFeatures = &deviceFeatures; ERR_GUARD_VULKAN( vkCreateDevice(g_hPhysicalDevice, &deviceCreateInfo, g_Allocs, &g_hDevice) ); // Create memory allocator VmaAllocatorCreateInfo allocatorInfo = {}; - allocatorInfo.physicalDevice = g_hPhysicalDevice; - allocatorInfo.device = g_hDevice; - allocatorInfo.instance = g_hVulkanInstance; - allocatorInfo.vulkanApiVersion = appInfo.apiVersion; - - if(VK_KHR_dedicated_allocation_enabled) - { - allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; - } - if(VK_KHR_bind_memory2_enabled) - { - allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; - } -#if !defined(VMA_MEMORY_BUDGET) || VMA_MEMORY_BUDGET == 1 - if(VK_EXT_memory_budget_enabled && VK_KHR_get_physical_device_properties2_enabled) - { - allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; - } -#endif - - if(USE_CUSTOM_CPU_ALLOCATION_CALLBACKS) - { - 
allocatorInfo.pAllocationCallbacks = &g_CpuAllocationCallbacks; - } - - // Uncomment to enable recording to CSV file. - /* - { - VmaRecordSettings recordSettings = {}; - recordSettings.pFilePath = "VulkanSample.csv"; - allocatorInfo.pRecordSettings = &recordSettings; - } - */ - - // Uncomment to enable HeapSizeLimit. - /* - std::array heapSizeLimit; - std::fill(heapSizeLimit.begin(), heapSizeLimit.end(), VK_WHOLE_SIZE); - heapSizeLimit[0] = 512ull * 1024 * 1024; - allocatorInfo.pHeapSizeLimit = heapSizeLimit.data(); - */ - + SetAllocatorCreateInfo(allocatorInfo); ERR_GUARD_VULKAN( vmaCreateAllocator(&allocatorInfo, &g_hAllocator) ); // Retrieve queues (don't need to be destroyed). diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h index 5f4d7ff..369817f 100644 --- a/src/vk_mem_alloc.h +++ b/src/vk_mem_alloc.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -1925,6 +1925,24 @@ typedef enum VmaAllocatorCreateFlagBits { be more accurate than an estimation used by the library otherwise. */ VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008, + /** + Enables usage of VK_AMD_device_coherent_memory extension. + + You may set this flag only if you: + + - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device, + - want it to be used internally by this library. + + The extension and accompanying device feature provide access to memory types with + `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. 
+ They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR. + + When the extension is not enabled, such memory types are still enumerated, but their usage is illegal. + To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type, + returning `VK_ERROR_FEATURE_NOT_PRESENT`. + */ + VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010, VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; @@ -2908,6 +2926,7 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( /** \brief Deprecated. +\deprecated In version 2.2.0 it used to try to change allocation's size without moving or reallocating it. In current version it returns `VK_SUCCESS` only if `newSize` equals current allocation's size. Otherwise returns `VK_ERROR_OUT_OF_POOL_MEMORY`, indicating that allocation's size could not be changed. @@ -3858,6 +3877,12 @@ static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; END OF CONFIGURATION */ +// # Copy of some Vulkan definitions so we don't need to check their existence just to handle a few constants. + +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; + + static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; static VkAllocationCallbacks VmaEmptyAllocationCallbacks = { @@ -6926,7 +6951,8 @@ public: uint32_t vulkanApiVersion, bool dedicatedAllocationExtensionEnabled, bool bindMemory2ExtensionEnabled, - bool memoryBudgetExtensionEnabled); + bool memoryBudgetExtensionEnabled, + bool deviceCoherentMemoryExtensionEnabled); ~VmaRecorder(); void RecordCreateAllocator(uint32_t frameIndex); @@ -7124,6 +7150,7 @@ public: bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). 
bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). bool m_UseExtMemoryBudget; + bool m_UseAmdDeviceCoherentMemory; VkDevice m_hDevice; VkInstance m_hInstance; bool m_AllocationCallbacksSpecified; @@ -7194,6 +7221,8 @@ public: return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; } + uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; } + #if VMA_RECORDING_ENABLED VmaRecorder* GetRecorder() const { return m_pRecorder; } #endif @@ -7328,6 +7357,9 @@ private: VmaVulkanFunctions m_VulkanFunctions; + // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types. + uint32_t m_GlobalMemoryTypeBits; + #if VMA_RECORDING_ENABLED VmaRecorder* m_pRecorder; #endif @@ -7381,6 +7413,8 @@ private: */ uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; + uint32_t CalculateGlobalMemoryTypeBits() const; + #if VMA_MEMORY_BUDGET void UpdateVulkanBudget(); #endif // #if VMA_MEMORY_BUDGET @@ -14747,7 +14781,8 @@ void VmaRecorder::WriteConfiguration( uint32_t vulkanApiVersion, bool dedicatedAllocationExtensionEnabled, bool bindMemory2ExtensionEnabled, - bool memoryBudgetExtensionEnabled) + bool memoryBudgetExtensionEnabled, + bool deviceCoherentMemoryExtensionEnabled) { fprintf(m_File, "Config,Begin\n"); @@ -14780,6 +14815,7 @@ void VmaRecorder::WriteConfiguration( fprintf(m_File, "Extension,VK_KHR_dedicated_allocation,%u\n", dedicatedAllocationExtensionEnabled ? 1 : 0); fprintf(m_File, "Extension,VK_KHR_bind_memory2,%u\n", bindMemory2ExtensionEnabled ? 1 : 0); fprintf(m_File, "Extension,VK_EXT_memory_budget,%u\n", memoryBudgetExtensionEnabled ? 1 : 0); + fprintf(m_File, "Extension,VK_AMD_device_coherent_memory,%u\n", deviceCoherentMemoryExtensionEnabled ? 1 : 0); fprintf(m_File, "Macro,VMA_DEBUG_ALWAYS_DEDICATED_MEMORY,%u\n", VMA_DEBUG_ALWAYS_DEDICATED_MEMORY ? 
1 : 0); fprintf(m_File, "Macro,VMA_DEBUG_ALIGNMENT,%llu\n", (VkDeviceSize)VMA_DEBUG_ALIGNMENT); @@ -14854,6 +14890,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0), m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0), + m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), m_hDevice(pCreateInfo->device), m_hInstance(pCreateInfo->instance), m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), @@ -14866,7 +14903,8 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_CurrentFrameIndex(0), m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), m_Pools(VmaStlAllocator(GetAllocationCallbacks())), - m_NextPoolId(0) + m_NextPoolId(0), + m_GlobalMemoryTypeBits(UINT32_MAX) #if VMA_RECORDING_ENABLED ,m_pRecorder(VMA_NULL) #endif @@ -14940,6 +14978,8 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? 
pCreateInfo->preferredLargeHeapBlockSize : static_cast(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); + m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); + if(pCreateInfo->pHeapSizeLimit != VMA_NULL) { for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) @@ -14998,7 +15038,8 @@ VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) m_VulkanApiVersion, m_UseKhrDedicatedAllocation, m_UseKhrBindMemory2, - m_UseExtMemoryBudget); + m_UseExtMemoryBudget, + m_UseAmdDeviceCoherentMemory); m_pRecorder->RecordCreateAllocator(GetCurrentFrameIndex()); #else VMA_ASSERT(0 && "VmaAllocatorCreateInfo::pRecordSettings used, but not supported due to VMA_RECORDING_ENABLED not defined to 1."); @@ -16045,6 +16086,12 @@ VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPoo { return VK_ERROR_INITIALIZATION_FAILED; } + // Memory type index out of range or forbidden. + if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || + ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); @@ -16522,6 +16569,28 @@ uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const return memoryTypeBits; } +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ + // Make sure memory information is already fetched. + VMA_ASSERT(GetMemoryTypeCount() > 0); + + uint32_t memoryTypeBits = UINT32_MAX; + + if(!m_UseAmdDeviceCoherentMemory) + { + // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. 
+ for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) + { + memoryTypeBits &= ~(1u << memTypeIndex); + } + } + } + + return memoryTypeBits; +} + #if VMA_MEMORY_BUDGET void VmaAllocator_T::UpdateVulkanBudget() @@ -16849,6 +16918,18 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( { json.WriteString("LAZILY_ALLOCATED"); } + if((flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) + { + json.WriteString(" PROTECTED"); + } + if((flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) + { + json.WriteString(" DEVICE_COHERENT"); + } + if((flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) != 0) + { + json.WriteString(" DEVICE_UNCACHED"); + } json.EndArray(); if(stats.memoryType[typeIndex].blockCount > 0) @@ -16908,6 +16989,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + memoryTypeBits &= allocator->GetGlobalMemoryTypeBits(); + if(pAllocationCreateInfo->memoryTypeBits != 0) { memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; @@ -16953,6 +17036,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( break; } + // Avoid DEVICE_COHERENT unless explicitly requested. 
+ if(((pAllocationCreateInfo->requiredFlags | pAllocationCreateInfo->preferredFlags) & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) + { + notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY; + } + *pMemoryTypeIndex = UINT32_MAX; uint32_t minCost = UINT32_MAX; for(uint32_t memTypeIndex = 0, memTypeBit = 1; diff --git a/tools/VmaDumpVis/VmaDumpVis.py b/tools/VmaDumpVis/VmaDumpVis.py index efd4d76..f5a38b1 100644 --- a/tools/VmaDumpVis/VmaDumpVis.py +++ b/tools/VmaDumpVis/VmaDumpVis.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal